kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 2111ae5fa637391d747cb3e8eda7f4d14d1c6e9f
parent 86d0df769de84a2638ea720421a4d891acaf5740
Author: [email protected] <[email protected]>
Date:   Wed,  9 Nov 2022 19:29:10 +0000

Fixes for GCC and MSVC

Diffstat:
M cmake/aarch64.cmake | 2 +-
M cmake/arm.cmake | 2 +-
M include/kfr/base/expression.hpp | 3 +--
M include/kfr/base/random_bits.hpp | 3 +++
M include/kfr/cident.h | 4 ++--
M include/kfr/cometa.hpp | 1 +
M include/kfr/dft/impl/dft-impl.hpp | 4 ++--
M include/kfr/dft/impl/fft-impl.hpp | 6 ++++--
M include/kfr/simd/impl/backend_generic.hpp | 9 ++++-----
M tests/dft_test.cpp | 22 +++++++++++++++++++---
10 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/cmake/aarch64.cmake b/cmake/aarch64.cmake @@ -22,7 +22,7 @@ if (NOT GCC_VER) endif () set (SYS_PATHS "-isystem ${ARM_ROOT}/c++/${GCC_VER} -isystem ${ARM_ROOT}/c++/${GCC_VER}/backward -isystem ${ARM_ROOT}/c++/${GCC_VER}/${TGT_TRIPLET} -isystem ${ARM_ROOT}") -set (ARM_COMMON_FLAGS "-target ${TGT_TRIPLET} -mcpu=cortex-a72 -static") +set (ARM_COMMON_FLAGS "-std=gnu++17 -target ${TGT_TRIPLET} -mcpu=cortex-a72 -static") set (CMAKE_CXX_FLAGS "${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "") set (CMAKE_C_FLAGS " ${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "") diff --git a/cmake/arm.cmake b/cmake/arm.cmake @@ -21,7 +21,7 @@ if (NOT GCC_VER) endif () set (SYS_PATHS "-isystem ${ARM_ROOT}/c++/${GCC_VER} -isystem ${ARM_ROOT}/c++/${GCC_VER}/backward -isystem ${ARM_ROOT}/c++/${GCC_VER}/${TGT_TRIPLET} -isystem ${ARM_ROOT}") -set (ARM_COMMON_FLAGS "-target ${TGT_TRIPLET} -mcpu=cortex-a15 -mfpu=neon-vfpv4 -mfloat-abi=hard -static") +set (ARM_COMMON_FLAGS "-std=gnu++17 -target ${TGT_TRIPLET} -mcpu=cortex-a15 -mfpu=neon-vfpv4 -mfloat-abi=hard -static") set (CMAKE_CXX_FLAGS "${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "") set (CMAKE_C_FLAGS " ${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "") diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -565,10 +565,9 @@ template <typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, KFR_INTRINSIC vec<T, N> get_elements(const expression_function<Fn, Args...>& self, const shape<Dims>& index, const axis_params<Axis, N>& sh) { - constexpr index_t outdims = Tr::dims; return self.fold_idx( [&](auto... 
idx) CMT_INLINE_LAMBDA -> vec<T, N> { - return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...); + return self.fn(internal::get_arg<Tr::dims>(self, index, sh, idx)...); }); } diff --git a/include/kfr/base/random_bits.hpp b/include/kfr/base/random_bits.hpp @@ -79,6 +79,8 @@ KFR_INTRINSIC void random_next(random_state& state) state.v = bitcast<u32>(rotateright<3>( bitcast<u8>(fmadd(static_cast<u32x4>(state.v), static_cast<u32x4>(mul), static_cast<u32x4>(add))))); } + +#ifndef KFR_DISABLE_READCYCLECOUNTER KFR_INTRINSIC random_state random_init() { random_state state; @@ -87,6 +89,7 @@ KFR_INTRINSIC random_state random_init() random_next(state); return state; } +#endif KFR_INTRINSIC random_state random_init(u32 x0, u32 x1, u32 x2, u32 x3) { diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -383,7 +383,7 @@ extern char* gets(char* __s); #elif defined(CMT_MSVC_ATTRIBUTES) #ifndef CMT_NO_FORCE_INLINE -#if _MSC_VER >= 1927 +#if _MSC_VER >= 1927 && _MSVC_LANG >= 202002L #define CMT_ALWAYS_INLINE [[msvc::forceinline]] #else #define CMT_ALWAYS_INLINE __forceinline @@ -396,7 +396,7 @@ extern char* gets(char* __s); #define CMT_INLINE inline CMT_ALWAYS_INLINE #define CMT_INLINE_MEMBER CMT_ALWAYS_INLINE #if _MSC_VER >= 1927 -#define CMT_INLINE_LAMBDA CMT_ALWAYS_INLINE +#define CMT_INLINE_LAMBDA [[msvc::forceinline]] #else #define CMT_INLINE_LAMBDA #endif diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp @@ -23,6 +23,7 @@ CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wdeprecated-declarations") CMT_PRAGMA_MSVC(warning(push)) CMT_PRAGMA_MSVC(warning(disable : 4814)) +CMT_PRAGMA_MSVC(warning(disable : 4308)) CMT_PRAGMA_MSVC(warning(disable : 4014)) namespace cometa diff --git a/include/kfr/dft/impl/dft-impl.hpp b/include/kfr/dft/impl/dft-impl.hpp @@ -527,7 +527,7 @@ void init_dft(dft_plan<T>* self, size_t size, dft_order) for (size_t i = 0; i < count[r]; i++) { iterations /= r; - radices[radices_size++] = r; + radices[radices_size++] = static_cast<int>(r); 
if (iterations == 1) prepare_dft_stage(self, r, iterations, blocks, ctrue); else @@ -539,7 +539,7 @@ void init_dft(dft_plan<T>* self, size_t size, dft_order) if (cur_size > 1) { iterations /= cur_size; - radices[radices_size++] = cur_size; + radices[radices_size++] = static_cast<int>(cur_size); if (iterations == 1) prepare_dft_stage(self, cur_size, iterations, blocks, ctrue); else diff --git a/include/kfr/dft/impl/fft-impl.hpp b/include/kfr/dft/impl/fft-impl.hpp @@ -1106,12 +1106,14 @@ KFR_INTRINSIC void initialize_stages(dft_plan<T>* self) { init_fft(self, self->size, dft_order::normal); } -#ifndef KFR_DFT_NO_NPo2 else { +#ifndef KFR_DFT_NO_NPo2 init_dft(self, self->size, dft_order::normal); - } +#else + KFR_REPORT_ERROR(logic, "Non-power of 2 FFT is disabled"); #endif + } } template <typename T> diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp @@ -1695,10 +1695,10 @@ KFR_INTRINSIC simd<float, 8> simd_vec_shuffle(simd_t<float, 8>, const simd<float { return simd_from_halves(simd_t<float, 8>{}, universal_shuffle(simd_t<float, 4>{}, simd_get_low(simd_t<float, 8>{}, x), - shuffle_mask<8, I0, I1, I2, I3>::value), + csizes<I0, I1, I2, I3>), universal_shuffle(simd_t<float, 4>{}, simd_get_high(simd_t<float, 8>{}, x), - shuffle_mask<8, I4, I5, I6, I7>::value)); + csizes<I4, I5, I6, I7>)); } } else @@ -1735,10 +1735,9 @@ KFR_INTRINSIC simd<double, 4> simd_vec_shuffle(simd_t<double, 4>, const simd<dou { return simd_from_halves( simd_t<double, 4>{}, - universal_shuffle(simd_t<double, 2>{}, simd_get_low(simd_t<double, 4>{}, x), - shuffle_mask<2, I0, I1>::value), + universal_shuffle(simd_t<double, 2>{}, simd_get_low(simd_t<double, 4>{}, x), csizes<I0, I1>), universal_shuffle(simd_t<double, 2>{}, simd_get_high(simd_t<double, 4>{}, x), - shuffle_mask<2, I2, I3>::value)); + csizes<I2, I3>)); } } else diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp @@ -24,10 +24,24 @@ constexpr ctypes_t<float, double> dft_float_types{}; 
constexpr ctypes_t<float> dft_float_types{}; #endif -#if defined(__clang__) && defined(CMT_ARCH_X86) +#if defined(CMT_ARCH_X86) -static void full_barrier() { asm volatile("mfence" ::: "memory"); } -static void dont_optimize(const void* in) { asm volatile("" : "+m"(in)); } +static void full_barrier() +{ +#ifdef CMT_COMPILER_GNU + asm volatile("mfence" ::: "memory"); +#else + _ReadWriteBarrier(); +#endif +} +static CMT_NOINLINE void dont_optimize(const void* in) +{ +#ifdef CMT_COMPILER_GNU + asm volatile("" : "+m"(in)); +#else + volatile uint8_t a = *reinterpret_cast<const uint8_t*>(in); +#endif +} template <typename T> static void perf_test_t(int size) @@ -69,10 +83,12 @@ TEST(test_performance) perf_test(size); } +#ifndef KFR_DFT_NO_NPo2 perf_test(210); perf_test(3150); perf_test(211); perf_test(3163); +#endif } #endif