commit 2111ae5fa637391d747cb3e8eda7f4d14d1c6e9f
parent 86d0df769de84a2638ea720421a4d891acaf5740
Author: [email protected] <[email protected]>
Date: Wed, 9 Nov 2022 19:29:10 +0000
Fixes for GCC and MSVC
Diffstat:
10 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/cmake/aarch64.cmake b/cmake/aarch64.cmake
@@ -22,7 +22,7 @@ if (NOT GCC_VER)
endif ()
set (SYS_PATHS "-isystem ${ARM_ROOT}/c++/${GCC_VER} -isystem ${ARM_ROOT}/c++/${GCC_VER}/backward -isystem ${ARM_ROOT}/c++/${GCC_VER}/${TGT_TRIPLET} -isystem ${ARM_ROOT}")
set (ARM_COMMON_FLAGS "-target ${TGT_TRIPLET} -mcpu=cortex-a72 -static")
-set (CMAKE_CXX_FLAGS "${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "")
+set (CMAKE_CXX_FLAGS "-std=gnu++17 ${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "")
set (CMAKE_C_FLAGS " ${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "")
diff --git a/cmake/arm.cmake b/cmake/arm.cmake
@@ -21,7 +21,7 @@ if (NOT GCC_VER)
endif ()
set (SYS_PATHS "-isystem ${ARM_ROOT}/c++/${GCC_VER} -isystem ${ARM_ROOT}/c++/${GCC_VER}/backward -isystem ${ARM_ROOT}/c++/${GCC_VER}/${TGT_TRIPLET} -isystem ${ARM_ROOT}")
set (ARM_COMMON_FLAGS "-target ${TGT_TRIPLET} -mcpu=cortex-a15 -mfpu=neon-vfpv4 -mfloat-abi=hard -static")
-set (CMAKE_CXX_FLAGS "${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "")
+set (CMAKE_CXX_FLAGS "-std=gnu++17 ${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "")
set (CMAKE_C_FLAGS " ${SYS_PATHS} ${ARM_COMMON_FLAGS}" CACHE STRING "")
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -565,10 +565,9 @@ template <typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims,
KFR_INTRINSIC vec<T, N> get_elements(const expression_function<Fn, Args...>& self, const shape<Dims>& index,
const axis_params<Axis, N>& sh)
{
- constexpr index_t outdims = Tr::dims;
return self.fold_idx(
[&](auto... idx) CMT_INLINE_LAMBDA -> vec<T, N> {
- return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...);
+ return self.fn(internal::get_arg<Tr::dims>(self, index, sh, idx)...);
});
}
diff --git a/include/kfr/base/random_bits.hpp b/include/kfr/base/random_bits.hpp
@@ -79,6 +79,8 @@ KFR_INTRINSIC void random_next(random_state& state)
state.v = bitcast<u32>(rotateright<3>(
bitcast<u8>(fmadd(static_cast<u32x4>(state.v), static_cast<u32x4>(mul), static_cast<u32x4>(add)))));
}
+
+#ifndef KFR_DISABLE_READCYCLECOUNTER
KFR_INTRINSIC random_state random_init()
{
random_state state;
@@ -87,6 +89,7 @@ KFR_INTRINSIC random_state random_init()
random_next(state);
return state;
}
+#endif
KFR_INTRINSIC random_state random_init(u32 x0, u32 x1, u32 x2, u32 x3)
{
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -383,7 +383,7 @@ extern char* gets(char* __s);
#elif defined(CMT_MSVC_ATTRIBUTES)
#ifndef CMT_NO_FORCE_INLINE
-#if _MSC_VER >= 1927
+#if _MSC_VER >= 1927 && _MSVC_LANG >= 202002L
#define CMT_ALWAYS_INLINE [[msvc::forceinline]]
#else
#define CMT_ALWAYS_INLINE __forceinline
@@ -396,7 +396,7 @@ extern char* gets(char* __s);
#define CMT_INLINE inline CMT_ALWAYS_INLINE
#define CMT_INLINE_MEMBER CMT_ALWAYS_INLINE
#if _MSC_VER >= 1927
-#define CMT_INLINE_LAMBDA CMT_ALWAYS_INLINE
+#define CMT_INLINE_LAMBDA [[msvc::forceinline]]
#else
#define CMT_INLINE_LAMBDA
#endif
diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp
@@ -23,6 +23,7 @@ CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wdeprecated-declarations")
CMT_PRAGMA_MSVC(warning(push))
CMT_PRAGMA_MSVC(warning(disable : 4814))
+CMT_PRAGMA_MSVC(warning(disable : 4308))
CMT_PRAGMA_MSVC(warning(disable : 4014))
namespace cometa
diff --git a/include/kfr/dft/impl/dft-impl.hpp b/include/kfr/dft/impl/dft-impl.hpp
@@ -527,7 +527,7 @@ void init_dft(dft_plan<T>* self, size_t size, dft_order)
for (size_t i = 0; i < count[r]; i++)
{
iterations /= r;
- radices[radices_size++] = r;
+ radices[radices_size++] = static_cast<int>(r);
if (iterations == 1)
prepare_dft_stage(self, r, iterations, blocks, ctrue);
else
@@ -539,7 +539,7 @@ void init_dft(dft_plan<T>* self, size_t size, dft_order)
if (cur_size > 1)
{
iterations /= cur_size;
- radices[radices_size++] = cur_size;
+ radices[radices_size++] = static_cast<int>(cur_size);
if (iterations == 1)
prepare_dft_stage(self, cur_size, iterations, blocks, ctrue);
else
diff --git a/include/kfr/dft/impl/fft-impl.hpp b/include/kfr/dft/impl/fft-impl.hpp
@@ -1106,12 +1106,14 @@ KFR_INTRINSIC void initialize_stages(dft_plan<T>* self)
{
init_fft(self, self->size, dft_order::normal);
}
-#ifndef KFR_DFT_NO_NPo2
else
{
+#ifndef KFR_DFT_NO_NPo2
init_dft(self, self->size, dft_order::normal);
- }
+#else
+ KFR_REPORT_ERROR(logic, "Non-power of 2 FFT is disabled");
#endif
+ }
}
template <typename T>
diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp
@@ -1695,10 +1695,10 @@ KFR_INTRINSIC simd<float, 8> simd_vec_shuffle(simd_t<float, 8>, const simd<float
{
return simd_from_halves(simd_t<float, 8>{},
universal_shuffle(simd_t<float, 4>{}, simd_get_low(simd_t<float, 8>{}, x),
- shuffle_mask<8, I0, I1, I2, I3>::value),
+ csizes<I0, I1, I2, I3>),
universal_shuffle(simd_t<float, 4>{},
simd_get_high(simd_t<float, 8>{}, x),
- shuffle_mask<8, I4, I5, I6, I7>::value));
+ csizes<I4, I5, I6, I7>));
}
}
else
@@ -1735,10 +1735,9 @@ KFR_INTRINSIC simd<double, 4> simd_vec_shuffle(simd_t<double, 4>, const simd<dou
{
return simd_from_halves(
simd_t<double, 4>{},
- universal_shuffle(simd_t<double, 2>{}, simd_get_low(simd_t<double, 4>{}, x),
- shuffle_mask<2, I0, I1>::value),
+ universal_shuffle(simd_t<double, 2>{}, simd_get_low(simd_t<double, 4>{}, x), csizes<I0, I1>),
universal_shuffle(simd_t<double, 2>{}, simd_get_high(simd_t<double, 4>{}, x),
- shuffle_mask<2, I2, I3>::value));
+ csizes<I2, I3>));
}
}
else
diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp
@@ -24,10 +24,24 @@ constexpr ctypes_t<float, double> dft_float_types{};
constexpr ctypes_t<float> dft_float_types{};
#endif
-#if defined(__clang__) && defined(CMT_ARCH_X86)
+#if defined(CMT_ARCH_X86)
-static void full_barrier() { asm volatile("mfence" ::: "memory"); }
-static void dont_optimize(const void* in) { asm volatile("" : "+m"(in)); }
+static void full_barrier()
+{
+#ifdef CMT_COMPILER_GNU
+ asm volatile("mfence" ::: "memory");
+#else
+ _ReadWriteBarrier();
+#endif
+}
+static CMT_NOINLINE void dont_optimize(const void* in)
+{
+#ifdef CMT_COMPILER_GNU
+ asm volatile("" : "+m"(in));
+#else
+ volatile uint8_t a = *reinterpret_cast<const uint8_t*>(in);
+#endif
+}
template <typename T>
static void perf_test_t(int size)
@@ -69,10 +83,12 @@ TEST(test_performance)
perf_test(size);
}
+#ifndef KFR_DFT_NO_NPo2
perf_test(210);
perf_test(3150);
perf_test(211);
perf_test(3163);
+#endif
}
#endif