kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 969aee6c30842a4486facc57e088cd177fcc07cb
parent ffa2909116d211648f819de2a2b2be3b0b622bb8
Author: [email protected] <[email protected]>
Date:   Fri, 22 Jul 2016 10:15:00 +0300

generic cpu support

Diffstat:
M include/kfr/base/abs.hpp | 28 +++++++++++++---------------
M include/kfr/base/logical.hpp | 42 ++++++++++++++++++++++++++++++++++++++++++
M include/kfr/base/min_max.hpp | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M include/kfr/base/round.hpp | 67 +++++++++++++++++++++++++++++++++++++++++++++----------------------
M include/kfr/base/saturation.hpp | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M include/kfr/base/select.hpp | 6 +++---
M include/kfr/base/sqrt.hpp | 20 ++++++++++++++++++++
M include/kfr/base/types.hpp | 23 ++++++++++++++---------
M include/kfr/cident.h | 2 +-
M tests/CMakeLists.txt | 3 ++-
10 files changed, 259 insertions(+), 90 deletions(-)

diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp @@ -37,21 +37,21 @@ namespace kfr namespace internal { -template <cpu_t cpu = cpu_t::native> -struct in_abs : in_abs<older(cpu)> +template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu> +struct in_abs : in_abs<older(cpu), cc> { - struct fn_abs : in_abs<older(cpu)>::fn_abs, fn_disabled + struct fn_abs : in_abs<older(cpu), cc>::fn_abs, fn_disabled { }; }; -template <> -struct in_abs<cpu_t::sse2> : in_select<cpu_t::sse2> +template <cpu_t cc> +struct in_abs<cpu_t::common, cc> : in_select<cc> { - constexpr static cpu_t cpu = cpu_t::sse2; + constexpr static cpu_t cpu = cpu_t::common; private: - using in_select<cpu_t::sse2>::select; + using in_select<cc>::select; public: template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> @@ -65,18 +65,17 @@ public: return value & invhighbitmask<T>; } - KFR_HANDLE_ALL(abs) KFR_HANDLE_SCALAR(abs) KFR_SPEC_FN(in_abs, abs) }; -template <> -struct in_abs<cpu_t::ssse3> : in_abs<cpu_t::sse2>, in_select<cpu_t::sse2> +template <cpu_t cc> +struct in_abs<cpu_t::ssse3, cc> : in_abs<cpu_t::common>, in_select<cc> { constexpr static cpu_t cpu = cpu_t::ssse3; private: - using in_select<cpu_t::sse2>::select; + using in_select<cc>::select; public: template <size_t N> @@ -100,11 +99,11 @@ public: KFR_SPEC_FN(in_abs, abs) }; -template <> -struct in_abs<cpu_t::avx2> : in_abs<cpu_t::ssse3> +template <cpu_t cc> +struct in_abs<cpu_t::avx2, cc> : in_abs<cc> { constexpr static cpu_t cpu = cpu_t::avx2; - using in_abs<cpu_t::ssse3>::abs; + using in_abs<cc>::abs; KFR_CPU_INTRIN(avx2) i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); } KFR_CPU_INTRIN(avx2) i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); } @@ -120,7 +119,6 @@ namespace native { using fn_abs = internal::in_abs<>::fn_abs; template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> - KFR_INTRIN ftype<T1> abs(const T1& x) { return internal::in_abs<>::abs(x); diff --git a/include/kfr/base/logical.hpp 
b/include/kfr/base/logical.hpp @@ -69,6 +69,48 @@ struct logical_and }; template <> +struct in_bittest<cpu_t::common> +{ + constexpr static cpu_t cpu = cpu_t::common; + + template <typename T, size_t N> + KFR_SINTRIN bitmask<N> getmask(vec<T, N> x) + { + typename bitmask<N>::type val = 0; + for (size_t i = 0; i < N; i++) + { + val |= (ubitcast(x[i]) >> (typebits<T>::bits - 1)) << i; + } + return val; + } + + template <typename T, size_t N> + KFR_SINTRIN bool bittestnone(vec<T, N> x) + { + return !getmask(x).value; + } + template <typename T, size_t N> + KFR_SINTRIN bool bittestnone(vec<T, N> x, vec<T, N> y) + { + return bittestnone(x & y); + } + + template <typename T, size_t N> + KFR_SINTRIN bool bittestall(vec<T, N> x) + { + return !getmask(~x).value; + } + template <typename T, size_t N> + KFR_SINTRIN bool bittestall(vec<T, N> x, vec<T, N> y) + { + return bittestnone(~x & y); + } + + KFR_SPEC_FN(in_bittest, bittestnone) + KFR_SPEC_FN(in_bittest, bittestall) +}; + +template <> struct in_bittest<cpu_t::sse2> { constexpr static cpu_t cpu = cpu_t::sse2; diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp @@ -38,24 +38,57 @@ namespace kfr namespace internal { -template <cpu_t cpu = cpu_t::native> -struct in_min_max : in_min_max<older(cpu)> +template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu> +struct in_min_max : in_min_max<older(cpu), cc> { - struct fn_min : in_min_max<older(cpu)>::fn_min, fn_disabled + struct fn_min : in_min_max<older(cpu), cc>::fn_min, fn_disabled { }; - struct fn_max : in_min_max<older(cpu)>::fn_max, fn_disabled + struct fn_max : in_min_max<older(cpu), cc>::fn_max, fn_disabled { }; }; -template <> -struct in_min_max<cpu_t::sse2> : in_select<cpu_t::sse2> +template <cpu_t cc> +struct in_min_max<cpu_t::common, cc> : in_select<cc> +{ + constexpr static cpu_t cpu = cpu_t::common; + + template <typename T> + KFR_SINTRIN T min(initialvalue<T>) + { + return std::numeric_limits<T>::max(); + } + template <typename T> + KFR_SINTRIN T 
max(initialvalue<T>) + { + return std::numeric_limits<T>::min(); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> min(vec<T, N> x, vec<T, N> y) + { + return select(x < y, x, y); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> max(vec<T, N> x, vec<T, N> y) + { + return select(x > y, x, y); + } + + KFR_HANDLE_SCALAR(min) + KFR_HANDLE_SCALAR(max) + KFR_SPEC_FN(in_min_max, min) + KFR_SPEC_FN(in_min_max, max) +}; + +template <cpu_t cc> +struct in_min_max<cpu_t::sse2, cc> : in_select<cc> { constexpr static cpu_t cpu = cpu_t::sse2; private: - using in_select<cpu>::select; + using in_select<cc>::select; public: template <typename T> @@ -93,13 +126,14 @@ public: KFR_HANDLE_ALL(min) KFR_HANDLE_ALL(max) - + KFR_HANDLE_SCALAR(min) + KFR_HANDLE_SCALAR(max) KFR_SPEC_FN(in_min_max, min) KFR_SPEC_FN(in_min_max, max) }; -template <> -struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2> +template <cpu_t cc> +struct in_min_max<cpu_t::sse41, cc> : in_min_max<cpu_t::sse2> { constexpr static cpu_t cpu = cpu_t::sse41; using in_min_max<cpu_t::sse2>::min; @@ -117,12 +151,14 @@ struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2> KFR_HANDLE_ALL(min) KFR_HANDLE_ALL(max) + KFR_HANDLE_SCALAR(min) + KFR_HANDLE_SCALAR(max) KFR_SPEC_FN(in_min_max, min) KFR_SPEC_FN(in_min_max, max) }; -template <> -struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41> +template <cpu_t cc> +struct in_min_max<cpu_t::avx1, cc> : in_min_max<cpu_t::sse41> { constexpr static cpu_t cpu = cpu_t::avx1; using in_min_max<cpu_t::sse41>::min; @@ -135,12 +171,14 @@ struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41> KFR_HANDLE_ALL(min) KFR_HANDLE_ALL(max) + KFR_HANDLE_SCALAR(min) + KFR_HANDLE_SCALAR(max) KFR_SPEC_FN(in_min_max, min) KFR_SPEC_FN(in_min_max, max) }; -template <> -struct in_min_max<cpu_t::avx2> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2> +template <cpu_t cc> +struct in_min_max<cpu_t::avx2, cc> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2> { constexpr 
static cpu_t cpu = cpu_t::avx2; @@ -172,6 +210,8 @@ public: KFR_HANDLE_ALL(min) KFR_HANDLE_ALL(max) + KFR_HANDLE_SCALAR(min) + KFR_HANDLE_SCALAR(max) KFR_SPEC_FN(in_min_max, min) KFR_SPEC_FN(in_min_max, max) }; @@ -193,6 +233,8 @@ public: KFR_HANDLE_ALL(minabs) KFR_HANDLE_ALL(maxabs) + KFR_HANDLE_SCALAR(min) + KFR_HANDLE_SCALAR(max) KFR_SPEC_FN(in_minabs_maxabs, minabs) KFR_SPEC_FN(in_minabs_maxabs, maxabs) }; @@ -247,6 +289,8 @@ struct in_clamp : in_min_max<cpu> } KFR_HANDLE_ALL(clamp) KFR_HANDLE_ALL(clampm1) + KFR_HANDLE_SCALAR(min) + KFR_HANDLE_SCALAR(max) KFR_SPEC_FN(in_clamp, clamp) KFR_SPEC_FN(in_clamp, clampm1) }; diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp @@ -72,9 +72,9 @@ struct in_round : in_round<older(c)> }; template <> -struct in_round<cpu_t::sse2> +struct in_round<cpu_t::common> { - constexpr static cpu_t cpu = cpu_t::sse2; + constexpr static cpu_t cpu = cpu_t::common; template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> KFR_SINTRIN vec<T, N> floor(vec<T, N> value) @@ -102,38 +102,61 @@ struct in_round<cpu_t::sse2> return T(); } - KFR_SINTRIN f32sse floor(f32sse x) + template <size_t N> + KFR_SINTRIN vec<f32, N> floor(vec<f32, N> x) { - f32sse t = cast<f32>(cast<i32>(x)); + vec<f32, N> t = cast<f32>(cast<i32>(x)); return t - (bitcast<f32>(x < t) & 1.f); } - KFR_SINTRIN f64sse floor(f64sse x) + template <size_t N> + KFR_SINTRIN vec<f64, N> floor(vec<f64, N> x) { - f64sse t = cast<f64>(cast<i64>(x)); + vec<f64, N> t = cast<f64>(cast<i64>(x)); return t - (bitcast<f64>(x < t) & 1.0); } - KFR_SINTRIN f32sse ceil(f32sse x) + template <size_t N> + KFR_SINTRIN vec<f32, N> ceil(vec<f32, N> x) { - f32sse t = cast<f32>(cast<i32>(x)); + vec<f32, N> t = cast<f32>(cast<i32>(x)); return t + (bitcast<f32>(x > t) & 1.f); } - KFR_SINTRIN f64sse ceil(f64sse x) + template <size_t N> + KFR_SINTRIN vec<f64, N> ceil(vec<f64, N> x) { - f64sse t = cast<f64>(cast<i64>(x)); + vec<f64, N> t = cast<f64>(cast<i64>(x)); return t + 
(bitcast<f64>(x > t) & 1.0); } - KFR_SINTRIN f32sse round(f32sse x) { return cast<f32>(cast<i32>(x + mulsign(f32x4(0.5f), x))); } - KFR_SINTRIN f64sse round(f64sse x) { return cast<f64>(cast<i64>(x + mulsign(f64x2(0.5), x))); } - KFR_SINTRIN f32sse trunc(f32sse x) { return cast<f32>(cast<i32>(x)); } - KFR_SINTRIN f64sse trunc(f64sse x) { return cast<f64>(cast<i64>(x)); } - KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); } - KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); } + template <size_t N> + KFR_SINTRIN vec<f32, N> round(vec<f32, N> x) + { + return cast<f32>(cast<i32>(x + mulsign(broadcast<N>(0.5f), x))); + } + template <size_t N> + KFR_SINTRIN vec<f64, N> round(vec<f64, N> x) + { + return cast<f64>(cast<i64>(x + mulsign(broadcast<N>(0.5), x))); + } + template <size_t N> + KFR_SINTRIN vec<f32, N> trunc(vec<f32, N> x) + { + return cast<f32>(cast<i32>(x)); + } + template <size_t N> + KFR_SINTRIN vec<f64, N> trunc(vec<f64, N> x) + { + return cast<f64>(cast<i64>(x)); + } + template <size_t N> + KFR_SINTRIN vec<f32, N> fract(vec<f32, N> x) + { + return x - floor(x); + } + template <size_t N> + KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x) + { + return x - floor(x); + } - KFR_HANDLE_ALL(floor) - KFR_HANDLE_ALL(ceil) - KFR_HANDLE_ALL(round) - KFR_HANDLE_ALL(trunc) - KFR_HANDLE_ALL(fract) KFR_HANDLE_SCALAR(floor) KFR_HANDLE_SCALAR(ceil) KFR_HANDLE_SCALAR(round) @@ -147,7 +170,7 @@ struct in_round<cpu_t::sse2> }; template <> -struct in_round<cpu_t::sse41> : in_round<cpu_t::sse2> +struct in_round<cpu_t::sse41> : in_round<cpu_t::common> { constexpr static cpu_t cpu = cpu_t::sse41; diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp @@ -42,38 +42,41 @@ struct in_saturated : in_saturated<older(c), cc> struct fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled { }; + struct fn_satsub : in_saturated<older(c), cc>::fn_satsub, fn_disabled + { + }; }; template <cpu_t cc> -struct in_saturated<cpu_t::sse2, cc> : 
in_select<cc> +struct in_saturated<cpu_t::common, cc> : in_select<cc> { - constexpr static cpu_t cpu = cpu_t::sse2; - -private: - using in_select<cc>::select; - -public: - KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); } - KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); } - KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); } - KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); } - - KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); } - KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); } - KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); } - KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); } + constexpr static cpu_t cpu = cpu_t::common; - KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); } - KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); } - KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); } - KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); } + template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)> + KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b) + { + return saturated_signed_add(a, b); + } + template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)> + KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b) + { + return saturated_unsigned_add(a, b); + } - KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); } - KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); } - KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); } - KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); } + template <typename T, size_t N, 
KFR_ENABLE_IF(std::is_signed<T>::value)> + KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b) + { + return saturated_signed_sub(a, b); + } + template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)> + KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b) + { + return saturated_unsigned_sub(a, b); + } + KFR_SPEC_FN(in_saturated, satadd) + KFR_SPEC_FN(in_saturated, satsub) -private: +protected: template <typename T, size_t N> KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b) { @@ -103,10 +106,41 @@ private: { return select(a < b, zerovector(a), a - b); } +}; + +template <cpu_t cc> +struct in_saturated<cpu_t::sse2, cc> : in_saturated<cpu_t::common>, in_select<cc> +{ + constexpr static cpu_t cpu = cpu_t::sse2; + +private: + using in_select<cc>::select; public: + KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); } + KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); } + KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); } + KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); } + + KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); } + KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); } + KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); } + KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); } + + KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); } + KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); } + KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); } + KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); } + + KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); } + KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); } + 
KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); } + KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); } + KFR_HANDLE_ALL(satadd) KFR_HANDLE_ALL(satsub) + KFR_HANDLE_SCALAR(satadd) + KFR_HANDLE_SCALAR(satsub) KFR_SPEC_FN(in_saturated, satadd) KFR_SPEC_FN(in_saturated, satsub) }; @@ -130,6 +164,8 @@ struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc> KFR_HANDLE_ALL(satadd) KFR_HANDLE_ALL(satsub) + KFR_HANDLE_SCALAR(satadd) + KFR_HANDLE_SCALAR(satsub) KFR_SPEC_FN(in_saturated, satadd) KFR_SPEC_FN(in_saturated, satsub) }; diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp @@ -38,9 +38,9 @@ struct in_select_impl : in_select_impl<older(c)> }; template <> -struct in_select_impl<cpu_t::sse2> +struct in_select_impl<cpu_t::common> { - constexpr static cpu_t cur = cpu_t::sse2; + constexpr static cpu_t cur = cpu_t::common; template <typename T, size_t N> KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y) @@ -51,7 +51,7 @@ struct in_select_impl<cpu_t::sse2> }; template <> -struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::sse2> +struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::common> { constexpr static cpu_t cpu = cpu_t::sse41; diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp @@ -39,6 +39,26 @@ struct in_sqrt : in_sqrt<older(c)> }; template <> +struct in_sqrt<cpu_t::common> +{ + constexpr static cpu_t cpu = cpu_t::common; + + template <size_t N> + KFR_SINTRIN vec<f32, N> sqrt(vec<f32, N> x) + { + return apply([](float xx) { return std::sqrt(xx); }, x); + } + template <size_t N> + KFR_SINTRIN vec<f64, N> sqrt(vec<f64, N> x) + { + return apply([](double xx) { return std::sqrt(xx); }, x); + } + + KFR_HANDLE_SCALAR(sqrt) + KFR_SPEC_FN(in_sqrt, sqrt) +}; + +template <> struct in_sqrt<cpu_t::sse2> { constexpr static cpu_t cpu = cpu_t::sse2; diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp @@ -314,13 
+314,14 @@ constexpr inline ptrdiff_t distance(const void* x, const void* y) enum class cpu_t : int { - sse2 = 0, - sse3 = 1, - ssse3 = 2, - sse41 = 3, - sse42 = 4, - avx1 = 5, - avx2 = 6, + common = 0, + sse2 = 1, + sse3 = 2, + ssse3 = 3, + sse41 = 4, + sse42 = 5, + avx1 = 6, + avx2 = 7, avx = static_cast<int>(avx1), native = static_cast<int>(KFR_ARCH_NAME), lowest = static_cast<int>(sse2), @@ -639,10 +640,14 @@ constexpr size_t native_cache_alignment_mask = native_cache_alignment - 1; constexpr size_t maximum_vector_alignment = 32; constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1; constexpr size_t native_register_count = bitness_const(8, 16); + +constexpr size_t common_float_vector_size = 16; +constexpr size_t common_int_vector_size = 16; + template <cpu_t c> -constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : 0; +constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size; template <cpu_t c> -constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : 0; +constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size; struct input_expression { diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -113,7 +113,7 @@ #elif defined CID_ARCH_SSE #define CID_ARCH_NAME sse #else -#define CID_ARCH_NAME legacy +#define CID_ARCH_NAME common #endif #endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -18,7 +18,8 @@ cmake_minimum_required(VERSION 3.0) if (NOT MSVC) - add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0 -march=native) + add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0) + add_compile_options(-march=native) link_libraries(stdc++ pthread m) else () add_compile_options(/arch:AVX)