generic cpu support - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 969aee6c30842a4486facc57e088cd177fcc07cb
parent ffa2909116d211648f819de2a2b2be3b0b622bb8
Author: [email protected] <[email protected]>
Date:   Fri, 22 Jul 2016 10:15:00 +0300

generic cpu support

Diffstat:
M include/kfr/base/abs.hpp  | 28 +++++++++++++---------------
M include/kfr/base/logical.hpp  | 42 ++++++++++++++++++++++++++++++++++++++++++
M include/kfr/base/min_max.hpp  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M include/kfr/base/round.hpp  | 67 +++++++++++++++++++++++++++++++++++++++++++++----------------------
M include/kfr/base/saturation.hpp  | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M include/kfr/base/select.hpp  | 6 +++---
M include/kfr/base/sqrt.hpp  | 20 ++++++++++++++++++++
M include/kfr/base/types.hpp  | 23 ++++++++++++++---------
M include/kfr/cident.h  | 2 +-
M tests/CMakeLists.txt  | 3 ++-

10 files changed, 259 insertions(+), 90 deletions(-)
diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp
@@ -37,21 +37,21 @@ namespace kfr
 namespace internal
 {
 
-template <cpu_t cpu = cpu_t::native>
-struct in_abs : in_abs<older(cpu)>
+template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu> // cc defaults to cpu and is passed along unchanged to the base below
+struct in_abs : in_abs<older(cpu), cc> // primary template: no own code, defers to the level returned by older(cpu)
 {
-    struct fn_abs : in_abs<older(cpu)>::fn_abs, fn_disabled
+    struct fn_abs : in_abs<older(cpu), cc>::fn_abs, fn_disabled // functor inherited from that level, additionally derived from fn_disabled
     {
     };
 };
 
-template <>
-struct in_abs<cpu_t::sse2> : in_select<cpu_t::sse2>
+template <cpu_t cc>
+struct in_abs<cpu_t::common, cc> : in_select<cc>
 {
-    constexpr static cpu_t cpu = cpu_t::sse2;
+    constexpr static cpu_t cpu = cpu_t::common;
 
 private:
-    using in_select<cpu_t::sse2>::select;
+    using in_select<cc>::select;
 
 public:
     template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
@@ -65,18 +65,17 @@ public:
         return value & invhighbitmask<T>;
     }
 
-    KFR_HANDLE_ALL(abs)
     KFR_HANDLE_SCALAR(abs)
     KFR_SPEC_FN(in_abs, abs)
 };
 
-template <>
-struct in_abs<cpu_t::ssse3> : in_abs<cpu_t::sse2>, in_select<cpu_t::sse2>
+template <cpu_t cc>
+struct in_abs<cpu_t::ssse3, cc> : in_abs<cpu_t::common>, in_select<cc>
 {
     constexpr static cpu_t cpu = cpu_t::ssse3;
 
 private:
-    using in_select<cpu_t::sse2>::select;
+    using in_select<cc>::select;
 
 public:
     template <size_t N>
@@ -100,11 +99,11 @@ public:
     KFR_SPEC_FN(in_abs, abs)
 };
 
-template <>
-struct in_abs<cpu_t::avx2> : in_abs<cpu_t::ssse3>
+template <cpu_t cc>
+struct in_abs<cpu_t::avx2, cc> : in_abs<cpu_t::ssse3, cc> // AVX2 level extends the SSSE3 one; in_abs<cc> is in_abs<cc, cc>, i.e. this very type when cc == avx2
 {
     constexpr static cpu_t cpu = cpu_t::avx2;
-    using in_abs<cpu_t::ssse3>::abs;
+    using in_abs<cpu_t::ssse3, cc>::abs; // keep the inherited abs overloads visible next to the 256-bit ones declared below
 
     KFR_CPU_INTRIN(avx2) i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); }
     KFR_CPU_INTRIN(avx2) i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); }
@@ -120,7 +119,6 @@ namespace native
 {
 using fn_abs = internal::in_abs<>::fn_abs;
 template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-
 KFR_INTRIN ftype<T1> abs(const T1& x)
 {
     return internal::in_abs<>::abs(x);
diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp
@@ -69,6 +69,48 @@ struct logical_and
 };
 
 template <>
+struct in_bittest<cpu_t::common> // specialization for cpu_t::common: plain per-element loop, no intrinsics used here
+{
+    constexpr static cpu_t cpu = cpu_t::common;
+
+    template <typename T, size_t N>
+    KFR_SINTRIN bitmask<N> getmask(vec<T, N> x) // builds an integer in which bit i is taken from element i of x
+    {
+        typename bitmask<N>::type val = 0;
+        for (size_t i = 0; i < N; i++) // NOTE(review): `<< i` below is applied before widening to bitmask<N>::type - confirm it cannot overflow for narrow T with large N
+        {
+            val |= (ubitcast(x[i]) >> (typebits<T>::bits - 1)) << i; // presumably the most significant bit of element i (assumes typebits<T>::bits is the width of T)
+        }
+        return val;
+    }
+
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestnone(vec<T, N> x) // true when getmask(x) is zero, i.e. no element contributes a set bit
+    {
+        return !getmask(x).value;
+    }
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestnone(vec<T, N> x, vec<T, N> y) // two-operand form: same test applied to x & y
+    {
+        return bittestnone(x & y);
+    }
+
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestall(vec<T, N> x) // true when every element contributes a set bit
+    {
+        return !getmask(~x).value; // invert first: a zero mask of ~x means no element of x had the tested bit clear
+    }
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestall(vec<T, N> x, vec<T, N> y) // two-operand form: tested bits set in y must also be set in x
+    {
+        return bittestnone(~x & y);
+    }
+
+    KFR_SPEC_FN(in_bittest, bittestnone)
+    KFR_SPEC_FN(in_bittest, bittestall)
+};
+
+template <>
 struct in_bittest<cpu_t::sse2>
 {
     constexpr static cpu_t cpu = cpu_t::sse2;
diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp
@@ -38,24 +38,57 @@ namespace kfr
 namespace internal
 {
 
-template <cpu_t cpu = cpu_t::native>
-struct in_min_max : in_min_max<older(cpu)>
+template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu> // cc defaults to cpu and is passed along unchanged to the base below
+struct in_min_max : in_min_max<older(cpu), cc> // primary template: no own code, defers to the level returned by older(cpu)
 {
-    struct fn_min : in_min_max<older(cpu)>::fn_min, fn_disabled
+    struct fn_min : in_min_max<older(cpu), cc>::fn_min, fn_disabled // functor inherited from that level, additionally derived from fn_disabled
     {
     };
-    struct fn_max : in_min_max<older(cpu)>::fn_max, fn_disabled
+    struct fn_max : in_min_max<older(cpu), cc>::fn_max, fn_disabled // same arrangement for max
     {
     };
 };
 
-template <>
-struct in_min_max<cpu_t::sse2> : in_select<cpu_t::sse2>
+template <cpu_t cc>
+struct in_min_max<cpu_t::common, cc> : in_select<cc> // specialization for cpu_t::common: min/max expressed through select() only
+{
+    constexpr static cpu_t cpu = cpu_t::common;
+
+    template <typename T>
+    KFR_SINTRIN T min(initialvalue<T>) // presumably the starting value for a min-accumulation: the largest finite T
+    {
+        return std::numeric_limits<T>::max();
+    }
+    template <typename T>
+    KFR_SINTRIN T max(initialvalue<T>) // presumably the starting value for a max-accumulation: the most negative finite T
+    {
+        return std::numeric_limits<T>::lowest(); // not min(): for floating-point T min() is the smallest positive normal value; identical for integers
+    }
+
+    template <typename T, size_t N>
+    KFR_SINTRIN vec<T, N> min(vec<T, N> x, vec<T, N> y) // element-wise: x where x < y, otherwise y
+    {
+        return select(x < y, x, y);
+    }
+    template <typename T, size_t N>
+    KFR_SINTRIN vec<T, N> max(vec<T, N> x, vec<T, N> y) // element-wise: x where x > y, otherwise y
+    {
+        return select(x > y, x, y);
+    }
+
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
+    KFR_SPEC_FN(in_min_max, min)
+    KFR_SPEC_FN(in_min_max, max)
+};
+
+template <cpu_t cc>
+struct in_min_max<cpu_t::sse2, cc> : in_select<cc>
 {
     constexpr static cpu_t cpu = cpu_t::sse2;
 
 private:
-    using in_select<cpu>::select;
+    using in_select<cc>::select;
 
 public:
     template <typename T>
@@ -93,13 +126,14 @@ public:
 
     KFR_HANDLE_ALL(min)
     KFR_HANDLE_ALL(max)
-
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
     KFR_SPEC_FN(in_min_max, min)
     KFR_SPEC_FN(in_min_max, max)
 };
 
-template <>
-struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2>
+template <cpu_t cc>
+struct in_min_max<cpu_t::sse41, cc> : in_min_max<cpu_t::sse2>
 {
     constexpr static cpu_t cpu = cpu_t::sse41;
     using in_min_max<cpu_t::sse2>::min;
@@ -117,12 +151,14 @@ struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2>
 
     KFR_HANDLE_ALL(min)
     KFR_HANDLE_ALL(max)
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
     KFR_SPEC_FN(in_min_max, min)
     KFR_SPEC_FN(in_min_max, max)
 };
 
-template <>
-struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41>
+template <cpu_t cc>
+struct in_min_max<cpu_t::avx1, cc> : in_min_max<cpu_t::sse41>
 {
     constexpr static cpu_t cpu = cpu_t::avx1;
     using in_min_max<cpu_t::sse41>::min;
@@ -135,12 +171,14 @@ struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41>
 
     KFR_HANDLE_ALL(min)
     KFR_HANDLE_ALL(max)
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
     KFR_SPEC_FN(in_min_max, min)
     KFR_SPEC_FN(in_min_max, max)
 };
 
-template <>
-struct in_min_max<cpu_t::avx2> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2>
+template <cpu_t cc>
+struct in_min_max<cpu_t::avx2, cc> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2>
 {
     constexpr static cpu_t cpu = cpu_t::avx2;
 
@@ -172,6 +210,8 @@ public:
 
     KFR_HANDLE_ALL(min)
     KFR_HANDLE_ALL(max)
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
     KFR_SPEC_FN(in_min_max, min)
     KFR_SPEC_FN(in_min_max, max)
 };
@@ -193,6 +233,8 @@ public:
 
     KFR_HANDLE_ALL(minabs)
     KFR_HANDLE_ALL(maxabs)
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
     KFR_SPEC_FN(in_minabs_maxabs, minabs)
     KFR_SPEC_FN(in_minabs_maxabs, maxabs)
 };
@@ -247,6 +289,8 @@ struct in_clamp : in_min_max<cpu>
     }
     KFR_HANDLE_ALL(clamp)
     KFR_HANDLE_ALL(clampm1)
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
     KFR_SPEC_FN(in_clamp, clamp)
     KFR_SPEC_FN(in_clamp, clampm1)
 };
diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp
@@ -72,9 +72,9 @@ struct in_round : in_round<older(c)>
 };
 
 template <>
-struct in_round<cpu_t::sse2>
+struct in_round<cpu_t::common>
 {
-    constexpr static cpu_t cpu = cpu_t::sse2;
+    constexpr static cpu_t cpu = cpu_t::common;
 
     template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
     KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
@@ -102,38 +102,61 @@ struct in_round<cpu_t::sse2>
         return T();
     }
 
-    KFR_SINTRIN f32sse floor(f32sse x)
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> floor(vec<f32, N> x)
     {
-        f32sse t = cast<f32>(cast<i32>(x));
+        vec<f32, N> t = cast<f32>(cast<i32>(x));
         return t - (bitcast<f32>(x < t) & 1.f);
     }
-    KFR_SINTRIN f64sse floor(f64sse x)
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> floor(vec<f64, N> x)
     {
-        f64sse t = cast<f64>(cast<i64>(x));
+        vec<f64, N> t = cast<f64>(cast<i64>(x));
         return t - (bitcast<f64>(x < t) & 1.0);
     }
-    KFR_SINTRIN f32sse ceil(f32sse x)
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> ceil(vec<f32, N> x)
     {
-        f32sse t = cast<f32>(cast<i32>(x));
+        vec<f32, N> t = cast<f32>(cast<i32>(x));
         return t + (bitcast<f32>(x > t) & 1.f);
     }
-    KFR_SINTRIN f64sse ceil(f64sse x)
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> ceil(vec<f64, N> x)
     {
-        f64sse t = cast<f64>(cast<i64>(x));
+        vec<f64, N> t = cast<f64>(cast<i64>(x));
         return t + (bitcast<f64>(x > t) & 1.0);
     }
-    KFR_SINTRIN f32sse round(f32sse x) { return cast<f32>(cast<i32>(x + mulsign(f32x4(0.5f), x))); }
-    KFR_SINTRIN f64sse round(f64sse x) { return cast<f64>(cast<i64>(x + mulsign(f64x2(0.5), x))); }
-    KFR_SINTRIN f32sse trunc(f32sse x) { return cast<f32>(cast<i32>(x)); }
-    KFR_SINTRIN f64sse trunc(f64sse x) { return cast<f64>(cast<i64>(x)); }
-    KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
-    KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> round(vec<f32, N> x)
+    {
+        return cast<f32>(cast<i32>(x + mulsign(broadcast<N>(0.5f), x)));
+    }
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> round(vec<f64, N> x)
+    {
+        return cast<f64>(cast<i64>(x + mulsign(broadcast<N>(0.5), x)));
+    }
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> trunc(vec<f32, N> x)
+    {
+        return cast<f32>(cast<i32>(x));
+    }
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> trunc(vec<f64, N> x)
+    {
+        return cast<f64>(cast<i64>(x));
+    }
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> fract(vec<f32, N> x)
+    {
+        return x - floor(x);
+    }
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x)
+    {
+        return x - floor(x);
+    }
 
-    KFR_HANDLE_ALL(floor)
-    KFR_HANDLE_ALL(ceil)
-    KFR_HANDLE_ALL(round)
-    KFR_HANDLE_ALL(trunc)
-    KFR_HANDLE_ALL(fract)
     KFR_HANDLE_SCALAR(floor)
     KFR_HANDLE_SCALAR(ceil)
     KFR_HANDLE_SCALAR(round)
@@ -147,7 +170,7 @@ struct in_round<cpu_t::sse2>
 };
 
 template <>
-struct in_round<cpu_t::sse41> : in_round<cpu_t::sse2>
+struct in_round<cpu_t::sse41> : in_round<cpu_t::common>
 {
     constexpr static cpu_t cpu = cpu_t::sse41;
 
diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp
@@ -42,38 +42,41 @@ struct in_saturated : in_saturated<older(c), cc>
     struct fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled
     {
     };
+    struct fn_satsub : in_saturated<older(c), cc>::fn_satsub, fn_disabled
+    {
+    };
 };
 
 template <cpu_t cc>
-struct in_saturated<cpu_t::sse2, cc> : in_select<cc>
+struct in_saturated<cpu_t::common, cc> : in_select<cc>
 {
-    constexpr static cpu_t cpu = cpu_t::sse2;
-
-private:
-    using in_select<cc>::select;
-
-public:
-    KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); }
-    KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
-    KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
-    KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
-
-    KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); }
-    KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
-    KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
-    KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
+    constexpr static cpu_t cpu = cpu_t::common;
 
-    KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); }
-    KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
-    KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
-    KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
+    template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
+    KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
+    {
+        return saturated_signed_add(a, b);
+    }
+    template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
+    KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
+    {
+        return saturated_unsigned_add(a, b);
+    }
 
-    KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); }
-    KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
-    KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
-    KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
+    template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
+    KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
+    {
+        return saturated_signed_sub(a, b);
+    }
+    template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
+    KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
+    {
+        return saturated_unsigned_sub(a, b);
+    }
+    KFR_SPEC_FN(in_saturated, satadd)
+    KFR_SPEC_FN(in_saturated, satsub)
 
-private:
+protected:
     template <typename T, size_t N>
     KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b)
     {
@@ -103,10 +106,41 @@ private:
     {
         return select(a < b, zerovector(a), a - b);
     }
+};
+
+template <cpu_t cc>
+struct in_saturated<cpu_t::sse2, cc> : in_saturated<cpu_t::common>, in_select<cc> // SSE2 level: intrinsics for 8/16-bit lanes, helper calls for 32/64-bit lanes
+{
+    constexpr static cpu_t cpu = cpu_t::sse2;
+
+private:
+    using in_select<cc>::select;
 
 public:
+    KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); } // 8/16-bit saturating add: one intrinsic per type
+    KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
+    KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
+    KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
+
+    KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); } // 8/16-bit saturating subtract: one intrinsic per type
+    KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
+    KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
+    KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
+
+    KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); } // 32/64-bit add: no intrinsic used here, forwards to the saturated_* helpers
+    KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
+    KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
+    KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
+
+    KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); } // 32/64-bit subtract: same forwarding scheme
+    KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
+    KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
+    KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
+
     KFR_HANDLE_ALL(satadd)
     KFR_HANDLE_ALL(satsub)
+    KFR_HANDLE_SCALAR(satadd)
+    KFR_HANDLE_SCALAR(satsub)
     KFR_SPEC_FN(in_saturated, satadd)
     KFR_SPEC_FN(in_saturated, satsub)
 };
@@ -130,6 +164,8 @@ struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc>
 
     KFR_HANDLE_ALL(satadd)
     KFR_HANDLE_ALL(satsub)
+    KFR_HANDLE_SCALAR(satadd)
+    KFR_HANDLE_SCALAR(satsub)
     KFR_SPEC_FN(in_saturated, satadd)
     KFR_SPEC_FN(in_saturated, satsub)
 };
diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp
@@ -38,9 +38,9 @@ struct in_select_impl : in_select_impl<older(c)>
 };
 
 template <>
-struct in_select_impl<cpu_t::sse2>
+struct in_select_impl<cpu_t::common>
 {
-    constexpr static cpu_t cur = cpu_t::sse2;
+    constexpr static cpu_t cur = cpu_t::common;
 
     template <typename T, size_t N>
     KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y)
@@ -51,7 +51,7 @@ struct in_select_impl<cpu_t::sse2>
 };
 
 template <>
-struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::sse2>
+struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::common>
 {
     constexpr static cpu_t cpu = cpu_t::sse41;
 
diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp
@@ -39,6 +39,26 @@ struct in_sqrt : in_sqrt<older(c)>
 };
 
 template <>
+struct in_sqrt<cpu_t::common> // specialization for cpu_t::common: square root via std::sqrt, no intrinsics used here
+{
+    constexpr static cpu_t cpu = cpu_t::common;
+
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> sqrt(vec<f32, N> x) // single-precision overload
+    {
+        return apply([](float xx) { return std::sqrt(xx); }, x); // presumably apply() invokes the lambda once per element of x
+    }
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> sqrt(vec<f64, N> x) // double-precision overload
+    {
+        return apply([](double xx) { return std::sqrt(xx); }, x); // same scheme with the double overload of std::sqrt
+    }
+
+    KFR_HANDLE_SCALAR(sqrt)
+    KFR_SPEC_FN(in_sqrt, sqrt)
+};
+
+template <>
 struct in_sqrt<cpu_t::sse2>
 {
     constexpr static cpu_t cpu = cpu_t::sse2;
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -314,13 +314,14 @@ constexpr inline ptrdiff_t distance(const void* x, const void* y)
 
 enum class cpu_t : int
 {
-    sse2    = 0,
-    sse3    = 1,
-    ssse3   = 2,
-    sse41   = 3,
-    sse42   = 4,
-    avx1    = 5,
-    avx2    = 6,
+    common  = 0,
+    sse2    = 1,
+    sse3    = 2,
+    ssse3   = 3,
+    sse41   = 4,
+    sse42   = 5,
+    avx1    = 6,
+    avx2    = 7,
     avx     = static_cast<int>(avx1),
     native  = static_cast<int>(KFR_ARCH_NAME),
     lowest  = static_cast<int>(sse2),
@@ -639,10 +640,14 @@ constexpr size_t native_cache_alignment_mask   = native_cache_alignment - 1;
 constexpr size_t maximum_vector_alignment      = 32;
 constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1;
 constexpr size_t native_register_count         = bitness_const(8, 16);
+
+constexpr size_t common_float_vector_size = 16; // vector size used for levels below sse2 (presumably bytes, like the 16/32 below)
+constexpr size_t common_int_vector_size = 16; // integer counterpart of the constant above
+
 template <cpu_t c>
-constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : 0;
+constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size; // 32 from avx1 up, 16 from sse2 up, otherwise the common fallback instead of 0
 template <cpu_t c>
-constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : 0;
+constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size; // 32 only from avx2 up, 16 from sse2 up, otherwise the common fallback instead of 0
 
 struct input_expression
 {
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -113,7 +113,7 @@
 #elif defined CID_ARCH_SSE
 #define CID_ARCH_NAME sse
 #else
-#define CID_ARCH_NAME legacy
+#define CID_ARCH_NAME common
 #endif
 
 #endif
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -18,7 +18,8 @@
 cmake_minimum_required(VERSION 3.0)
 
 if (NOT MSVC)
-    add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0 -march=native)
+    add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0)
+    add_compile_options(-march=native)
     link_libraries(stdc++ pthread m)
 else ()
     add_compile_options(/arch:AVX)

	kfr Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log \| Files \| Refs \| README

M	include/kfr/base/abs.hpp	\|	28	+++++++++++++---------------
M	include/kfr/base/logical.hpp	\|	42	++++++++++++++++++++++++++++++++++++++++++
M	include/kfr/base/min_max.hpp	\|	72	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M	include/kfr/base/round.hpp	\|	67	+++++++++++++++++++++++++++++++++++++++++++++----------------------
M	include/kfr/base/saturation.hpp	\|	86	++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M	include/kfr/base/select.hpp	\|	6	+++---
M	include/kfr/base/sqrt.hpp	\|	20	++++++++++++++++++++
M	include/kfr/base/types.hpp	\|	23	++++++++++++++---------
M	include/kfr/cident.h	\|	2	+-
M	tests/CMakeLists.txt	\|	3	++-