commit 969aee6c30842a4486facc57e088cd177fcc07cb
parent ffa2909116d211648f819de2a2b2be3b0b622bb8
Author: [email protected] <[email protected]>
Date: Fri, 22 Jul 2016 10:15:00 +0300
generic cpu support
Diffstat:
10 files changed, 259 insertions(+), 90 deletions(-)
diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp
@@ -37,21 +37,21 @@ namespace kfr
namespace internal
{
-template <cpu_t cpu = cpu_t::native>
-struct in_abs : in_abs<older(cpu)>
+template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu>
+struct in_abs : in_abs<older(cpu), cc>
{
- struct fn_abs : in_abs<older(cpu)>::fn_abs, fn_disabled
+ struct fn_abs : in_abs<older(cpu), cc>::fn_abs, fn_disabled
{
};
};
-template <>
-struct in_abs<cpu_t::sse2> : in_select<cpu_t::sse2>
+template <cpu_t cc>
+struct in_abs<cpu_t::common, cc> : in_select<cc>
{
- constexpr static cpu_t cpu = cpu_t::sse2;
+ constexpr static cpu_t cpu = cpu_t::common;
private:
- using in_select<cpu_t::sse2>::select;
+ using in_select<cc>::select;
public:
template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
@@ -65,18 +65,17 @@ public:
return value & invhighbitmask<T>;
}
- KFR_HANDLE_ALL(abs)
KFR_HANDLE_SCALAR(abs)
KFR_SPEC_FN(in_abs, abs)
};
-template <>
-struct in_abs<cpu_t::ssse3> : in_abs<cpu_t::sse2>, in_select<cpu_t::sse2>
+template <cpu_t cc>
+struct in_abs<cpu_t::ssse3, cc> : in_abs<cpu_t::common>, in_select<cc>
{
constexpr static cpu_t cpu = cpu_t::ssse3;
private:
- using in_select<cpu_t::sse2>::select;
+ using in_select<cc>::select;
public:
template <size_t N>
@@ -100,11 +99,11 @@ public:
KFR_SPEC_FN(in_abs, abs)
};
-template <>
-struct in_abs<cpu_t::avx2> : in_abs<cpu_t::ssse3>
+template <cpu_t cc>
+struct in_abs<cpu_t::avx2, cc> : in_abs<cc>
{
constexpr static cpu_t cpu = cpu_t::avx2;
- using in_abs<cpu_t::ssse3>::abs;
+ using in_abs<cc>::abs;
KFR_CPU_INTRIN(avx2) i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); }
KFR_CPU_INTRIN(avx2) i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); }
@@ -120,7 +119,6 @@ namespace native
{
using fn_abs = internal::in_abs<>::fn_abs;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-
KFR_INTRIN ftype<T1> abs(const T1& x)
{
return internal::in_abs<>::abs(x);
diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp
@@ -69,6 +69,48 @@ struct logical_and
};
template <>
+struct in_bittest<cpu_t::common>
+{
+    // Intrinsic-free implementation of the bit tests for cpu_t::common.
+    // Every test below is derived from getmask(), so only the most significant
+    // bit of each element takes part in the result.
+    constexpr static cpu_t cpu = cpu_t::common;
+
+    // Collects the most significant bit of every element of x: element i
+    // contributes bit i of the returned mask.
+    template <typename T, size_t N>
+    KFR_SINTRIN bitmask<N> getmask(vec<T, N> x)
+    {
+        using mask_t = typename bitmask<N>::type;
+        mask_t val   = 0;
+        for (size_t i = 0; i < N; i++)
+        {
+            // Convert the extracted bit to the mask type BEFORE shifting it into
+            // position. Shifting in the element's own (promoted) type would drop
+            // the bit, or be an undefined shift, once i reaches that type's width.
+            const mask_t highbit = static_cast<mask_t>(ubitcast(x[i]) >> (typebits<T>::bits - 1));
+            val |= highbit << i;
+        }
+        return val;
+    }
+
+    // True when no element of x has its most significant bit set.
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestnone(vec<T, N> x)
+    {
+        return !getmask(x).value;
+    }
+    // True when no element of (x & y) has its most significant bit set.
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestnone(vec<T, N> x, vec<T, N> y)
+    {
+        return bittestnone(x & y);
+    }
+
+    // True when every element of x has its most significant bit set
+    // (tested as "no top bit set in ~x").
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestall(vec<T, N> x)
+    {
+        return !getmask(~x).value;
+    }
+    // True when no element of (~x & y) has its most significant bit set, i.e.
+    // every top bit selected by y is also set in x.
+    template <typename T, size_t N>
+    KFR_SINTRIN bool bittestall(vec<T, N> x, vec<T, N> y)
+    {
+        return bittestnone(~x & y);
+    }
+
+    KFR_SPEC_FN(in_bittest, bittestnone)
+    KFR_SPEC_FN(in_bittest, bittestall)
+};
+
+template <>
struct in_bittest<cpu_t::sse2>
{
constexpr static cpu_t cpu = cpu_t::sse2;
diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp
@@ -38,24 +38,57 @@ namespace kfr
namespace internal
{
-template <cpu_t cpu = cpu_t::native>
-struct in_min_max : in_min_max<older(cpu)>
+template <cpu_t cpu = cpu_t::native, cpu_t cc = cpu>
+struct in_min_max : in_min_max<older(cpu), cc>
{
- struct fn_min : in_min_max<older(cpu)>::fn_min, fn_disabled
+ struct fn_min : in_min_max<older(cpu), cc>::fn_min, fn_disabled
{
};
- struct fn_max : in_min_max<older(cpu)>::fn_max, fn_disabled
+ struct fn_max : in_min_max<older(cpu), cc>::fn_max, fn_disabled
{
};
};
-template <>
-struct in_min_max<cpu_t::sse2> : in_select<cpu_t::sse2>
+// Generic min/max for cpu_t::common, built only from element comparisons and
+// in_select<cc>::select; no CPU intrinsics are used here.
+template <cpu_t cc>
+struct in_min_max<cpu_t::common, cc> : in_select<cc>
+{
+    constexpr static cpu_t cpu = cpu_t::common;
+
+    // Value returned for the initialvalue<T> tag of min: the largest value of T
+    // (presumably the seed of a min reduction; confirm against callers).
+    template <typename T>
+    KFR_SINTRIN T min(initialvalue<T>)
+    {
+        return std::numeric_limits<T>::max();
+    }
+    // Value returned for the initialvalue<T> tag of max: the lowest finite value of T.
+    // lowest() is identical to min() for integer types; for floating-point types
+    // min() is the smallest POSITIVE normal value and therefore not a lower bound.
+    template <typename T>
+    KFR_SINTRIN T max(initialvalue<T>)
+    {
+        return std::numeric_limits<T>::lowest();
+    }
+
+    // Per-element minimum: picks x where x < y, otherwise y.
+    // select is qualified because in_select<cc> is a dependent base class, whose
+    // members are not found by unqualified lookup inside this template.
+    template <typename T, size_t N>
+    KFR_SINTRIN vec<T, N> min(vec<T, N> x, vec<T, N> y)
+    {
+        return in_select<cc>::select(x < y, x, y);
+    }
+    // Per-element maximum: picks x where x > y, otherwise y.
+    template <typename T, size_t N>
+    KFR_SINTRIN vec<T, N> max(vec<T, N> x, vec<T, N> y)
+    {
+        return in_select<cc>::select(x > y, x, y);
+    }
+
+    KFR_HANDLE_SCALAR(min)
+    KFR_HANDLE_SCALAR(max)
+    KFR_SPEC_FN(in_min_max, min)
+    KFR_SPEC_FN(in_min_max, max)
+};
+
+template <cpu_t cc>
+struct in_min_max<cpu_t::sse2, cc> : in_select<cc>
{
constexpr static cpu_t cpu = cpu_t::sse2;
private:
- using in_select<cpu>::select;
+ using in_select<cc>::select;
public:
template <typename T>
@@ -93,13 +126,14 @@ public:
KFR_HANDLE_ALL(min)
KFR_HANDLE_ALL(max)
-
+ KFR_HANDLE_SCALAR(min)
+ KFR_HANDLE_SCALAR(max)
KFR_SPEC_FN(in_min_max, min)
KFR_SPEC_FN(in_min_max, max)
};
-template <>
-struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2>
+template <cpu_t cc>
+struct in_min_max<cpu_t::sse41, cc> : in_min_max<cpu_t::sse2>
{
constexpr static cpu_t cpu = cpu_t::sse41;
using in_min_max<cpu_t::sse2>::min;
@@ -117,12 +151,14 @@ struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2>
KFR_HANDLE_ALL(min)
KFR_HANDLE_ALL(max)
+ KFR_HANDLE_SCALAR(min)
+ KFR_HANDLE_SCALAR(max)
KFR_SPEC_FN(in_min_max, min)
KFR_SPEC_FN(in_min_max, max)
};
-template <>
-struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41>
+template <cpu_t cc>
+struct in_min_max<cpu_t::avx1, cc> : in_min_max<cpu_t::sse41>
{
constexpr static cpu_t cpu = cpu_t::avx1;
using in_min_max<cpu_t::sse41>::min;
@@ -135,12 +171,14 @@ struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41>
KFR_HANDLE_ALL(min)
KFR_HANDLE_ALL(max)
+ KFR_HANDLE_SCALAR(min)
+ KFR_HANDLE_SCALAR(max)
KFR_SPEC_FN(in_min_max, min)
KFR_SPEC_FN(in_min_max, max)
};
-template <>
-struct in_min_max<cpu_t::avx2> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2>
+template <cpu_t cc>
+struct in_min_max<cpu_t::avx2, cc> : in_min_max<cpu_t::avx1>, in_select<cpu_t::avx2>
{
constexpr static cpu_t cpu = cpu_t::avx2;
@@ -172,6 +210,8 @@ public:
KFR_HANDLE_ALL(min)
KFR_HANDLE_ALL(max)
+ KFR_HANDLE_SCALAR(min)
+ KFR_HANDLE_SCALAR(max)
KFR_SPEC_FN(in_min_max, min)
KFR_SPEC_FN(in_min_max, max)
};
@@ -193,6 +233,8 @@ public:
KFR_HANDLE_ALL(minabs)
KFR_HANDLE_ALL(maxabs)
+ KFR_HANDLE_SCALAR(min)
+ KFR_HANDLE_SCALAR(max)
KFR_SPEC_FN(in_minabs_maxabs, minabs)
KFR_SPEC_FN(in_minabs_maxabs, maxabs)
};
@@ -247,6 +289,8 @@ struct in_clamp : in_min_max<cpu>
}
KFR_HANDLE_ALL(clamp)
KFR_HANDLE_ALL(clampm1)
+ KFR_HANDLE_SCALAR(min)
+ KFR_HANDLE_SCALAR(max)
KFR_SPEC_FN(in_clamp, clamp)
KFR_SPEC_FN(in_clamp, clampm1)
};
diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp
@@ -72,9 +72,9 @@ struct in_round : in_round<older(c)>
};
template <>
-struct in_round<cpu_t::sse2>
+struct in_round<cpu_t::common>
{
- constexpr static cpu_t cpu = cpu_t::sse2;
+ constexpr static cpu_t cpu = cpu_t::common;
template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
@@ -102,38 +102,61 @@ struct in_round<cpu_t::sse2>
return T();
}
- KFR_SINTRIN f32sse floor(f32sse x)
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> floor(vec<f32, N> x)
{
- f32sse t = cast<f32>(cast<i32>(x));
+ vec<f32, N> t = cast<f32>(cast<i32>(x));
return t - (bitcast<f32>(x < t) & 1.f);
}
- KFR_SINTRIN f64sse floor(f64sse x)
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> floor(vec<f64, N> x)
{
- f64sse t = cast<f64>(cast<i64>(x));
+ vec<f64, N> t = cast<f64>(cast<i64>(x));
return t - (bitcast<f64>(x < t) & 1.0);
}
- KFR_SINTRIN f32sse ceil(f32sse x)
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> ceil(vec<f32, N> x)
{
- f32sse t = cast<f32>(cast<i32>(x));
+ vec<f32, N> t = cast<f32>(cast<i32>(x));
return t + (bitcast<f32>(x > t) & 1.f);
}
- KFR_SINTRIN f64sse ceil(f64sse x)
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> ceil(vec<f64, N> x)
{
- f64sse t = cast<f64>(cast<i64>(x));
+ vec<f64, N> t = cast<f64>(cast<i64>(x));
return t + (bitcast<f64>(x > t) & 1.0);
}
- KFR_SINTRIN f32sse round(f32sse x) { return cast<f32>(cast<i32>(x + mulsign(f32x4(0.5f), x))); }
- KFR_SINTRIN f64sse round(f64sse x) { return cast<f64>(cast<i64>(x + mulsign(f64x2(0.5), x))); }
- KFR_SINTRIN f32sse trunc(f32sse x) { return cast<f32>(cast<i32>(x)); }
- KFR_SINTRIN f64sse trunc(f64sse x) { return cast<f64>(cast<i64>(x)); }
- KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
- KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }
+    // f32 rounding: adds mulsign(0.5f, x) to x, then converts to i32 and back to f32.
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> round(vec<f32, N> x)
+    {
+        const vec<f32, N> rounded = cast<f32>(cast<i32>(x + mulsign(broadcast<N>(0.5f), x)));
+        return rounded;
+    }
+    // f64 rounding: adds mulsign(0.5, x) to x, then converts to i64 and back to f64.
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> round(vec<f64, N> x)
+    {
+        const vec<f64, N> rounded = cast<f64>(cast<i64>(x + mulsign(broadcast<N>(0.5), x)));
+        return rounded;
+    }
+    // f32 truncation: a round trip through i32 (f32 -> i32 -> f32).
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> trunc(vec<f32, N> x)
+    {
+        const vec<f32, N> truncated = cast<f32>(cast<i32>(x));
+        return truncated;
+    }
+    // f64 truncation: a round trip through i64 (f64 -> i64 -> f64).
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> trunc(vec<f64, N> x)
+    {
+        const vec<f64, N> truncated = cast<f64>(cast<i64>(x));
+        return truncated;
+    }
+    // f32 fractional part: x minus floor(x).
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> fract(vec<f32, N> x)
+    {
+        const vec<f32, N> whole = floor(x);
+        return x - whole;
+    }
+    // f64 fractional part: x minus floor(x).
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x)
+    {
+        const vec<f64, N> whole = floor(x);
+        return x - whole;
+    }
- KFR_HANDLE_ALL(floor)
- KFR_HANDLE_ALL(ceil)
- KFR_HANDLE_ALL(round)
- KFR_HANDLE_ALL(trunc)
- KFR_HANDLE_ALL(fract)
KFR_HANDLE_SCALAR(floor)
KFR_HANDLE_SCALAR(ceil)
KFR_HANDLE_SCALAR(round)
@@ -147,7 +170,7 @@ struct in_round<cpu_t::sse2>
};
template <>
-struct in_round<cpu_t::sse41> : in_round<cpu_t::sse2>
+struct in_round<cpu_t::sse41> : in_round<cpu_t::common>
{
constexpr static cpu_t cpu = cpu_t::sse41;
diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp
@@ -42,38 +42,41 @@ struct in_saturated : in_saturated<older(c), cc>
struct fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled
{
};
+ struct fn_satsub : in_saturated<older(c), cc>::fn_satsub, fn_disabled
+ {
+ };
};
template <cpu_t cc>
-struct in_saturated<cpu_t::sse2, cc> : in_select<cc>
+struct in_saturated<cpu_t::common, cc> : in_select<cc>
{
- constexpr static cpu_t cpu = cpu_t::sse2;
-
-private:
- using in_select<cc>::select;
-
-public:
- KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); }
- KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
- KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
- KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
-
- KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); }
- KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
- KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
- KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
+ constexpr static cpu_t cpu = cpu_t::common;
- KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); }
- KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
- KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
- KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
+ // satadd overload enabled when std::is_signed<T>::value is true; forwards both
+ // operands unchanged to saturated_signed_add.
+ // NOTE(review): std::is_signed is also true for floating-point types - confirm
+ // that such vectors are not expected to reach this overload.
+ template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
+ KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
+ {
+ return saturated_signed_add(a, b);
+ }
+ // satadd overload enabled when std::is_unsigned<T>::value is true; forwards both
+ // operands unchanged to saturated_unsigned_add.
+ template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
+ KFR_SINTRIN vec<T, N> satadd(vec<T, N> a, vec<T, N> b)
+ {
+ return saturated_unsigned_add(a, b);
+ }
- KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); }
- KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
- KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
- KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
+ // satsub overload enabled when std::is_signed<T>::value is true; forwards both
+ // operands unchanged to saturated_signed_sub.
+ // NOTE(review): std::is_signed is also true for floating-point types - confirm
+ // that such vectors are not expected to reach this overload.
+ template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)>
+ KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
+ {
+ return saturated_signed_sub(a, b);
+ }
+ // satsub overload enabled when std::is_unsigned<T>::value is true; forwards both
+ // operands unchanged to saturated_unsigned_sub.
+ template <typename T, size_t N, KFR_ENABLE_IF(std::is_unsigned<T>::value)>
+ KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b)
+ {
+ return saturated_unsigned_sub(a, b);
+ }
+ KFR_SPEC_FN(in_saturated, satadd)
+ KFR_SPEC_FN(in_saturated, satsub)
-private:
+protected:
template <typename T, size_t N>
KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b)
{
@@ -103,10 +106,41 @@ private:
{
return select(a < b, zerovector(a), a - b);
}
+};
+
+// SSE2 specialization of saturated add/subtract. 8- and 16-bit vectors map onto
+// SSE2 saturating intrinsics; 32- and 64-bit vectors call the saturated_* helper
+// functions by unqualified name.
+// NOTE(review): the base is spelled in_saturated<cpu_t::common> without cc, so its
+// second template argument comes from the primary template's default - confirm
+// this is intended rather than in_saturated<cpu_t::common, cc>.
+template <cpu_t cc>
+struct in_saturated<cpu_t::sse2, cc> : in_saturated<cpu_t::common>, in_select<cc>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+private:
+ // Names the select member of in_select<cc> explicitly for use inside this struct.
+ using in_select<cc>::select;
public:
+ // Saturating addition of 8- and 16-bit elements via SSE2 intrinsics.
+ KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); }
+ KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
+ KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
+ KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
+
+ // Saturating subtraction of 8- and 16-bit elements via SSE2 intrinsics.
+ KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); }
+ KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
+ KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
+ KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
+
+ // 32- and 64-bit additions are forwarded to the saturated_*_add helpers.
+ KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); }
+ KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
+ KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
+ KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
+
+ // 32- and 64-bit subtractions are forwarded to the saturated_*_sub helpers.
+ KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); }
+ KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
+ KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
+ KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
+
KFR_HANDLE_ALL(satadd)
KFR_HANDLE_ALL(satsub)
+ KFR_HANDLE_SCALAR(satadd)
+ KFR_HANDLE_SCALAR(satsub)
KFR_SPEC_FN(in_saturated, satadd)
KFR_SPEC_FN(in_saturated, satsub)
};
@@ -130,6 +164,8 @@ struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc>
KFR_HANDLE_ALL(satadd)
KFR_HANDLE_ALL(satsub)
+ KFR_HANDLE_SCALAR(satadd)
+ KFR_HANDLE_SCALAR(satsub)
KFR_SPEC_FN(in_saturated, satadd)
KFR_SPEC_FN(in_saturated, satsub)
};
diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp
@@ -38,9 +38,9 @@ struct in_select_impl : in_select_impl<older(c)>
};
template <>
-struct in_select_impl<cpu_t::sse2>
+struct in_select_impl<cpu_t::common>
{
- constexpr static cpu_t cur = cpu_t::sse2;
+ constexpr static cpu_t cur = cpu_t::common;
template <typename T, size_t N>
KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y)
@@ -51,7 +51,7 @@ struct in_select_impl<cpu_t::sse2>
};
template <>
-struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::sse2>
+struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::common>
{
constexpr static cpu_t cpu = cpu_t::sse41;
diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp
@@ -39,6 +39,26 @@ struct in_sqrt : in_sqrt<older(c)>
};
template <>
+struct in_sqrt<cpu_t::common>
+{
+    // Square root for cpu_t::common: each overload hands a std::sqrt lambda to
+    // apply() together with the input vector (no intrinsics involved).
+    constexpr static cpu_t cpu = cpu_t::common;
+
+    // Single-precision overload.
+    template <size_t N>
+    KFR_SINTRIN vec<f32, N> sqrt(vec<f32, N> x)
+    {
+        return apply([](float lane) -> float { return std::sqrt(lane); }, x);
+    }
+    // Double-precision overload.
+    template <size_t N>
+    KFR_SINTRIN vec<f64, N> sqrt(vec<f64, N> x)
+    {
+        return apply([](double lane) -> double { return std::sqrt(lane); }, x);
+    }
+
+    KFR_HANDLE_SCALAR(sqrt)
+    KFR_SPEC_FN(in_sqrt, sqrt)
+};
+
+template <>
struct in_sqrt<cpu_t::sse2>
{
constexpr static cpu_t cpu = cpu_t::sse2;
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -314,13 +314,14 @@ constexpr inline ptrdiff_t distance(const void* x, const void* y)
enum class cpu_t : int
{
- sse2 = 0,
- sse3 = 1,
- ssse3 = 2,
- sse41 = 3,
- sse42 = 4,
- avx1 = 5,
- avx2 = 6,
+ common = 0,
+ sse2 = 1,
+ sse3 = 2,
+ ssse3 = 3,
+ sse41 = 4,
+ sse42 = 5,
+ avx1 = 6,
+ avx2 = 7,
avx = static_cast<int>(avx1),
native = static_cast<int>(KFR_ARCH_NAME),
lowest = static_cast<int>(sse2),
@@ -639,10 +640,14 @@ constexpr size_t native_cache_alignment_mask = native_cache_alignment - 1;
constexpr size_t maximum_vector_alignment = 32;
constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1;
constexpr size_t native_register_count = bitness_const(8, 16);
+
+constexpr size_t common_float_vector_size = 16;
+constexpr size_t common_int_vector_size = 16;
+
template <cpu_t c>
-constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : 0;
+constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size;
template <cpu_t c>
-constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : 0;
+constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size;
struct input_expression
{
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -113,7 +113,7 @@
#elif defined CID_ARCH_SSE
#define CID_ARCH_NAME sse
#else
-#define CID_ARCH_NAME legacy
+#define CID_ARCH_NAME common
#endif
#endif
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -18,7 +18,8 @@
cmake_minimum_required(VERSION 3.0)
if (NOT MSVC)
- add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0 -march=native)
+ add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0)
+ add_compile_options(-march=native)
link_libraries(stdc++ pthread m)
else ()
add_compile_options(/arch:AVX)