commit 936b9a553e5ca3f78afd11683227f11513921120
parent cd6d4e1c87fcc1a052c0eb3604210dafa1712f52
Author: [email protected] <[email protected]>
Date: Tue, 26 Jul 2016 14:46:47 +0300
Fix round functions and other small fixes
Diffstat:
4 files changed, 126 insertions(+), 55 deletions(-)
diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp
@@ -41,26 +41,26 @@ KFR_SINTRIN vec<T, N> abs(vec<T, N> x)
#if defined CID_ARCH_SSSE3
KFR_SINTRIN i64sse abs(i64sse x) { return select(x >= 0, x, -x); }
-KFR_SINTRIN i32sse abs(i32sse value) { return _mm_abs_epi32(*value); }
-KFR_SINTRIN i16sse abs(i16sse value) { return _mm_abs_epi16(*value); }
-KFR_SINTRIN i8sse abs(i8sse value) { return _mm_abs_epi8(*value); }
+KFR_SINTRIN i32sse abs(i32sse x) { return _mm_abs_epi32(*x); } // signed 32-bit elements: forwards to _mm_abs_epi32
+KFR_SINTRIN i16sse abs(i16sse x) { return _mm_abs_epi16(*x); } // signed 16-bit elements: forwards to _mm_abs_epi16
+KFR_SINTRIN i8sse abs(i8sse x) { return _mm_abs_epi8(*x); } // signed 8-bit elements: forwards to _mm_abs_epi8
+KFR_SINTRIN u64sse abs(u64sse x) { return x; } // u64sse: the argument is returned unchanged
+KFR_SINTRIN u32sse abs(u32sse x) { return x; } // u32sse: the argument is returned unchanged
+KFR_SINTRIN u16sse abs(u16sse x) { return x; } // u16sse: the argument is returned unchanged
+KFR_SINTRIN u8sse abs(u8sse x) { return x; } // u8sse: the argument is returned unchanged
#if defined CID_ARCH_AVX2
+KFR_SINTRIN i64avx abs(i64avx x) { return select(x >= 0, x, -x); } // same select-based form as the i64sse overload; parameter must be named x to match the body
KFR_SINTRIN i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); }
KFR_SINTRIN i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); }
KFR_SINTRIN i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); }
+KFR_SINTRIN u64avx abs(u64avx x) { return x; } // u64avx: the argument is returned unchanged
+KFR_SINTRIN u32avx abs(u32avx x) { return x; } // u32avx: the argument is returned unchanged
+KFR_SINTRIN u16avx abs(u16avx x) { return x; } // u16avx: the argument is returned unchanged
+KFR_SINTRIN u8avx abs(u8avx x) { return x; } // u8avx: the argument is returned unchanged
#endif
-template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && !is_f_class<T>::value)>
-KFR_SINTRIN vec<T, N> abs(vec<T, N> a)
-{
- return slice<0, N>(abs(expand_simd(a)));
-}
-template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && !is_f_class<T>::value)>
-KFR_SINTRIN vec<T, N> abs(vec<T, N> a)
-{
- return concat(abs(low(a)), abs(high(a)));
-}
+KFR_HANDLE_ALL_SIZES_NOT_F_1(abs)
#else
diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp
@@ -40,6 +40,9 @@ namespace kfr
return fn(make_vector(x), make_vector(rest)...)[0]; \
}
+template <typename T> // flt_type<T>: result type used by functions that always produce floating-point output (e.g. sqrt)
+using flt_type = conditional<std::is_floating_point<deep_subtype<T>>::value, T, deep_rebind<T, fbase>>; // T when deep_subtype<T> is floating-point, otherwise deep_rebind<T, fbase>
+
namespace internal
{
#ifdef CID_ARCH_X86
@@ -145,6 +148,74 @@ KFR_SINTRIN vec<T, Nout> expand_simd(vec<T, N> x)
return fn(make_vector(a))[0]; \
}
+#define KFR_HANDLE_ALL_SIZES_FLT_1(fn) /* Defines two one-argument fn(vec<T, N>) overloads, chosen by N, both returning vec<flt_type<T>, N>. */ \
+ template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> /* case: N below vector_width<T, cpu_t::native> */ \
+ KFR_SINTRIN vec<flt_type<T>, N> fn(vec<T, N> a) \
+ { \
+ return slice<0, N>(fn(expand_simd(cast<flt_type<T>>(a)))); /* cast to flt_type<T>, pass through expand_simd, apply fn, then take slice<0, N> */ \
+ } \
+ template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> /* case: N at or above vector_width; the extra defaulted parameter presumably keeps this template distinct from the one above - confirm */ \
+ KFR_SINTRIN vec<flt_type<T>, N> fn(vec<T, N> a) /* NOTE(review): the size test uses vector_width<T> while fn runs on flt_type<T>; confirm the two widths agree for integer T */ \
+ { \
+ return concat(fn(low(cast<flt_type<T>>(a))), fn(high(cast<flt_type<T>>(a)))); /* cast, apply fn to low(...) and high(...) separately, concat the results */ \
+ }
+
+#define KFR_HANDLE_ALL_SIZES_F_1(fn) /* Defines two one-argument fn(vec<T, N>) overloads, chosen by N, enabled only when is_f_class<T>::value holds. */ \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && is_f_class<T>::value)> /* case: N below vector_width<T, cpu_t::native> */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return slice<0, N>(fn(expand_simd(a))); /* pass through expand_simd, apply fn, then take slice<0, N> */ \
+ } \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && is_f_class<T>::value), typename = void> /* case: N at or above vector_width; extra defaulted parameter presumably keeps the two templates distinct - confirm */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return concat(fn(low(a)), fn(high(a))); /* apply fn to low(a) and high(a) separately, concat the results */ \
+ }
+
+#define KFR_HANDLE_ALL_SIZES_I_1(fn) /* Defines two one-argument fn(vec<T, N>) overloads, chosen by N, enabled only when is_i_class<T>::value holds. */ \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && is_i_class<T>::value)> /* case: N below vector_width<T, cpu_t::native> */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return slice<0, N>(fn(expand_simd(a))); /* pass through expand_simd, apply fn, then take slice<0, N> */ \
+ } \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && is_i_class<T>::value), typename = void> /* case: N at or above vector_width; extra defaulted parameter presumably keeps the two templates distinct - confirm */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return concat(fn(low(a)), fn(high(a))); /* apply fn to low(a) and high(a) separately, concat the results */ \
+ }
+
+#define KFR_HANDLE_ALL_SIZES_U_1(fn) /* Defines two one-argument fn(vec<T, N>) overloads, chosen by N, enabled only when is_u_class<T>::value holds. */ \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && is_u_class<T>::value)> /* case: N below vector_width<T, cpu_t::native> */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return slice<0, N>(fn(expand_simd(a))); /* pass through expand_simd, apply fn, then take slice<0, N> */ \
+ } \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && is_u_class<T>::value), typename = void> /* case: N at or above vector_width; extra defaulted parameter presumably keeps the two templates distinct - confirm */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return concat(fn(low(a)), fn(high(a))); /* apply fn to low(a) and high(a) separately, concat the results */ \
+ }
+
+#define KFR_HANDLE_ALL_SIZES_NOT_F_1(fn) /* Defines two one-argument fn(vec<T, N>) overloads, chosen by N, enabled only when is_f_class<T>::value is false. */ \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N < vector_width<T, cpu_t::native> && !is_f_class<T>::value)> /* case: N below vector_width<T, cpu_t::native> */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return slice<0, N>(fn(expand_simd(a))); /* pass through expand_simd, apply fn, then take slice<0, N> */ \
+ } \
+ template <typename T, size_t N, \
+ KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native> && !is_f_class<T>::value), typename = void> /* case: N at or above vector_width; extra defaulted parameter presumably keeps the two templates distinct - confirm */ \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+ { \
+ return concat(fn(low(a)), fn(high(a))); /* apply fn to low(a) and high(a) separately, concat the results */ \
+ }
+
#define KFR_HANDLE_ALL_SIZES_2(fn) \
template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
KFR_SINTRIN vec<T, N> fn(vec<T, N> a, vec<T, N> b) \
diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp
@@ -78,11 +78,11 @@ KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); }
KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); }
#endif
-KFR_HANDLE_ALL_SIZES_1(floor)
-KFR_HANDLE_ALL_SIZES_1(ceil)
-KFR_HANDLE_ALL_SIZES_1(round)
-KFR_HANDLE_ALL_SIZES_1(trunc)
-KFR_HANDLE_ALL_SIZES_1(fract)
+KFR_HANDLE_ALL_SIZES_F_1(floor)
+KFR_HANDLE_ALL_SIZES_F_1(ceil)
+KFR_HANDLE_ALL_SIZES_F_1(round)
+KFR_HANDLE_ALL_SIZES_F_1(trunc)
+KFR_HANDLE_ALL_SIZES_F_1(fract)
#else
@@ -144,46 +144,46 @@ KFR_SINTRIN vec<f64, N> fract(vec<f64, N> x)
}
#endif
-// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
-// KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
-//{
-// return value;
-//}
-// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
-// KFR_SINTRIN vec<T, N> ceil(vec<T, N> value)
-//{
-// return value;
-//}
-// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
-// KFR_SINTRIN vec<T, N> trunc(vec<T, N> value)
-//{
-// return value;
-//}
-// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
-// KFR_SINTRIN vec<T, N> round(vec<T, N> value)
-//{
-// return value;
-//}
-// template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
-// KFR_SINTRIN vec<T, N> fract(vec<T, N>)
-//{
-// return T(0);
-//}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> // floor overload enabled only when is_f_class<T>::value is false
+KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
+{
+ return value; // the argument is returned unchanged
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> // ceil overload enabled only when is_f_class<T>::value is false
+KFR_SINTRIN vec<T, N> ceil(vec<T, N> value)
+{
+ return value; // the argument is returned unchanged
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> // trunc overload enabled only when is_f_class<T>::value is false
+KFR_SINTRIN vec<T, N> trunc(vec<T, N> value)
+{
+ return value; // the argument is returned unchanged
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> // round overload enabled only when is_f_class<T>::value is false
+KFR_SINTRIN vec<T, N> round(vec<T, N> value)
+{
+ return value; // the argument is returned unchanged
+}
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> // fract overload enabled only when is_f_class<T>::value is false
+KFR_SINTRIN vec<T, N> fract(vec<T, N>) // the argument is unnamed because its value is never read
+{
+ return T(0); // returns T(0), converted to the vec<T, N> return type
+}
KFR_HANDLE_SCALAR_1(floor)
KFR_HANDLE_SCALAR_1(ceil)
KFR_HANDLE_SCALAR_1(round)
KFR_HANDLE_SCALAR_1(trunc)
KFR_HANDLE_SCALAR_1(fract)
-KFR_FN(floor)
-KFR_FN(ceil)
-KFR_FN(round)
-KFR_FN(trunc)
-KFR_FN(fract)
+KFR_I_FN(floor)
+KFR_I_FN(ceil)
+KFR_I_FN(round)
+KFR_I_FN(trunc)
+KFR_I_FN(fract)
}
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> floor(const T1& x)
+KFR_INTRIN T1 floor(const T1& x)
{
return internal::floor(x);
}
@@ -195,7 +195,7 @@ KFR_INTRIN expr_func<internal::fn_floor, E1> floor(E1&& x)
}
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> ceil(const T1& x)
+KFR_INTRIN T1 ceil(const T1& x)
{
return internal::ceil(x);
}
@@ -207,7 +207,7 @@ KFR_INTRIN expr_func<internal::fn_ceil, E1> ceil(E1&& x)
}
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> round(const T1& x)
+KFR_INTRIN T1 round(const T1& x)
{
return internal::round(x);
}
@@ -219,7 +219,7 @@ KFR_INTRIN expr_func<internal::fn_round, E1> round(E1&& x)
}
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> trunc(const T1& x)
+KFR_INTRIN T1 trunc(const T1& x)
{
return internal::trunc(x);
}
@@ -231,7 +231,7 @@ KFR_INTRIN expr_func<internal::fn_trunc, E1> trunc(E1&& x)
}
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> fract(const T1& x)
+KFR_INTRIN T1 fract(const T1& x)
{
return internal::fract(x);
}
diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp
@@ -42,7 +42,7 @@ KFR_SINTRIN f32avx sqrt(f32avx x) { return _mm256_sqrt_ps(*x); }
KFR_SINTRIN f64avx sqrt(f64avx x) { return _mm256_sqrt_pd(*x); }
#endif
-KFR_HANDLE_ALL_SIZES_1(sqrt)
+KFR_HANDLE_ALL_SIZES_FLT_1(sqrt)
#else
@@ -58,7 +58,7 @@ KFR_FN(sqrt)
}
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
-KFR_INTRIN ftype<T1> sqrt(const T1& x)
+KFR_INTRIN flt_type<T1> sqrt(const T1& x)
{
return internal::sqrt(x);
}