kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 6ff1acd1d2216a0655755c48a805cb14c130c150
parent e0edb39a1d4fdeda5ddd097077475415dcafb71a
Author: [email protected] <[email protected]>
Date:   Mon, 25 Jul 2016 13:01:16 +0300

Move platform specific types to function.hpp

Diffstat:
Minclude/kfr/base/function.hpp | 134++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Minclude/kfr/base/vec.hpp | 57++++++++++-----------------------------------------------
2 files changed, 143 insertions(+), 48 deletions(-)

diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp
@@ -48,7 +48,7 @@ namespace kfr
 }
 #define KFR_HANDLE_SCALAR(fn) \
-    template <typename T, typename... Ts, KFR_ENABLE_IF(!is_vec<T>::value)> \
+    template <typename T, typename... Ts, KFR_ENABLE_IF(is_numeric_args<T, Ts...>::value)> \
     KFR_SINTRIN auto fn(const T& x, const Ts&... rest) \
     { \
         return fn(make_vector(x), make_vector(rest)...)[0]; \
@@ -120,5 +120,137 @@ KFR_INLINE auto handle_all_reduce(vec<T, N> x, Args&&... args)
     return handle_all_reduce_f<cur>(redfn, fn, x, std::forward<Args>(args)...);
 }
 }
+
+namespace internal
+{
+
+using f32sse = vec<f32, 4>;
+using f64sse = vec<f64, 2>;
+using i8sse = vec<i8, vector_width<i8, cpu_t::sse2>>;
+using i16sse = vec<i16, vector_width<i16, cpu_t::sse2>>;
+using i32sse = vec<i32, vector_width<i32, cpu_t::sse2>>;
+using i64sse = vec<i64, vector_width<i64, cpu_t::sse2>>;
+using u8sse = vec<u8, vector_width<u8, cpu_t::sse2>>;
+using u16sse = vec<u16, vector_width<u16, cpu_t::sse2>>;
+using u32sse = vec<u32, vector_width<u32, cpu_t::sse2>>;
+using u64sse = vec<u64, vector_width<u64, cpu_t::sse2>>;
+
+using mf32sse = mask<f32, vector_width<f32, cpu_t::sse2>>;
+using mf64sse = mask<f64, vector_width<f64, cpu_t::sse2>>;
+using mi8sse = mask<i8, vector_width<i8, cpu_t::sse2>>;
+using mi16sse = mask<i16, vector_width<i16, cpu_t::sse2>>;
+using mi32sse = mask<i32, vector_width<i32, cpu_t::sse2>>;
+using mi64sse = mask<i64, vector_width<i64, cpu_t::sse2>>;
+using mu8sse = mask<u8, vector_width<u8, cpu_t::sse2>>;
+using mu16sse = mask<u16, vector_width<u16, cpu_t::sse2>>;
+using mu32sse = mask<u32, vector_width<u32, cpu_t::sse2>>;
+using mu64sse = mask<u64, vector_width<u64, cpu_t::sse2>>;
+
+using f32avx = vec<f32, 8>;
+using f64avx = vec<f64, 4>;
+using i8avx = vec<i8, vector_width<i8, cpu_t::avx2>>;
+using i16avx = vec<i16, vector_width<i16, cpu_t::avx2>>;
+using i32avx = vec<i32, vector_width<i32, cpu_t::avx2>>;
+using i64avx = vec<i64, vector_width<i64, cpu_t::avx2>>;
+using u8avx = vec<u8, vector_width<u8, cpu_t::avx2>>;
+using u16avx = vec<u16, vector_width<u16, cpu_t::avx2>>;
+using u32avx = vec<u32, vector_width<u32, cpu_t::avx2>>;
+using u64avx = vec<u64, vector_width<u64, cpu_t::avx2>>;
+
+using mf32avx = mask<f32, vector_width<f32, cpu_t::avx1>>;
+using mf64avx = mask<f64, vector_width<f64, cpu_t::avx1>>;
+using mi8avx = mask<i8, vector_width<i8, cpu_t::avx2>>;
+using mi16avx = mask<i16, vector_width<i16, cpu_t::avx2>>;
+using mi32avx = mask<i32, vector_width<i32, cpu_t::avx2>>;
+using mi64avx = mask<i64, vector_width<i64, cpu_t::avx2>>;
+using mu8avx = mask<u8, vector_width<u8, cpu_t::avx2>>;
+using mu16avx = mask<u16, vector_width<u16, cpu_t::avx2>>;
+using mu32avx = mask<u32, vector_width<u32, cpu_t::avx2>>;
+using mu64avx = mask<u64, vector_width<u64, cpu_t::avx2>>;
+
+template <cpu_t c, typename T>
+constexpr inline size_t next_simd_width(size_t n)
+{
+    return n > vector_width<T, cpu_t::sse2> ? vector_width<T, c> : vector_width<T, cpu_t::sse2>;
+}
+
+template <typename T, size_t N, size_t Nout = next_simd_width<cpu_t::native, T>(N)>
+KFR_SINTRIN vec<T, Nout> expand_simd(vec<T, N> x)
+{
+    return extend<Nout>(x);
+}
+
+#define KFR_HANDLE_ALL_SIZES_1(fn) \
+    template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+    { \
+        return slice<0, N>(fn(expand_simd(a))); \
+    } \
+    template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a) \
+    { \
+        return concat(fn(low(a)), fn(high(a))); \
+    }
+#define KFR_HANDLE_SCALAR_1(fn) \
+    template <typename T, KFR_ENABLE_IF(is_numeric<T>::value)> \
+    KFR_SINTRIN T fn(T a) \
+    { \
+        return fn(make_vector(a))[0]; \
+    }
+
+#define KFR_HANDLE_ALL_SIZES_2(fn) \
+    template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a, vec<T, N> b) \
+    { \
+        return slice<0, N>(fn(expand_simd(a), expand_simd(b))); \
+    } \
+    template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a, vec<T, N> b) \
+    { \
+        return concat(fn(low(a), low(b)), fn(high(a), high(b))); \
+    }
+#define KFR_HANDLE_SCALAR_2(fn) \
+    template <typename T, KFR_ENABLE_IF(is_numeric<T>::value)> \
+    KFR_SINTRIN T fn(T a, T b) \
+    { \
+        return fn(make_vector(a), make_vector(b))[0]; \
+    }
+
+#define KFR_HANDLE_ALL_SIZES_3(fn) \
+    template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a, vec<T, N> b, vec<T, N> c) \
+    { \
+        return slice<0, N>(fn(expand_simd(a), expand_simd(b), expand_simd(c))); \
+    } \
+    template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a, vec<T, N> b, vec<T, N> c) \
+    { \
+        return concat(fn(low(a), low(b), low(c)), fn(high(a), high(b), high(c))); \
+    }
+#define KFR_HANDLE_SCALAR_3(fn) \
+    template <typename T, KFR_ENABLE_IF(is_numeric<T>::value)> \
+    KFR_SINTRIN T fn(T a, T b, T c) \
+    { \
+        return fn(make_vector(a), make_vector(b), make_vector(c))[0]; \
+    }
+
+#define KFR_HANDLE_ALL_SIZES_4(fn) \
+    template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a, vec<T, N> b, vec<T, N> c, vec<T, N> d) \
+    { \
+        return slice<0, N>(fn(expand_simd(a), expand_simd(b), expand_simd(c), expand_simd(d))); \
+    } \
+    template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> \
+    KFR_SINTRIN vec<T, N> fn(vec<T, N> a, vec<T, N> b, vec<T, N> c, vec<T, N> d) \
+    { \
+        return concat(fn(low(a), low(b), low(c), low(d)), fn(high(a), high(b), high(c), high(d))); \
+    }
+#define KFR_HANDLE_SCALAR_4(fn) \
+    template <typename T, KFR_ENABLE_IF(is_numeric<T>::value)> \
+    KFR_SINTRIN T fn(T a, T b, T c, T d) \
+    { \
+        return fn(make_vector(a), make_vector(b), make_vector(c), make_vector(d))[0]; \
+    }
+}
 }
 #pragma clang diagnostic pop
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -717,10 +717,10 @@ struct mask : public vec<T, N>
     {
     }
-    template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
-    constexpr KFR_INLINE mask(mask<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
-    {
-    }
+//    template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
+//    constexpr KFR_INLINE mask(mask<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
+//    {
+//    }
     constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); }
     constexpr KFR_INLINE mask operator&(vec<T, N> x) const
     {
@@ -744,6 +744,12 @@ struct mask : public vec<T, N>
     KFR_INLINE vec<T, N>& asvec() { return ref_cast<mask>(*this); }
     KFR_INLINE const vec<T, N>& asvec() const { return ref_cast<mask>(*this); }
 
+    template <typename U, KFR_ENABLE_IF(sizeof(T) == sizeof(U))>
+    KFR_INLINE operator mask<U, N>() const
+    {
+        return bitcast<U>(*this);
+    }
+
     KFR_INLINE bool operator[](size_t index) const { return ibitcast(this->v[index]) < 0; }
 };
@@ -1041,49 +1047,6 @@ using double16 = f64x16;
 
 namespace internal
 {
-using f32sse = vec<f32, vector_width<f32, cpu_t::sse2>>;
-using f64sse = vec<f64, vector_width<f64, cpu_t::sse2>>;
-using i8sse = vec<i8, vector_width<i8, cpu_t::sse2>>;
-using i16sse = vec<i16, vector_width<i16, cpu_t::sse2>>;
-using i32sse = vec<i32, vector_width<i32, cpu_t::sse2>>;
-using i64sse = vec<i64, vector_width<i64, cpu_t::sse2>>;
-using u8sse = vec<u8, vector_width<u8, cpu_t::sse2>>;
-using u16sse = vec<u16, vector_width<u16, cpu_t::sse2>>;
-using u32sse = vec<u32, vector_width<u32, cpu_t::sse2>>;
-using u64sse = vec<u64, vector_width<u64, cpu_t::sse2>>;
-
-using mf32sse = mask<f32, vector_width<f32, cpu_t::sse2>>;
-using mf64sse = mask<f64, vector_width<f64, cpu_t::sse2>>;
-using mi8sse = mask<i8, vector_width<i8, cpu_t::sse2>>;
-using mi16sse = mask<i16, vector_width<i16, cpu_t::sse2>>;
-using mi32sse = mask<i32, vector_width<i32, cpu_t::sse2>>;
-using mi64sse = mask<i64, vector_width<i64, cpu_t::sse2>>;
-using mu8sse = mask<u8, vector_width<u8, cpu_t::sse2>>;
-using mu16sse = mask<u16, vector_width<u16, cpu_t::sse2>>;
-using mu32sse = mask<u32, vector_width<u32, cpu_t::sse2>>;
-using mu64sse = mask<u64, vector_width<u64, cpu_t::sse2>>;
-
-using f32avx = vec<f32, vector_width<f32, cpu_t::avx1>>;
-using f64avx = vec<f64, vector_width<f64, cpu_t::avx1>>;
-using i8avx = vec<i8, vector_width<i8, cpu_t::avx2>>;
-using i16avx = vec<i16, vector_width<i16, cpu_t::avx2>>;
-using i32avx = vec<i32, vector_width<i32, cpu_t::avx2>>;
-using i64avx = vec<i64, vector_width<i64, cpu_t::avx2>>;
-using u8avx = vec<u8, vector_width<u8, cpu_t::avx2>>;
-using u16avx = vec<u16, vector_width<u16, cpu_t::avx2>>;
-using u32avx = vec<u32, vector_width<u32, cpu_t::avx2>>;
-using u64avx = vec<u64, vector_width<u64, cpu_t::avx2>>;
-
-using mf32avx = mask<f32, vector_width<f32, cpu_t::avx1>>;
-using mf64avx = mask<f64, vector_width<f64, cpu_t::avx1>>;
-using mi8avx = mask<i8, vector_width<i8, cpu_t::avx2>>;
-using mi16avx = mask<i16, vector_width<i16, cpu_t::avx2>>;
-using mi32avx = mask<i32, vector_width<i32, cpu_t::avx2>>;
-using mi64avx = mask<i64, vector_width<i64, cpu_t::avx2>>;
-using mu8avx = mask<u8, vector_width<u8, cpu_t::avx2>>;
-using mu16avx = mask<u16, vector_width<u16, cpu_t::avx2>>;
-using mu32avx = mask<u32, vector_width<u32, cpu_t::avx2>>;
-using mu64avx = mask<u64, vector_width<u64, cpu_t::avx2>>;
 
 template <typename T, size_t N>
 struct vec_type
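
For orientation, the KFR_HANDLE_ALL_SIZES_* and KFR_HANDLE_SCALAR_* macros added above implement a widen/split dispatch pattern: an operation only needs a hand-written overload for the native SIMD width, and the macros generate overloads for narrower vectors (extend via expand_simd, call, slice back to N), wider vectors (split into low/high halves, recurse, concatenate) and scalars (route through a one-element vector and take element 0). A minimal sketch of how they might be applied follows; the operation name mytwice and its bodies are hypothetical and not part of this commit, and an SSE2 base case is assumed to be sufficient:

namespace kfr
{
namespace internal
{
// Hypothetical base case: a hand-written implementation for the full SSE register width.
KFR_SINTRIN f32sse mytwice(f32sse x) { return x + x; }
KFR_SINTRIN f64sse mytwice(f64sse x) { return x + x; }

// Generated overloads: vec<f32, N>/vec<f64, N> for any other N, plus plain f32/f64.
// Narrower vectors are widened, processed and sliced back; wider ones are split and concatenated.
KFR_HANDLE_ALL_SIZES_1(mytwice)
KFR_HANDLE_SCALAR_1(mytwice)
}
}

With this in place, a call such as mytwice(make_vector(1.f, 2.f)) or mytwice(3.f) would resolve through the generated overloads down to the single SSE implementation.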