kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit fe272701e398c512900e1d067cdf68320055fc81
parent e80e1015ae7cfd71be2bb858f5c99817f48aa7c3
Author: [email protected] <[email protected]>
Date:   Thu, 21 Nov 2019 16:29:00 +0000

vec<vec<vec<T>>> support

Diffstat:
M include/kfr/simd/impl/operators.hpp | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/simd/vec.hpp | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M tests/asm_test.cpp | 153 +------------------------------------------------------------------------------
3 files changed, 183 insertions(+), 154 deletions(-)

diff --git a/include/kfr/simd/impl/operators.hpp b/include/kfr/simd/impl/operators.hpp @@ -149,6 +149,62 @@ KFR_COMPLEX_OP_CVT(bor) return fn(repeat<N2>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \ } +#define KFR_VECVECVEC_OP1(fn) \ + template <typename T1, size_t N1, size_t N2, size_t N3> \ + KFR_INTRINSIC vec<vec<vec<T1, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x) \ + { \ + return fn(x.flatten()).v; \ + } + +#define KFR_VECVECVEC_OP2(fn) \ + template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(is_simd_type<C>::value)> \ + KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \ + const vec<vec<vec<T2, N1>, N2>, N3>& y) \ + { /* VVV @ VVV */ \ + return fn(innercast<C>(x.flatten()), innercast<C>(y.flatten())).v; \ + } \ + template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(is_simd_type<C>::value)> \ + KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \ + const vec<vec<T2, N1>, N2>& y) \ + { /* VVV @ VV */ \ + return fn(innercast<C>(x.flatten()), repeat<N3>(innercast<C>(y.flatten()))).v; \ + } \ + template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(is_simd_type<C>::value)> \ + KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<T1, N1>, N2>& x, \ + const vec<vec<vec<T2, N1>, N2>, N3>& y) \ + { /* VV @ VVV */ \ + return fn(repeat<N3>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \ + } \ + template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(is_simd_type<C>::value)> \ + KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, const T2& y) \ + { /* VVV @ S */ \ + return fn(innercast<C>(x.flatten()), innercast<C>(y)).v; \ + } \ + template <typename T1, 
typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(is_simd_type<C>::value)> \ + KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \ + const vec<T2, N1>& y) \ + { /* VVV @ V */ \ + return fn(innercast<C>(x.flatten()), repeat<N2>(innercast<C>(y.flatten()))).v; \ + } \ + template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(is_simd_type<C>::value)> \ + KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const T1& x, const vec<vec<vec<T2, N1>, N2>, N3>& y) \ + { /* S @ VVV */ \ + return fn(innercast<C>(x), innercast<C>(y.flatten())).v; \ + } \ + template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(is_simd_type<C>::value)> \ + KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<T1, N1>& x, \ + const vec<vec<vec<T2, N1>, N2>, N3>& y) \ + { /* V @ VVV */ \ + return fn(repeat<N2>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \ + } + KFR_VECVEC_OP1(neg) KFR_VECVEC_OP1(bnot) KFR_VECVEC_OP2(add) @@ -159,6 +215,16 @@ KFR_VECVEC_OP2(band) KFR_VECVEC_OP2(bor) KFR_VECVEC_OP2(bxor) +KFR_VECVECVEC_OP1(neg) +KFR_VECVECVEC_OP1(bnot) +KFR_VECVECVEC_OP2(add) +KFR_VECVECVEC_OP2(sub) +KFR_VECVECVEC_OP2(mul) +KFR_VECVECVEC_OP2(div) +KFR_VECVECVEC_OP2(band) +KFR_VECVECVEC_OP2(bor) +KFR_VECVECVEC_OP2(bxor) + } // namespace intrinsics } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/simd/vec.hpp b/include/kfr/simd/vec.hpp @@ -155,9 +155,17 @@ struct compoundcast<vec<T, N>> template <typename T, size_t N1, size_t N2> struct compoundcast<vec<vec<T, N1>, N2>> { - static vec<T, N1 * N2> to_flat(const vec<vec<T, N1>, N2>& x) { return x; } + static vec<T, N1 * N2> to_flat(const vec<vec<T, N1>, N2>& x) { return x.v; } - static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * N2>& x) { return x; } + static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * 
N2>& x) { return x.v; } +}; + +template <typename T, size_t N1, size_t N2, size_t N3> +struct compoundcast<vec<vec<vec<T, N1>, N2>, N3>> +{ + static vec<T, N1 * N2 * N3> to_flat(const vec<vec<vec<T, N1>, N2>, N3>& x) { return x.v; } + + static vec<vec<vec<T, N1>, N2>, N3> from_flat(const vec<T, N1 * N2 * N3>& x) { return x.v; } }; } // namespace internal @@ -594,6 +602,14 @@ constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> cast(const vec<vec<Tin, N1>, N2>& return vec<vec<Tout, N1>, N2>(value); } +template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, + KFR_ENABLE_IF(!is_same<Tin, Tout>::value)> +constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> cast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) + CMT_NOEXCEPT +{ + return vec<vec<vec<Tout, N1>, N2>, N3>(value); +} + template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>::value)> constexpr KFR_INTRINSIC const vec<Tin, N>& cast(const vec<Tin, N>& value) CMT_NOEXCEPT { @@ -606,6 +622,14 @@ constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& cast(const vec<vec<Tin, N1> return value; } +template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, + KFR_ENABLE_IF(is_same<Tin, Tout>::value)> +constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& cast( + const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT +{ + return value; +} + // template <typename To, typename From, @@ -627,6 +651,14 @@ constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> innercast(const vec<vec<Tin, N1>, return vec<vec<Tout, N1>, N2>(value); } +template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, + KFR_ENABLE_IF(!is_same<Tin, Tout>::value)> +constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> innercast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) + CMT_NOEXCEPT +{ + return vec<vec<vec<Tout, N1>, N2>, N3>(value); +} + template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>::value)> constexpr KFR_INTRINSIC const vec<Tin, N>& innercast(const 
vec<Tin, N>& value) CMT_NOEXCEPT { @@ -640,6 +672,14 @@ constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& innercast(const vec<vec<Tin return value; } +template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, + KFR_ENABLE_IF(is_same<Tin, Tout>::value)> +constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& innercast( + const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT +{ + return value; +} + // template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>::value)> @@ -660,6 +700,13 @@ constexpr KFR_INTRINSIC vec<Tout, N2> elemcast(const vec<vec<Tin, N1>, N2>& valu return vec<Tout, N2>(value); } +template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, + KFR_ENABLE_IF(!is_same<Tin, Tout>::value)> +constexpr KFR_INTRINSIC vec<Tout, N3> elemcast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT +{ + return vec<Tout, N3>(value); +} + template <typename To, typename From> CMT_GNU_CONSTEXPR KFR_INTRINSIC To bitcast(const From& value) CMT_NOEXCEPT { @@ -1195,6 +1242,22 @@ struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>> } }; +// vector to vector<vector<vector>> +template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t Ns1> +struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<From, Ns1>> +{ + static_assert(N1 == Ns1, ""); + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + + static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<From, N1>& value) + { + return vec<vec<vec<To, N1>, N2>, N3>::from_flatten( + kfr::innercast<To>(value.flatten()) + .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>)); + } +}; + // vector<vector> to vector<vector> template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2> struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>> @@ -1209,6 +1272,22 @@ struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>> return vec<vec<To, N1>, 
N2>::from_flatten(kfr::innercast<To>(value.flatten())); } }; + +// vector<vector<vector>> to vector<vector<vector>> +template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t NN1, size_t NN2, size_t NN3> +struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<vec<vec<From, NN1>, NN2>, NN3>> +{ + static_assert(N1 == NN1, ""); + static_assert(N2 == NN2, ""); + static_assert(N3 == NN3, ""); + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + + static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<vec<vec<From, N1>, N2>, N3>& value) + { + return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(kfr::innercast<To>(value.flatten())); + } +}; } // namespace internal template <typename T, size_t N1, size_t N2 = N1> @@ -1243,6 +1322,40 @@ struct vec_vec_template using type = vec<vec<T, N1>, N2>; }; +namespace internal +{ + +template <typename T, size_t... Ns> +struct vecx_t; + +template <typename T> +struct vecx_t<T> +{ + using type = T; +}; + +template <typename T, size_t N1> +struct vecx_t<T, N1> +{ + using type = vec<T, N1>; +}; + +template <typename T, size_t N1, size_t N2> +struct vecx_t<T, N1, N2> +{ + using type = vec<vec<T, N1>, N2>; +}; + +template <typename T, size_t N1, size_t N2, size_t N3> +struct vecx_t<T, N1, N2, N3> +{ + using type = vec<vec<vec<T, N1>, N2>, N3>; +}; +} // namespace internal + +template <typename T, size_t... 
Ns> +using vecx = typename internal::vecx_t<T, Ns...>::type; + } // namespace CMT_ARCH_NAME template <typename T1, typename T2, size_t N> struct common_type_impl<kfr::vec<T1, N>, kfr::vec<T2, N>> @@ -1271,6 +1384,7 @@ struct common_type_impl<kfr::vec<kfr::vec<T1, N1>, N2>, kfr::vec<T2, N1>> : common_type_from_subtypes<T1, T2, kfr::vec_vec_template<N1, N2>::template type> { }; + } // namespace kfr namespace cometa diff --git a/tests/asm_test.cpp b/tests/asm_test.cpp @@ -177,8 +177,6 @@ using namespace kfr; #define TEST_ASM_IF(fn, MACRO) TEST_ASM_I(fn, MACRO) TEST_ASM_F(fn, MACRO) -#if 1 - TEST_ASM_UIF(add, TEST_ASM_VTY2) TEST_ASM_UIF(sub, TEST_ASM_VTY2) @@ -284,8 +282,6 @@ TEST_FFT_GEN(f64) #endif -#endif - TEST_ASM_F(sin, TEST_ASM_VTY1_F) TEST_ASM_F(cos, TEST_ASM_VTY1_F) @@ -296,154 +292,7 @@ namespace kfr #ifdef KFR_SHOW_NOT_OPTIMIZED KFR_PUBLIC void not_optimized(const char* fn) CMT_NOEXCEPT { puts(fn); } #endif -} // namespace kfr -KFR_PUBLIC void test_shuffle_old1(f32x1& x, const f32x4& y) -{ - x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2>, overload_auto); -} - -KFR_PUBLIC void test_shuffle_old2(f32x4& x, const f32x4& y) -{ - x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<3, 2, 1, 0>, - overload_auto); -} - -KFR_PUBLIC void test_shuffle_old3(f32x4& x, const f32x4& y) -{ - x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<0, 1, 2, 3>, - overload_auto); -} - -KFR_PUBLIC void test_shuffle_old4(f32x2& x, const f32x4& y) -{ - x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2, 3>, overload_auto); -} - -KFR_PUBLIC void test_shuffle_old5(f32x8& x, const f32x4& y) -{ - x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, - csizes<3, 2, 1, 0, 0, 1, 2, 3>, overload_auto); -} - -KFR_PUBLIC void test_shuffle_old6(f32x8& x, const f32x4& y) -{ - x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 
4>{}, y.v, - csizes<7, 6, 5, 4, 3, 2, 1, 0>, overload_auto); -} - -KFR_PUBLIC void test_shuffle_old9(vec<f32, 3>& x, const vec<f32, 15>& y) -{ - x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<3, 2, 1>, - overload_auto); -} - -KFR_PUBLIC void test_shuffle_new1(f32x1& x, const f32x4& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2>); -} - -KFR_PUBLIC void test_shuffle_new2(f32x4& x, const f32x4& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<3, 2, 1, 0>); -} - -KFR_PUBLIC void test_shuffle_new3(f32x4& x, const f32x4& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<0, 1, 2, 3>); -} - -KFR_PUBLIC void test_shuffle_new4(f32x2& x, const f32x4& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2, 3>); -} - -KFR_PUBLIC void test_shuffle_new5(f32x8& x, const f32x4& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, - csizes<3, 2, 1, 0, 0, 1, 2, 3>); -} - -KFR_PUBLIC void test_shuffle_new6(f32x8& x, const f32x4& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, - csizes<7, 6, 5, 4, 3, 2, 1, 0>); -} - -KFR_PUBLIC void test_shuffle_new7(f32x1& x, const f32x32& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 32>{}, (y + 1.f).v, csizes<19>); -} - -KFR_PUBLIC void test_shuffle_new8(f32x8& x, const f32x8& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v, - csizes<3, 2, 1, 0, 3, 2, 1, 0>); -} - -KFR_PUBLIC void test_shuffle_new9(vec<f32, 3>& x, const vec<f32, 15>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<3, 2, 1>); -} - -KFR_PUBLIC void test_shuffle_new9a(vec<f32, 3>& x, const vec<f32, 15>& y) -{ - x.v = 
kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<5, 6, 7>); -} - -KFR_PUBLIC void test_shuffle_new9b(vec<f32, 3>& x, const vec<f32, 15>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<11, 11, 11>); -} - -KFR_PUBLIC void test_shuffle_new9c(vec<f32, 3>& x, const vec<f32, 15>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<3, 4, 5>); -} - -KFR_PUBLIC void test_shuffle_new10(vec<f32, 15>& x) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 1>{}, 0.f, - csizes<1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>); -} -KFR_PUBLIC void test_shuffle_new11(vec<f32, 15>& x, float y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 1>{}, y, - csizes<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>); -} -KFR_PUBLIC void test_shuffle_new12(vec<f32, 32>& x, const vec<f32, 32>& y) -{ - x.v = - kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 32>{}, y.v, csizeseq<32> ^ csize<1>); -} -KFR_PUBLIC void test_shuffle_new13(vec<f32, 8>& x, const vec<f32, 8>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v, - csizes<0, 2, 4, 6, 1, 3, 5, 7>); -} -KFR_PUBLIC void test_shuffle_new14(vec<f32, 8>& x, const vec<f32, 8>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v, - csizes<0, 4, 1, 5, 2, 6, 3, 7>); -} -KFR_PUBLIC void test_shuffle_new15(vec<f32, 4>& x, const vec<f32, 8>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v, csizes<0, 5, 2, 7>); -} -KFR_PUBLIC void test_shuffle_new16(vec<f32, 2>& x, const vec<f32, 2>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 2>{}, y.v, csizes<1, 0>); -} -KFR_PUBLIC void test_shuffle_new17(vec<f32, 16>& x, const vec<f32, 16>& y) -{ - x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 16>{}, y.v, - 
csizes<0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15>); -} - -KFR_PUBLIC float tuple_assign() -{ - auto [x, y, z, w] = f32x4(1.f, 2.f, 3.f, 4.f); - return x + y * y + z * z * z + w * w * w * w; -} +} // namespace kfr int main() { println(library_version()); }