New implementation for SIMD read/write - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 8ca9e4785460793e5af8107fc87cd1a62b59f6ac
parent 64569318f21ece90d0632df3e14cbcbc793437be
Author: [email protected] <[email protected]>
Date:   Sat,  8 Oct 2016 03:07:59 +0300

New implementation for SIMD read/write

Diffstat:
M include/kfr/base/read_write.hpp  | 4 ++--
M include/kfr/base/simd.hpp  | 92 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
M include/kfr/base/types.hpp  | 23 +++++++++++++++++++++++
M include/kfr/base/vec.hpp  | 160 +++++++++++++++----------------------------------------------------------------
M include/kfr/dft/ft.hpp  | 8 ++++----

5 files changed, 115 insertions(+), 172 deletions(-)
diff --git a/include/kfr/base/read_write.hpp b/include/kfr/base/read_write.hpp
@@ -35,13 +35,13 @@ namespace kfr
 template <size_t N, bool A = false, typename T>
 CMT_INLINE vec<T, N> read(const T* src)
 {
-    return internal_read_write::read<N, A, T>(src);
+    return simd_read<N * compound_type_traits<T>::width, A>(ptr_cast<subtype<T>>(src));
 }
 
 template <bool A = false, size_t N, typename T>
 CMT_INLINE void write(T* dest, const vec<T, N>& value)
 {
-    internal_read_write::write<A, N, T>(dest, value);
+    simd_write<A, N * compound_type_traits<T>::width>(ptr_cast<subtype<T>>(dest), *value);
 }
 
 template <typename... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)>
diff --git a/include/kfr/base/simd.hpp b/include/kfr/base/simd.hpp
@@ -31,53 +31,72 @@
 namespace kfr
 {
 
+constexpr size_t index_undefined = static_cast<size_t>(-1);
+
 #ifdef CMT_COMPILER_CLANG
+#define KFR_NATIVE_SIMD 1
+#endif
 
-using simdindex = int;
+#ifdef KFR_NATIVE_SIMD
 
-template <typename T, simdindex N>
+template <typename T, size_t N>
 using simd = T __attribute__((ext_vector_type(N)));
 
-#define KFT_CONVERT_VECTOR(X, T, N) __builtin_convertvector(X, ::kfr::simd<T, N>)
-
-#define KFR_SIMD_PARAM_ARE_DEDUCIBLE 1
-#define KFR_SIMD_FROM_SCALAR(X, T, N) (X)
-#define KFR_BUILTIN_SHUFFLEVECTOR(T, N, X, Y, I) __builtin_shufflevector(X, Y, I)
-
-#elif defined CMT_COMPILER_GNU
+template <typename T, size_t N, bool A>
+using simd_storage = internal::struct_with_alignment<simd<T, N>, A>;
 
-using simdindex = int;
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wattributes"
-
-template <typename T, simdindex N>
-struct simd_gcc
+template <typename T, size_t N, size_t... indices>
+CMT_INLINE simd<T, sizeof...(indices)> simd_shuffle(const identity<simd<T, N>>& x,
+                                                    const identity<simd<T, N>>& y, csizes_t<indices...>)
 {
-    constexpr static size_t NN = next_poweroftwo(N);
-    typedef __attribute__((vector_size(NN * sizeof(T)))) T simd_type;
-    typedef simd_type type __attribute__((__packed__, __aligned__(sizeof(T))));
-};
-#pragma GCC diagnostic pop
-
-template <typename T, simdindex N>
-using simd = typename simd_gcc<T, N>::type;
+    return __builtin_shufflevector(x, y,
+                                   ((indices == index_undefined) ? -1 : static_cast<intptr_t>(indices))...);
+}
+template <typename T, size_t N, size_t... indices>
+CMT_INLINE simd<T, sizeof...(indices)> simd_shuffle(const identity<simd<T, N>>& x, csizes_t<indices...>)
+{
+    return __builtin_shufflevector(x, x,
+                                   ((indices == index_undefined) ? -1 : static_cast<intptr_t>(indices))...);
+}
 
-#define KFT_CONVERT_VECTOR(X, T, N) static_cast<::kfr::simd<T, N>>(X)
-#define KFR_SIMD_FROM_SCALAR(X, T, N)                                                                        \
-    (__builtin_shuffle(::kfr::simd<T, N>{ X }, ::kfr::simd<int_type<sizeof(T) * 8>, N>{ 0 }))
-#define KFR_BUILTIN_SHUFFLEVECTOR(T, N, X, Y, I) ::kfr::internal::builtin_shufflevector<T, N>(X, Y, I)
+template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(is_poweroftwo(N))>
+CMT_INLINE simd<T, N> simd_read(const T* src)
+{
+    return ptr_cast<simd_storage<T, N, A>>(src)->value;
+}
 
-namespace internal
+template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(!is_poweroftwo(N)), typename = void>
+CMT_INLINE simd<T, N> simd_read(const T* src)
 {
-template <typename T, size_t N, typename... Int>
-KFR_INTRIN simd<T, sizeof...(Int)> builtin_shufflevector(const simd<T, N>& x, const simd<T, N>& y,
-                                                         const Int&... indices)
+    constexpr size_t first        = prev_poweroftwo(N);
+    constexpr size_t rest         = N - first;
+    constexpr auto extend_indices = cconcat(csizeseq<rest>, csizeseq<first - rest, index_undefined, 0>);
+    constexpr auto concat_indices = csizeseq<N>;
+    return simd_shuffle<T, first>(simd_read<first, A>(src),
+                                  simd_shuffle<T, rest>(simd_read<rest, false>(src + first), extend_indices),
+                                  concat_indices);
+}
+
+template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(is_poweroftwo(N))>
+CMT_INLINE void simd_write(T* dest, const identity<simd<T, N>>& value)
 {
-    return simd<T, sizeof...(Int)>{ (indices < N ? x[indices] : y[indices])... };
+    ptr_cast<simd_storage<T, N, A>>(dest)->value = value;
 }
+
+template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(!is_poweroftwo(N)), typename = void>
+CMT_INLINE void simd_write(T* dest, const identity<simd<T, N>>& value)
+{
+    constexpr size_t first = prev_poweroftwo(N);
+    constexpr size_t rest  = N - first;
+    simd_write<A, first>(dest, simd_shuffle(value, csizeseq<first>));
+    simd_write<false, rest>(dest + first, simd_shuffle(value, csizeseq<rest, first>));
 }
 
+#define KFR_SIMD_CAST(T, N, X) __builtin_convertvector(X, ::kfr::simd<T, N>)
+#define KFR_SIMD_BITCAST(T, N, X) ((::kfr::simd<T, N>)(X))
+#define KFR_SIMD_BROADCAST(T, N, X) ((::kfr::simd<T, N>)(X))
+#define KFR_SIMD_SHUFFLE(X, Y, ...) __builtin_shufflevector(X, Y, __VA_ARGS__)
+
 #endif
 
 template <typename T, size_t N>
@@ -85,6 +104,7 @@ struct vec_op
 {
     using type                = subtype<T>;
     using utype               = kfr::utype<type>;
+    using iutype              = conditional<kfr::is_i_class<T>::value, type, utype>;
     constexpr static size_t w = compound_type_traits<T>::width * N;
 
     CMT_INLINE constexpr static simd<type, w> add(const simd<type, w>& x, const simd<type, w>& y) noexcept
@@ -109,11 +129,13 @@ struct vec_op
     }
     CMT_INLINE constexpr static simd<type, w> shl(const simd<type, w>& x, const simd<type, w>& y) noexcept
     {
-        return x << y;
+        return reinterpret_cast<simd<type, w>>(reinterpret_cast<simd<iutype, w>>(x)
+                                               << reinterpret_cast<simd<iutype, w>>(y));
     }
     CMT_INLINE constexpr static simd<type, w> shr(const simd<type, w>& x, const simd<type, w>& y) noexcept
     {
-        return x >> y;
+        return reinterpret_cast<simd<type, w>>(reinterpret_cast<simd<iutype, w>>(x) >>
+                                               reinterpret_cast<simd<iutype, w>>(y));
     }
     CMT_INLINE constexpr static simd<type, w> neg(const simd<type, w>& x) noexcept { return -x; }
     CMT_INLINE constexpr static simd<type, w> band(const simd<type, w>& x, const simd<type, w>& y) noexcept
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -300,6 +300,29 @@ CMT_INLINE void zeroize(T1& value)
 {
     builtin_memset(static_cast<void*>(builtin_addressof(value)), 0, sizeof(T1));
 }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wattributes"
+
+template <typename T, bool A>
+struct struct_with_alignment
+{
+    T value;
+    KFR_INTRIN void operator=(T value) { this->value = value; }
+};
+
+template <typename T>
+struct struct_with_alignment<T, false>
+{
+    T value;
+    KFR_INTRIN void operator=(T value) { this->value = value; }
+}
+#ifdef CMT_GNU_ATTRIBUTES
+__attribute__((__packed__, __may_alias__)) //
+#endif
+;
+
+#pragma GCC diagnostic pop
 }
 
 template <typename T>
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -80,50 +80,11 @@ template <typename T, size_t N>
 struct is_vec_impl<mask<T, N>> : std::true_type
 {
 };
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wattributes"
-
-template <typename T, bool A>
-struct struct_with_alignment
-{
-    T value;
-    KFR_INTRIN void operator=(T value) { this->value = value; }
-};
-
-template <typename T>
-struct struct_with_alignment<T, false>
-{
-    T value;
-    KFR_INTRIN void operator=(T value) { this->value = value; }
-}
-#ifdef CMT_GNU_ATTRIBUTES
-__attribute__((__packed__, __may_alias__)) //
-#endif
-;
 }
 
 template <typename T>
 using is_vec = internal::is_vec_impl<T>;
 
-template <typename T, size_t N, bool A>
-using vec_algn = internal::struct_with_alignment<simd<T, N>, A>;
-
-#pragma GCC diagnostic pop
-
-template <typename T, size_t N, bool A>
-struct vec_ptr
-{
-    constexpr CMT_INLINE vec_ptr(T* data) noexcept : data(data) {}
-    constexpr CMT_INLINE vec_ptr(const T* data) noexcept : data(const_cast<T*>(data)) {}
-    CMT_INLINE const vec_algn<T, N, A>& operator[](size_t i) const
-    {
-        return *static_cast<vec_algn<T, N, A>*>(data + i);
-    }
-    CMT_INLINE vec_algn<T, N, A>& operator[](size_t i) { return *static_cast<vec_algn<T, N, A>*>(data + i); }
-    T* data;
-};
-
 template <typename To, typename From, size_t N,
           KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value),
           size_t Nout = N* compound_type_traits<From>::width / compound_type_traits<To>::width>
@@ -151,15 +112,12 @@ get_vec_index(int = 0)
     return fn.template operator()<index>();
 }
 
-constexpr size_t index_undefined = static_cast<size_t>(-1);
-
 template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(!is_compound<T>::value)>
 CMT_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, const vec<T, N>& x,
                                                     const vec<T, N>& y)
 {
-    vec<T, sizeof...(Indices)> result = KFR_BUILTIN_SHUFFLEVECTOR(
-        T, N, *x, *y,
-        static_cast<intptr_t>(Indices == index_undefined ? -1 : static_cast<intptr_t>(Indices))...);
+    vec<T, sizeof...(Indices)> result = KFR_SIMD_SHUFFLE(
+        *x, *y, static_cast<intptr_t>(Indices == index_undefined ? -1 : static_cast<intptr_t>(Indices))...);
     return result;
 }
 
@@ -282,7 +240,7 @@ template <typename To, typename From, size_t N, typename Tsub = deep_subtype<To>
           size_t Nout = N* compound_type_traits<To>::deep_width>
 constexpr CMT_INLINE vec<To, N> builtin_convertvector(const vec<From, N>& value) noexcept
 {
-    return KFT_CONVERT_VECTOR(*value, Tsub, Nout);
+    return KFR_SIMD_CAST(Tsub, Nout, *value);
 }
 
 // scalar to scalar
@@ -364,13 +322,17 @@ constexpr CMT_INLINE To bitcast(const From& value) noexcept
 template <typename To, typename From, size_t N, size_t Nout = N* size_of<From>() / size_of<To>()>
 constexpr CMT_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept
 {
-    return reinterpret_cast<typename vec<To, Nout>::simd_t>(*value);
+    using Tsub             = typename vec<To, Nout>::scalar_type;
+    constexpr size_t width = vec<To, Nout>::scalar_size();
+    return KFR_SIMD_BITCAST(Tsub, width, *value);
 }
 
 template <typename To, typename From, size_t N, size_t Nout = N* size_of<From>() / size_of<To>()>
 constexpr CMT_INLINE mask<To, Nout> bitcast(const mask<From, N>& value) noexcept
 {
-    return reinterpret_cast<typename mask<To, Nout>::simd_t>(*value);
+    using Tsub             = typename mask<To, Nout>::scalar_type;
+    constexpr size_t width = mask<To, Nout>::scalar_size();
+    return KFR_SIMD_BITCAST(Tsub, width, *value);
 }
 
 template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
@@ -450,7 +412,7 @@ constexpr CMT_INLINE vec<T, Nout> resize(const vec<T, N>& x)
     return x;
 }
 KFR_FN(resize)
-
+/*
 namespace internal_read_write
 {
 
@@ -484,13 +446,13 @@ CMT_INLINE void write(T* dest, const vec<T, N>& value)
     internal_read_write::write<false, rest>(dest + first,
                                             shufflevector<rest, internal::shuffle_index<first>>(value));
 }
-}
+}*/
 
 template <typename T, size_t N>
 struct pkd_vec
 {
     constexpr pkd_vec() noexcept {}
-    pkd_vec(const vec<T, N>& value) noexcept { internal_read_write::write(v, value); }
+    pkd_vec(const vec<T, N>& value) noexcept { simd_write<false, vec<T, N>::scalar_size()>(v, *value); }
     template <typename... Ts>
     constexpr pkd_vec(Ts... init) noexcept : v{ static_cast<T>(init)... }
     {
@@ -578,13 +540,13 @@ struct CMT_EMPTY_BASES vec : vec_t<T, N>, operators::empty
     static_assert(!is_vec<T>::value || is_poweroftwo(size_of<T>()),
                   "Inner vector size must be a power of two");
 
+    constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; }
     using UT          = utype<T>;
     using value_type  = T;
     using scalar_type = subtype<T>;
-    constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; }
-    using simd_t = simd<scalar_type, N * compound_type_traits<T>::width>;
-    using ref    = vec&;
-    using cref   = const vec&;
+    using simd_t      = simd<scalar_type, N * compound_type_traits<T>::width>;
+    using ref         = vec&;
+    using cref        = const vec&;
 
     constexpr static bool is_pod = true;
 
@@ -598,7 +560,7 @@ struct CMT_EMPTY_BASES vec : vec_t<T, N>, operators::empty
     }
     template <typename U,
               KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width == 1)>
-    constexpr CMT_INLINE vec(const U& value) noexcept : v(KFR_SIMD_FROM_SCALAR(static_cast<T>(value), T, N))
+    constexpr CMT_INLINE vec(const U& value) noexcept : v(KFR_SIMD_BROADCAST(T, N, static_cast<T>(value)))
     {
     }
     template <typename... Ts>
@@ -618,18 +580,16 @@ struct CMT_EMPTY_BASES vec : vec_t<T, N>, operators::empty
     CMT_INLINE vec& operator=(const vec&) noexcept = default;
     CMT_INLINE vec& operator=(vec&&) noexcept = default;
 
+    friend CMT_INLINE vec operator-(const vec& x) { return vec_op<T, N>::neg(x.v); }
+    friend CMT_INLINE vec operator~(const vec& x) { return vec_op<T, N>::bnot(x.v); }
     friend CMT_INLINE vec operator+(const vec& x, const vec& y) { return vec_op<T, N>::add(x.v, y.v); }
     friend CMT_INLINE vec operator-(const vec& x, const vec& y) { return vec_op<T, N>::sub(x.v, y.v); }
     friend CMT_INLINE vec operator*(const vec& x, const vec& y) { return vec_op<T, N>::mul(x.v, y.v); }
     friend CMT_INLINE vec operator/(const vec& x, const vec& y) { return vec_op<T, N>::div(x.v, y.v); }
     friend CMT_INLINE vec operator%(const vec& x, const vec& y) { return vec_op<T, N>::rem(x.v, y.v); }
-    friend CMT_INLINE vec operator-(const vec& x) { return vec_op<T, N>::neg(x.v); }
-
     friend CMT_INLINE vec operator&(const vec& x, const vec& y) { return vec_op<T, N>::band(x.v, y.v); }
     friend CMT_INLINE vec operator|(const vec& x, const vec& y) { return vec_op<T, N>::bor(x.v, y.v); }
     friend CMT_INLINE vec operator^(const vec& x, const vec& y) { return vec_op<T, N>::bxor(x.v, y.v); }
-    friend CMT_INLINE vec operator~(const vec& x) { return vec_op<T, N>::bnot(x.v); }
-
     friend CMT_INLINE vec operator<<(const vec& x, const vec& y) { return vec_op<T, N>::shl(x.v, y.v); }
     friend CMT_INLINE vec operator>>(const vec& x, const vec& y) { return vec_op<T, N>::shr(x.v, y.v); }
 
@@ -640,56 +600,16 @@ struct CMT_EMPTY_BASES vec : vec_t<T, N>, operators::empty
     friend CMT_INLINE mask<T, N> operator<=(const vec& x, const vec& y) { return vec_op<T, N>::le(x.v, y.v); }
     friend CMT_INLINE mask<T, N> operator>=(const vec& x, const vec& y) { return vec_op<T, N>::ge(x.v, y.v); }
 
-    friend CMT_INLINE vec& operator+=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::add(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator-=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::sub(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator*=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::mul(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator/=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::div(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator%=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::rem(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator&=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::band(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator|=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::bor(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator^=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::bxor(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator<<=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::shl(x.v, y.v);
-        return x;
-    }
-    friend CMT_INLINE vec& operator>>=(vec& x, const vec& y)
-    {
-        x = vec_op<T, N>::shr(x.v, y.v);
-        return x;
-    }
+    friend CMT_INLINE vec& operator+=(vec& x, const vec& y) { return x = vec_op<T, N>::add(x.v, y.v); }
+    friend CMT_INLINE vec& operator-=(vec& x, const vec& y) { return x = vec_op<T, N>::sub(x.v, y.v); }
+    friend CMT_INLINE vec& operator*=(vec& x, const vec& y) { return x = vec_op<T, N>::mul(x.v, y.v); }
+    friend CMT_INLINE vec& operator/=(vec& x, const vec& y) { return x = vec_op<T, N>::div(x.v, y.v); }
+    friend CMT_INLINE vec& operator%=(vec& x, const vec& y) { return x = vec_op<T, N>::rem(x.v, y.v); }
+    friend CMT_INLINE vec& operator&=(vec& x, const vec& y) { return x = vec_op<T, N>::band(x.v, y.v); }
+    friend CMT_INLINE vec& operator|=(vec& x, const vec& y) { return x = vec_op<T, N>::bor(x.v, y.v); }
+    friend CMT_INLINE vec& operator^=(vec& x, const vec& y) { return x = vec_op<T, N>::bxor(x.v, y.v); }
+    friend CMT_INLINE vec& operator<<=(vec& x, const vec& y) { return x = vec_op<T, N>::shl(x.v, y.v); }
+    friend CMT_INLINE vec& operator>>=(vec& x, const vec& y) { return x = vec_op<T, N>::shr(x.v, y.v); }
 
     constexpr CMT_INLINE const simd_t& operator*() const { return v; }
     CMT_GNU_CONSTEXPR CMT_INLINE simd_t& operator*() { return v; }
@@ -1359,28 +1279,6 @@ struct compound_type_traits<kfr::vec_t<T, N>>
     using deep_rebind = kfr::vec_t<cometa::deep_rebind<subtype, U>, N>;
 };
 
-#ifdef KFR_SIMD_PARAM_ARE_DEDUCIBLE
-template <typename T, size_t N>
-struct compound_type_traits<kfr::simd<T, N>>
-{
-    using subtype                      = T;
-    using deep_subtype                 = cometa::deep_subtype<T>;
-    constexpr static size_t width      = N;
-    constexpr static size_t deep_width = width * compound_type_traits<T>::width;
-    constexpr static bool is_scalar    = false;
-    constexpr static size_t depth      = cometa::compound_type_traits<T>::depth + 1;
-    template <typename U>
-    using rebind = kfr::simd<U, N>;
-    template <typename U>
-    using deep_rebind = kfr::simd<cometa::deep_rebind<subtype, U>, N>;
-
-    CMT_INLINE static constexpr const subtype& at(const kfr::simd<T, N>& value, size_t index)
-    {
-        return value[index];
-    }
-};
-#endif
-
 template <typename T, size_t N>
 struct compound_type_traits<kfr::vec<T, N>>
 {
diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp
@@ -109,13 +109,13 @@ using cvec = vec<T, N * 2>;
 template <size_t N, bool A = false, typename T>
 CMT_INLINE cvec<T, N> cread(const complex<T>* src)
 {
-    return internal_read_write::read<N * 2, A>(ptr_cast<T>(src));
+    return simd_read<N * 2, A>(ptr_cast<T>(src));
 }
 
 template <size_t N, bool A = false, typename T>
 CMT_INLINE void cwrite(complex<T>* dest, cvec<T, N> value)
 {
-    return internal_read_write::write<A>(ptr_cast<T>(dest), value);
+    return simd_write<A, N * 2>(ptr_cast<T>(dest), *value);
 }
 
 template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices>
@@ -168,7 +168,7 @@ CMT_INLINE void cwrite_group(complex<T>* dest, size_t stride, cvec<T, count * N>
 template <size_t N, bool A = false, bool split = false, typename T>
 CMT_INLINE cvec<T, N> cread_split(const complex<T>* src)
 {
-    cvec<T, N> temp = internal_read_write::read<N * 2, A>(ptr_cast<T>(src));
+    cvec<T, N> temp = simd_read<N * 2, A>(ptr_cast<T>(src));
     if (split)
         temp = splitpairs(temp);
     return temp;
@@ -179,7 +179,7 @@ CMT_INLINE void cwrite_split(complex<T>* dest, cvec<T, N> value)
 {
     if (split)
         value = interleavehalfs(value);
-    internal_read_write::write<A>(ptr_cast<T>(dest), value);
+    simd_write<A, N * 2>(ptr_cast<T>(dest), *value);
 }
 
 template <>

	kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log | Files | Refs | README

M include/kfr/base/read_write.hpp  | 4 ++--
M include/kfr/base/simd.hpp  | 92 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
M include/kfr/base/types.hpp  | 23 +++++++++++++++++++++++
M include/kfr/base/vec.hpp  | 160 +++++++++++++++----------------------------------------------------------------
M include/kfr/dft/ft.hpp  | 8 ++++----