kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 504c432ce7c5b0679d111266152d24b46730e613
parent a5251827c16beec42b386b5e78d0663ae37639d8
Author: [email protected] <[email protected]>
Date:   Sun, 12 Nov 2023 05:59:08 +0000

DFT: template for use_table

Diffstat:
M include/kfr/dft/impl/bitrev.hpp   | 37 +++++++++++++++++++++++++------------
M include/kfr/dft/impl/fft-impl.hpp |  5 ++++-
2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/include/kfr/dft/impl/bitrev.hpp b/include/kfr/dft/impl/bitrev.hpp
@@ -49,23 +49,23 @@ constexpr inline static size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev
 template <size_t Bits>
 CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x)
 {
-    if (Bits > bitrev_table_log2N)
+    if constexpr (Bits > bitrev_table_log2N)
         return bitreverse<Bits>(x);
 
     return data::bitrev_table[x] >> (bitrev_table_log2N - Bits);
 }
 
-CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x, size_t bits)
+template <bool use_table>
+CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x, size_t bits, cbool_t<use_table>)
 {
-    if (bits > bitrev_table_log2N)
+    if constexpr (use_table)
     {
-        if (bits <= 16)
-            return bitreverse<16>(x) >> (16 - bits);
-        else
-            return bitreverse<32>(x) >> (32 - bits);
+        return data::bitrev_table[x] >> (bitrev_table_log2N - bits);
+    }
+    else
+    {
+        return bitreverse<32>(x) >> (32 - bits);
     }
-
-    return data::bitrev_table[x] >> (bitrev_table_log2N - bits);
 }
 
 CMT_GNU_CONSTEXPR inline u32 dig4rev_using_table(u32 x, size_t bits)
@@ -332,8 +332,8 @@ KFR_INTRINSIC void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4,
     cwrite_reordered(inout + i, vj, N4, cbool_t<use_br2>());
 }
 
-template <typename T>
-KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
+template <typename T, bool use_table>
+KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2, cbool_t<use_table>)
 {
     const size_t N = size_t(1) << log2n;
     const size_t N4 = N / 4;
@@ -345,7 +345,7 @@ KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
 
     for (size_t i = 0; i < iend;)
     {
-        size_t j = bitrev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
+        size_t j = bitrev_using_table(static_cast<u32>(i >> 3), log2n - 4, cbool<use_table>) << 3;
         if (i >= j)
         {
             fft_reorder_swap_n4(io, i, j, N4, use_br2);
@@ -381,6 +381,19 @@ KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
 }
 
 template <typename T>
+KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
+{
+    if (log2n - 4 > bitrev_table_log2N)
+    {
+        fft_reorder(inout, log2n, ctrue, cfalse);
+    }
+    else
+    {
+        fft_reorder(inout, log2n, ctrue, ctrue);
+    }
+}
+
+template <typename T>
 KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2)
 {
     const size_t N = size_t(1) << log2n;
diff --git a/include/kfr/dft/impl/fft-impl.hpp b/include/kfr/dft/impl/fft-impl.hpp
@@ -53,6 +53,7 @@ constexpr bool inline use_autosort = false;
 #define KFR_AUTOSORT_FOR_256D
 #define KFR_AUTOSORT_FOR_512
 #define KFR_AUTOSORT_FOR_1024
+#define KFR_AUTOSORT_FOR_2048
 
 #ifdef CMT_ARCH_AVX
 template <>
@@ -1634,6 +1635,8 @@ struct fft_specialization<T, 11> : dft_stage<T>
         radix8_autosort_pass_last(256, csize<width>, no, no, no, cbool<inverse>, out, out, tw);
     }
 };
+
+#else
 #endif
 
 } // namespace intrinsics
@@ -1736,7 +1739,7 @@ KFR_INTRINSIC void init_fft(dft_plan<T>* self, size_t size, dft_order)
 {
     const size_t log2n = ilog2(size);
     cswitch(
-        csizes_t<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10>(), log2n,
+        csizes_t<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11>(), log2n,
         [&](auto log2n)
         {
             (void)log2n;