commit 504c432ce7c5b0679d111266152d24b46730e613
parent a5251827c16beec42b386b5e78d0663ae37639d8
Author: [email protected] <[email protected]>
Date: Sun, 12 Nov 2023 05:59:08 +0000
DFT: template for use_table
Diffstat:
2 files changed, 29 insertions(+), 13 deletions(-)
diff --git a/include/kfr/dft/impl/bitrev.hpp b/include/kfr/dft/impl/bitrev.hpp
@@ -49,23 +49,23 @@ constexpr inline static size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev
template <size_t Bits>
CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x)
{
- if (Bits > bitrev_table_log2N)
+ if constexpr (Bits > bitrev_table_log2N)
return bitreverse<Bits>(x);
return data::bitrev_table[x] >> (bitrev_table_log2N - Bits);
}
-CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x, size_t bits)
+template <bool use_table>
+CMT_GNU_CONSTEXPR inline u32 bitrev_using_table(u32 x, size_t bits, cbool_t<use_table>)
{
- if (bits > bitrev_table_log2N)
+ if constexpr (use_table)
{
- if (bits <= 16)
- return bitreverse<16>(x) >> (16 - bits);
- else
- return bitreverse<32>(x) >> (32 - bits);
+ return data::bitrev_table[x] >> (bitrev_table_log2N - bits);
+ }
+ else
+ {
+ return bitreverse<32>(x) >> (32 - bits);
}
-
- return data::bitrev_table[x] >> (bitrev_table_log2N - bits);
}
CMT_GNU_CONSTEXPR inline u32 dig4rev_using_table(u32 x, size_t bits)
@@ -332,8 +332,8 @@ KFR_INTRINSIC void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4,
cwrite_reordered(inout + i, vj, N4, cbool_t<use_br2>());
}
-template <typename T>
-KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
+template <typename T, bool use_table>
+KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2, cbool_t<use_table>)
{
const size_t N = size_t(1) << log2n;
const size_t N4 = N / 4;
@@ -345,7 +345,7 @@ KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
for (size_t i = 0; i < iend;)
{
- size_t j = bitrev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
+ size_t j = bitrev_using_table(static_cast<u32>(i >> 3), log2n - 4, cbool<use_table>) << 3;
if (i >= j)
{
fft_reorder_swap_n4(io, i, j, N4, use_br2);
@@ -381,6 +381,19 @@ KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
}
template <typename T>
+KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
+{
+ if (log2n - 4 > bitrev_table_log2N)
+ {
+ fft_reorder(inout, log2n, ctrue, cfalse);
+ }
+ else
+ {
+ fft_reorder(inout, log2n, ctrue, ctrue);
+ }
+}
+
+template <typename T>
KFR_INTRINSIC void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2)
{
const size_t N = size_t(1) << log2n;
diff --git a/include/kfr/dft/impl/fft-impl.hpp b/include/kfr/dft/impl/fft-impl.hpp
@@ -53,6 +53,7 @@ constexpr bool inline use_autosort = false;
#define KFR_AUTOSORT_FOR_256D
#define KFR_AUTOSORT_FOR_512
#define KFR_AUTOSORT_FOR_1024
+#define KFR_AUTOSORT_FOR_2048
#ifdef CMT_ARCH_AVX
template <>
@@ -1634,6 +1635,8 @@ struct fft_specialization<T, 11> : dft_stage<T>
radix8_autosort_pass_last(256, csize<width>, no, no, no, cbool<inverse>, out, out, tw);
}
};
+
+#else
#endif
} // namespace intrinsics
@@ -1736,7 +1739,7 @@ KFR_INTRINSIC void init_fft(dft_plan<T>* self, size_t size, dft_order)
{
const size_t log2n = ilog2(size);
cswitch(
- csizes_t<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10>(), log2n,
+ csizes_t<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11>(), log2n,
[&](auto log2n)
{
(void)log2n;