kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit ad36864930813a19a3ddb1f67430ade167372722
parent 7d1a9246ba3e7f2fd0cb90592e133377a1312821
Author: [email protected] <[email protected]>
Date:   Sat, 30 Jul 2016 19:37:06 +0300

FFT: Pass size as int (improve build speed)

Diffstat:
M include/kfr/dft/fft.hpp | 52 ++++++++++++++++++++++++++--------------------------
1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -73,7 +73,7 @@ namespace internal { template <size_t width, bool inverse, typename T> -KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_format*/, cbool_t<inverse>, +KFR_SINTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_format*/, cbool_t<inverse>, cvec<T, width> w, cvec<T, width> tw) { cvec<T, width> b1 = w * dupeven(tw); @@ -86,7 +86,7 @@ KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_ } template <size_t width, bool use_br2, bool inverse, bool aligned, typename T> -KFR_INTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse_t, cbool_t<use_br2>, +KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse_t, cbool_t<use_br2>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { @@ -133,7 +133,7 @@ KFR_INTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse } template <size_t width, bool inverse, typename T> -KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_format*/, cbool_t<inverse>, +KFR_SINTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_format*/, cbool_t<inverse>, cvec<T, width> w, cvec<T, width> tw) { vec<T, width> re1, im1, twre, twim; @@ -150,7 +150,7 @@ KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_f } template <size_t width, bool splitout, bool splitin, bool use_br2, bool inverse, bool aligned, typename T> -KFR_INTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout>, cbool_t<splitin>, +KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout>, cbool_t<splitin>, cbool_t<use_br2>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { @@ -222,7 +222,7 @@ KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t 
size) } template <typename T, size_t width> -KFR_INTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_t nnstep, size_t size, +KFR_SINTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_t nnstep, size_t size, bool split_format) { vec<T, 2 * width> result = T(); @@ -254,7 +254,7 @@ KFR_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, s } template <typename T> -KFR_INTRIN void prefetch_one(const complex<T>* in) +KFR_SINTRIN void prefetch_one(const complex<T>* in) { #ifdef CID_ARCH_X86 __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0); @@ -264,7 +264,7 @@ KFR_INTRIN void prefetch_one(const complex<T>* in) } template <typename T> -KFR_INTRIN void prefetch_four(size_t stride, const complex<T>* in) +KFR_SINTRIN void prefetch_four(size_t stride, const complex<T>* in) { #ifdef CID_ARCH_X86 __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0); @@ -281,7 +281,7 @@ KFR_INTRIN void prefetch_four(size_t stride, const complex<T>* in) template <typename Ntype, size_t width, bool splitout, bool splitin, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T> -KFR_INTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t<splitout>, cbool_t<splitin>, +KFR_SINTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t<splitout>, cbool_t<splitin>, cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in, const complex<T>*& twiddle) { @@ -500,11 +500,11 @@ protected: KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { - radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, + radix4_pass(512, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); - radix4_pass(csize<128>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, + 
radix4_pass(128, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); - radix4_pass(csize<32>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(32, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); radix4_pass(csize<8>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); @@ -512,9 +512,9 @@ protected: KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { - radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, + radix4_pass(512, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); - radix4_pass(csize<128>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(128, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); radix4_pass(csize<32>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); @@ -522,13 +522,13 @@ protected: KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { - radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, + radix4_pass(1024, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); - radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(256, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); - radix4_pass(csize<64>, 16, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(64, 16, csize<width>, ctrue, ctrue, 
cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); - radix4_pass(csize<16>, 64, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(16, 64, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); radix4_pass(csize<4>, 256, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); @@ -536,11 +536,11 @@ protected: KFR_INTRIN void final_pass(csize_t<16>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { - radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, + radix4_pass(1024, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); - radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(256, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); - radix4_pass(csize<64>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(64, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); radix4_pass(csize<16>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); @@ -689,9 +689,9 @@ protected: KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { - radix4_pass(csize<128>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>, + radix4_pass(128, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); - radix4_pass(csize<32>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(32, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, 
cbool<aligned>, out, out, twiddle); radix4_pass(csize<8>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); @@ -699,7 +699,7 @@ protected: KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { - radix4_pass(csize<128>, 1, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, + radix4_pass(128, 1, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); radix4_pass(csize<32>, 4, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); @@ -775,11 +775,11 @@ protected: KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle) { - radix4_pass(csize<256>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>, + radix4_pass(256, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); - radix4_pass(csize<64>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, + radix4_pass(64, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); - radix4_pass(csize<16>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, + radix4_pass(16, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle); radix4_pass(csize<4>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, out, twiddle);