commit ad36864930813a19a3ddb1f67430ade167372722
parent 7d1a9246ba3e7f2fd0cb90592e133377a1312821
Author: [email protected] <[email protected]>
Date: Sat, 30 Jul 2016 19:37:06 +0300
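FFT: Pass size to radix4_pass as int instead of csize<N> (except in the last pass of each final_pass); switch free function templates from KFR_INTRIN to KFR_SINTRIN (improve build speed)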
Diffstat:
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -73,7 +73,7 @@ namespace internal
{
template <size_t width, bool inverse, typename T>
-KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_format*/, cbool_t<inverse>,
+KFR_SINTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_format*/, cbool_t<inverse>,
cvec<T, width> w, cvec<T, width> tw)
{
cvec<T, width> b1 = w * dupeven(tw);
@@ -86,7 +86,7 @@ KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_
}
template <size_t width, bool use_br2, bool inverse, bool aligned, typename T>
-KFR_INTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse_t, cbool_t<use_br2>,
+KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse_t, cbool_t<use_br2>,
cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in,
const complex<T>* twiddle)
{
@@ -133,7 +133,7 @@ KFR_INTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse
}
template <size_t width, bool inverse, typename T>
-KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_format*/, cbool_t<inverse>,
+KFR_SINTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_format*/, cbool_t<inverse>,
cvec<T, width> w, cvec<T, width> tw)
{
vec<T, width> re1, im1, twre, twim;
@@ -150,7 +150,7 @@ KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_f
}
template <size_t width, bool splitout, bool splitin, bool use_br2, bool inverse, bool aligned, typename T>
-KFR_INTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout>, cbool_t<splitin>,
+KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout>, cbool_t<splitin>,
cbool_t<use_br2>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out,
const complex<T>* in, const complex<T>* twiddle)
{
@@ -222,7 +222,7 @@ KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size)
}
template <typename T, size_t width>
-KFR_INTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_t nnstep, size_t size,
+KFR_SINTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_t nnstep, size_t size,
bool split_format)
{
vec<T, 2 * width> result = T();
@@ -254,7 +254,7 @@ KFR_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, s
}
template <typename T>
-KFR_INTRIN void prefetch_one(const complex<T>* in)
+KFR_SINTRIN void prefetch_one(const complex<T>* in)
{
#ifdef CID_ARCH_X86
__builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
@@ -264,7 +264,7 @@ KFR_INTRIN void prefetch_one(const complex<T>* in)
}
template <typename T>
-KFR_INTRIN void prefetch_four(size_t stride, const complex<T>* in)
+KFR_SINTRIN void prefetch_four(size_t stride, const complex<T>* in)
{
#ifdef CID_ARCH_X86
__builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
@@ -281,7 +281,7 @@ KFR_INTRIN void prefetch_four(size_t stride, const complex<T>* in)
template <typename Ntype, size_t width, bool splitout, bool splitin, bool prefetch, bool use_br2,
bool inverse, bool aligned, typename T>
-KFR_INTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t<splitout>, cbool_t<splitin>,
+KFR_SINTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t<splitout>, cbool_t<splitin>,
cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
complex<T>* out, const complex<T>* in, const complex<T>*& twiddle)
{
@@ -500,11 +500,11 @@ protected:
KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
{
- radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(512, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(csize<128>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(128, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<32>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(32, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
radix4_pass(csize<8>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
@@ -512,9 +512,9 @@ protected:
KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
{
- radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(512, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(csize<128>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(128, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
radix4_pass(csize<32>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
@@ -522,13 +522,13 @@ protected:
KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
{
- radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(1024, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(256, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<64>, 16, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(64, 16, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<16>, 64, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(16, 64, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
radix4_pass(csize<4>, 256, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
@@ -536,11 +536,11 @@ protected:
KFR_INTRIN void final_pass(csize_t<16>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
{
- radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(1024, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(256, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<64>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(64, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
radix4_pass(csize<16>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
@@ -689,9 +689,9 @@ protected:
KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
{
- radix4_pass(csize<128>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(128, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(csize<32>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(32, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
radix4_pass(csize<8>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
@@ -699,7 +699,7 @@ protected:
KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
{
- radix4_pass(csize<128>, 1, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(128, 1, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
radix4_pass(csize<32>, 4, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
@@ -775,11 +775,11 @@ protected:
KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
{
- radix4_pass(csize<256>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(256, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
- radix4_pass(csize<64>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
+ radix4_pass(64, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
cbool<aligned>, out, out, twiddle);
- radix4_pass(csize<16>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ radix4_pass(16, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);
radix4_pass(csize<4>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, out, twiddle);