kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 75d53fab869b9491115fcac83f532236e0d28544
parent 7a121a55a3969cb549a2c418e9944e344a105f6b
Author: [email protected] <[email protected]>
Date:   Fri, 22 Jul 2016 17:30:23 +0300

__builtin_prefetch

Diffstat:
M include/kfr/dft/fft.hpp | 11 +++++++++++
1 file changed, 11 insertions(+), 0 deletions(-)

diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -256,16 +256,27 @@ KFR_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, s
 template <typename T>
 KFR_INTRIN void prefetch_one(const complex<T>* in)
 {
+#ifdef CID_ARCH_X86
     __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
+#else
+    __builtin_prefetch(ptr_cast<void>(in));
+#endif
 }
 
 template <typename T>
 KFR_INTRIN void prefetch_four(size_t stride, const complex<T>* in)
 {
+#ifdef CID_ARCH_X86
     __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
     __builtin_prefetch(ptr_cast<void>(in + stride), 0, _MM_HINT_T0);
     __builtin_prefetch(ptr_cast<void>(in + stride * 2), 0, _MM_HINT_T0);
     __builtin_prefetch(ptr_cast<void>(in + stride * 3), 0, _MM_HINT_T0);
+#else
+    __builtin_prefetch(ptr_cast<void>(in));
+    __builtin_prefetch(ptr_cast<void>(in + stride));
+    __builtin_prefetch(ptr_cast<void>(in + stride * 2));
+    __builtin_prefetch(ptr_cast<void>(in + stride * 3));
+#endif
 }
 
 template <typename Ntype, size_t width, bool splitout, bool splitin, bool prefetch, bool use_br2,