kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 237db95b698f5afbe3702feff8531ec00647fd4d
parent 6f6f113fe92c3795504fd23d5f684f8e7105f391
Author: [email protected] <[email protected]>
Date:   Wed, 13 Mar 2019 01:10:48 +0000

short_fir: change default vector width

Diffstat:
M CHANGELOG.md | 3 ++-
M include/kfr/dsp/fir.hpp | 12 ++++++------
M tests/CMakeLists.txt | 2 +-
3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,7 +2,7 @@
 ## 3.0.7
-2019-03-11
+2019-03-13
 #### Added
@@ -12,6 +12,7 @@
 #### Changed
 - `mask<>` is now a specialization of `vec<>`. This allows using many `vec` functions for masks
+- `short_fir` performance has been increased by around 50%-60%
 #### Fixed
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -105,7 +105,7 @@ struct expression_short_fir : expression_with_arguments<E1>
     template <size_t N>
     KFR_INTRINSIC friend vec<U, N> get_elements(const expression_short_fir& self, cinput_t cinput,
-                                             size_t index, vec_shape<U, N> x)
+                                                size_t index, vec_shape<U, N> x)
     {
         vec<U, N> in = self.argument_first(cinput, index, x);
@@ -133,7 +133,7 @@ struct expression_fir : expression_with_arguments<E1>
     template <size_t N>
     KFR_INTRINSIC friend vec<U, N> get_elements(const expression_fir& self, cinput_t cinput, size_t index,
-                                             vec_shape<U, N> x)
+                                                vec_shape<U, N> x)
     {
         const size_t tapcount = self.state.s.taps.size();
         const vec<U, N> input = self.argument_first(cinput, index, x);
@@ -184,11 +184,11 @@ KFR_INTRINSIC internal::expression_fir<T, U, E1, true> fir(fir_state<T, U>& stat
  * @param taps coefficients for the FIR filter
  */
 template <typename T, size_t TapCount, typename E1>
-KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount), T, value_type_of<E1>, E1> short_fir(
-    E1&& e1, const univector<T, TapCount>& taps)
+KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1>
+short_fir(E1&& e1, const univector<T, TapCount>& taps)
 {
-    static_assert(TapCount >= 2 && TapCount <= 32, "Use short_fir only for small FIR filters");
-    return internal::expression_short_fir<next_poweroftwo(TapCount), T, value_type_of<E1>, E1>(
+    static_assert(TapCount >= 2 && TapCount <= 33, "Use short_fir only for small FIR filters");
+    return internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1>(
         std::forward<E1>(e1), taps);
 }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -33,7 +33,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/tests/cmake/")
 if (ENABLE_ASMTEST)
     add_executable(asm_test asm_test.cpp)
     target_link_libraries(asm_test kfr)
-    target_set_arch(asm_test PRIVATE sse2)
+    target_set_arch(asm_test PRIVATE avx)
     target_compile_definitions(asm_test PRIVATE KFR_SHOW_NOT_OPTIMIZED)
     if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         target_compile_options(asm_test PRIVATE -fno-stack-protector)