commit 237db95b698f5afbe3702feff8531ec00647fd4d
parent 6f6f113fe92c3795504fd23d5f684f8e7105f391
Author: [email protected] <[email protected]>
Date: Wed, 13 Mar 2019 01:10:48 +0000
short_fir: change default vector width
Diffstat:
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,7 +2,7 @@
## 3.0.7
-2019-03-11
+2019-03-13
#### Added
@@ -12,6 +12,7 @@
#### Changed
- `mask<>` is now a specialization of `vec<>`. This allows using many `vec` functions for masks
+- `short_fir` performance has been improved by about 50-60%
#### Fixed
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -105,7 +105,7 @@ struct expression_short_fir : expression_with_arguments<E1>
template <size_t N>
KFR_INTRINSIC friend vec<U, N> get_elements(const expression_short_fir& self, cinput_t cinput,
- size_t index, vec_shape<U, N> x)
+ size_t index, vec_shape<U, N> x)
{
vec<U, N> in = self.argument_first(cinput, index, x);
@@ -133,7 +133,7 @@ struct expression_fir : expression_with_arguments<E1>
template <size_t N>
KFR_INTRINSIC friend vec<U, N> get_elements(const expression_fir& self, cinput_t cinput, size_t index,
- vec_shape<U, N> x)
+ vec_shape<U, N> x)
{
const size_t tapcount = self.state.s.taps.size();
const vec<U, N> input = self.argument_first(cinput, index, x);
@@ -184,11 +184,11 @@ KFR_INTRINSIC internal::expression_fir<T, U, E1, true> fir(fir_state<T, U>& stat
* @param taps coefficients for the FIR filter
*/
template <typename T, size_t TapCount, typename E1>
-KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount), T, value_type_of<E1>, E1> short_fir(
- E1&& e1, const univector<T, TapCount>& taps)
+KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1>
+short_fir(E1&& e1, const univector<T, TapCount>& taps)
{
- static_assert(TapCount >= 2 && TapCount <= 32, "Use short_fir only for small FIR filters");
- return internal::expression_short_fir<next_poweroftwo(TapCount), T, value_type_of<E1>, E1>(
+ static_assert(TapCount >= 2 && TapCount <= 33, "Use short_fir only for small FIR filters");
+ return internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1>(
std::forward<E1>(e1), taps);
}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -33,7 +33,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/tests/cmake/")
if (ENABLE_ASMTEST)
add_executable(asm_test asm_test.cpp)
target_link_libraries(asm_test kfr)
- target_set_arch(asm_test PRIVATE sse2)
+ target_set_arch(asm_test PRIVATE avx)
target_compile_definitions(asm_test PRIVATE KFR_SHOW_NOT_OPTIMIZED)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
target_compile_options(asm_test PRIVATE -fno-stack-protector)