commit 8ad8c1071c9bf81cad8da4f118af1d9978932e50
parent 3c1e2102dc51cdb6f269b74ed2d635e252d6faf5
Author: [email protected] <[email protected]>
Date: Wed, 17 Jan 2024 07:13:02 +0000
Multiarch refactoring
Diffstat:
34 files changed, 1355 insertions(+), 962 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -22,6 +22,16 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/bin)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib)
+
if (CPU_ARCH)
if (NOT KFR_ARCH)
message(
@@ -36,12 +46,6 @@ if (CPU_ARCH)
endif ()
endif ()
-if (WIN32 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
- set(CMAKE_INSTALL_PREFIX
- ""
- CACHE STRING "Reset install prefix on Win32" FORCE)
-endif ()
-
set(X86 FALSE)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
set(X86 TRUE)
@@ -57,23 +61,23 @@ else ()
set(CLANG 0)
endif ()
+include(cmake/target_set_arch.cmake)
+include(cmake/link_as_whole.cmake)
+include(cmake/add_kfr_library.cmake)
+
# Include autogenerated list of source files
include(sources.cmake)
include(CMakeDependentOption)
-add_subdirectory(src/io)
+option(KFR_ENABLE_MULTIARCH
+ "Multiple architectures will be built with runtime dispatch" ON)
option(ENABLE_TESTS "Enable KFR tests" OFF)
-cmake_dependent_option(ENABLE_EXAMPLES "Enable KFR examples" ON "ENABLE_TESTS" OFF)
+cmake_dependent_option(ENABLE_EXAMPLES "Enable KFR examples" ON "ENABLE_TESTS"
+ OFF)
if (CLANG)
option(KFR_ENABLE_DFT "Enable DFT and related algorithms." ON)
option(KFR_ENABLE_DFT_NP "Enable Non-power of 2 DFT" ON)
- if (X86)
- option(
- KFR_ENABLE_DFT_MULTIARCH
- "Build DFT static libraries for various architectures. Requires Clang"
- OFF)
- endif ()
else ()
option(KFR_ENABLE_DFT "Enable DFT and related algorithms." OFF)
option(KFR_ENABLE_DFT_NP "Enable Non-power of 2 DFT" OFF)
@@ -90,29 +94,29 @@ option(KFR_INSTALL_LIBRARIES "Include libraries in installation" ON)
mark_as_advanced(KFR_ENABLE_ASMTEST)
mark_as_advanced(KFR_REGENERATE_TESTS)
mark_as_advanced(KFR_DISABLE_CLANG_EXTENSIONS)
+mark_as_advanced(KFR_STD_COMPLEX)
if (KFR_ENABLE_CAPI_BUILD AND NOT KFR_ENABLE_DFT)
- message(FATAL_ERROR "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT to be enabled")
+ message(
+ FATAL_ERROR
+ "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT to be enabled")
endif ()
if (KFR_ENABLE_CAPI_BUILD AND NOT KFR_ENABLE_DFT_NP)
- message(FATAL_ERROR "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT_NP to be enabled")
-endif ()
-if (KFR_ENABLE_CAPI_BUILD AND KFR_ENABLE_DFT_MULTIARCH)
- message(FATAL_ERROR "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT_MULTIARCH to be disabled")
+ message(
+ FATAL_ERROR
+ "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT_NP to be enabled")
endif ()
-include(cmake/target_set_arch.cmake)
-
-function (link_as_whole TARGET TYPE LIBRARY)
- if (APPLE)
- target_link_options(${TARGET} ${TYPE} -Wl,-force_load $<TARGET_FILE:${LIBRARY}>)
- elseif (WIN32)
- target_link_options(${TARGET} ${TYPE} /WHOLEARCHIVE:$<TARGET_FILE:${LIBRARY}>)
- else ()
- target_link_options(${TARGET} ${TYPE} -Wl,--push-state,--whole-archive $<TARGET_FILE:${LIBRARY}> -Wl,--pop-state)
- endif ()
-endfunction()
+function (add_arch_library NAME ARCH SRCS DEFS)
+ add_library(${NAME}_${ARCH} ${SRCS})
+ target_link_libraries(${NAME}_${ARCH} kfr)
+ target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH})
+ target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS})
+ target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH})
+endfunction ()
+add_subdirectory(src/dsp)
+add_subdirectory(src/io)
if (KFR_ENABLE_DFT)
add_subdirectory(src/dft)
endif ()
@@ -121,16 +125,40 @@ if (KFR_ENABLE_CAPI_BUILD)
endif ()
if (NOT KFR_ARCH)
- set(KFR_ARCH detect)
+ set(KFR_ARCH target)
+endif ()
+
+if (KFR_ARCH STREQUAL "detect")
+ set(KFR_ARCH host)
+endif ()
+
+set(DETECT_NAMES host target)
+if (X86)
+ set(ALLOWED_ARCHS
+ generic
+ sse
+ sse2
+ sse3
+ ssse3
+ sse41
+ sse42
+ avx
+ avx2
+ avx512)
+else ()
+ set(ALLOWED_ARCHS generic neon neon64)
endif ()
-if (KFR_ARCH STREQUAL "detect" AND X86)
+if (KFR_ARCH IN_LIST DETECT_NAMES)
+ message(STATUS "Detecting ${KFR_ARCH} architecture")
try_run(
RUN_RESULT COMPILE_RESULT "${CMAKE_CURRENT_BINARY_DIR}/tmpdir"
${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect_cpu.cpp
CMAKE_FLAGS
"-DINCLUDE_DIRECTORIES=${CMAKE_CURRENT_SOURCE_DIR}/include"
- -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_STANDARD_REQUIRED=ON
+ -DCMAKE_CXX_STANDARD=17
+ -DCMAKE_CXX_STANDARD_REQUIRED=ON
+ "-DCOMPILE_DEFINITIONS=-DCPU_${KFR_ARCH}=1"
-DCMAKE_CXX_EXTENSIONS=ON
COMPILE_OUTPUT_VARIABLE COMPILE_OUT
RUN_OUTPUT_VARIABLE RUN_OUT)
@@ -146,7 +174,7 @@ if (KFR_ARCH STREQUAL "detect" AND X86)
message(STATUS COMPILE_RESULT = ${COMPILE_RESULT})
message(STATUS RUN_RESULT = ${RUN_RESULT})
message(STATUS COMPILE_OUT = ${COMPILE_OUT})
- message(STATUS RUN_OUT = ${RUN_OUT})
+ message(FATAL_ERROR RUN_OUT = ${RUN_OUT})
endif ()
else ()
message(
@@ -155,6 +183,24 @@ else ()
)
endif ()
+if (NOT KFR_ARCH IN_LIST ALLOWED_ARCHS)
+ message(FATAL_ERROR "Incorrect architecture set by KFR_ARCH: ${KFR_ARCH}")
+endif ()
+
+if (NOT KFR_ARCHS)
+ if (X86)
+ set(KFR_ARCHS sse2 sse41 avx avx2 avx512)
+ else ()
+ set(KFR_ARCHS ${KFR_ARCH})
+ endif ()
+endif ()
+
+string (REPLACE ";" ", " KFR_ARCHS_COMMA "${KFR_ARCHS}")
+
+if (KFR_ENABLE_MULTIARCH)
+ add_compile_definitions(KFR_ENABLED_ARCHS="${KFR_ARCHS_COMMA}")
+endif ()
+
add_library(use_arch INTERFACE)
target_set_arch(use_arch INTERFACE ${KFR_ARCH})
@@ -184,12 +230,7 @@ target_compile_options(kfr INTERFACE "$<$<CONFIG:DEBUG>:-DKFR_DEBUG>")
if (APPLE)
target_compile_options(kfr INTERFACE -faligned-allocation)
endif ()
-if (NOT IOS)
- if (NOT MSVC OR CLANG)
- # target_compile_options(kfr
- # INTERFACE "${CLANG_ARG_PREFIX}-mstackrealign")
- endif ()
-endif ()
+
if (MSVC)
target_compile_options(kfr INTERFACE -bigobj -EHsc)
else ()
@@ -205,7 +246,8 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
target_compile_options(kfr INTERFACE -Wno-ignored-qualifiers -Wno-psabi)
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
- target_compile_options(kfr INTERFACE -Wno-c++1z-extensions -Wno-psabi -Wno-unknown-warning-option)
+ target_compile_options(kfr INTERFACE -Wno-c++1z-extensions -Wno-psabi
+ -Wno-unknown-warning-option)
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
target_compile_options(kfr INTERFACE /wd4141)
@@ -218,19 +260,10 @@ if (KFR_EXTENDED_TESTS)
target_compile_definitions(kfr INTERFACE -DKFR_EXTENDED_TESTS)
endif ()
-if (X86)
- add_executable(detect_cpu ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect_cpu.cpp)
- target_link_libraries(detect_cpu PRIVATE kfr)
- target_set_arch(detect_cpu PRIVATE generic)
-endif ()
-
-function (add_arch_library NAME ARCH SRCS DEFS)
- add_library(${NAME}_${ARCH} ${SRCS})
- target_link_libraries(${NAME}_${ARCH} kfr)
- target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH})
- target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS})
- target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH})
-endfunction ()
+# if (X86) add_executable(detect_cpu
+# ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect_cpu.cpp)
+# target_link_libraries(detect_cpu PRIVATE kfr) target_set_arch(detect_cpu
+# PRIVATE generic) endif ()
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
@@ -240,26 +273,6 @@ if (ENABLE_TESTS)
add_subdirectory(tests)
endif ()
-add_library(kfr_io ${KFR_IO_SRC})
-target_link_libraries(kfr_io kfr)
-target_link_libraries(kfr_io use_arch)
-
-if (KFR_INSTALL_LIBRARIES)
- install(
- TARGETS kfr kfr_io
- ARCHIVE DESTINATION lib
- LIBRARY DESTINATION lib
- RUNTIME DESTINATION bin)
-
- if (KFR_ENABLE_DFT AND KFR_ENABLE_CAPI_BUILD)
- install(
- TARGETS kfr_capi
- ARCHIVE DESTINATION lib
- LIBRARY DESTINATION lib
- RUNTIME DESTINATION bin)
- endif ()
-endif ()
-
set(kfr_defines)
function (append_defines_from target)
diff --git a/cmake/add_kfr_library.cmake b/cmake/add_kfr_library.cmake
@@ -0,0 +1,37 @@
+# add_kfr_library(NAME <n> [MULTIARCH] SOURCES <s>... [LIBRARIES|DEFINITIONS|OPTIONS <v>...])
+# Builds static library <n>; if MULTIARCH is given and KFR_ENABLE_MULTIARCH is on, builds one
+# <n>_<arch> per KFR_ARCHS entry behind INTERFACE target <n>. Built targets are returned in <n>_LIBS.
+function (add_kfr_library)
+    cmake_parse_arguments(LIB "MULTIARCH" "NAME" "SOURCES;LIBRARIES;DEFINITIONS;OPTIONS" ${ARGN})
+    # Reset the local accumulator: a function scope starts with a copy of the caller's variables.
+    set(${LIB_NAME}_LIBS)
+    if (KFR_ENABLE_MULTIARCH AND LIB_MULTIARCH)
+        add_library(${LIB_NAME} INTERFACE)
+        foreach (ARCH IN LISTS KFR_ARCHS)
+            add_library(${LIB_NAME}_${ARCH} STATIC ${LIB_SOURCES})
+            target_compile_definitions(${LIB_NAME}_${ARCH} PRIVATE CMT_MULTI=1)
+            foreach (ENABLED_ARCH IN LISTS KFR_ARCHS)
+                string(TOUPPER ${ENABLED_ARCH} ENABLED_ARCH_UPPER)
+                target_compile_definitions(${LIB_NAME}_${ARCH} PRIVATE CMT_MULTI_ENABLED_${ENABLED_ARCH_UPPER}=1)
+            endforeach()
+            list(APPEND ${LIB_NAME}_LIBS ${LIB_NAME}_${ARCH})
+            target_link_libraries(${LIB_NAME} INTERFACE ${LIB_NAME}_${ARCH})
+            target_set_arch(${LIB_NAME}_${ARCH} PRIVATE ${ARCH})
+        endforeach ()
+        # The first KFR_ARCHS entry is the base arch; its library is linked as a whole archive.
+        list(GET KFR_ARCHS 0 BASE_ARCH)
+        target_compile_definitions(${LIB_NAME}_${BASE_ARCH} PRIVATE CMT_BASE_ARCH=1)
+        link_as_whole(${LIB_NAME} INTERFACE ${LIB_NAME}_${BASE_ARCH})
+    else ()
+        add_library(${LIB_NAME} STATIC ${LIB_SOURCES})
+        list(APPEND ${LIB_NAME}_LIBS ${LIB_NAME})
+        target_set_arch(${LIB_NAME} PRIVATE ${KFR_ARCH})
+    endif ()
+    foreach (LIB IN LISTS ${LIB_NAME}_LIBS)
+        target_link_libraries(${LIB} PUBLIC kfr)
+        target_link_libraries(${LIB} PRIVATE ${LIB_LIBRARIES})
+        target_compile_definitions(${LIB} PRIVATE ${LIB_DEFINITIONS})
+        target_compile_options(${LIB} PRIVATE ${LIB_OPTIONS})
+    endforeach ()
+    set(${LIB_NAME}_LIBS ${${LIB_NAME}_LIBS} PARENT_SCOPE)
+endfunction ()
diff --git a/cmake/detect_cpu.cpp b/cmake/detect_cpu.cpp
@@ -4,6 +4,10 @@ using namespace kfr;
int main()
{
+#ifdef CPU_target
+ cpu_t cpu = cpu_t::native;
+#else
cpu_t cpu = kfr::internal_generic::detect_cpu();
+#endif
printf("%s", cpu_name(cpu));
-}
-\ No newline at end of file
+}
diff --git a/cmake/link_as_whole.cmake b/cmake/link_as_whole.cmake
@@ -0,0 +1,13 @@
+
+# link_as_whole(<target> <PRIVATE|PUBLIC|INTERFACE> <library>)
+# Adds link options to <target> that pull in the whole static <library>, not only referenced objects.
+# Flag and path form one argument so CMake's link-option de-duplication cannot drop repeated flags.
+function (link_as_whole TARGET TYPE LIBRARY)
+    if (APPLE)
+        target_link_options(${TARGET} ${TYPE} -Wl,-force_load,$<TARGET_FILE:${LIBRARY}>)
+    elseif (WIN32)
+        target_link_options(${TARGET} ${TYPE} /WHOLEARCHIVE:$<TARGET_FILE:${LIBRARY}>)
+    else ()
+        target_link_options(${TARGET} ${TYPE} -Wl,--push-state,--whole-archive,$<TARGET_FILE:${LIBRARY}>,--pop-state)
+    endif ()
+endfunction ()
diff --git a/docs/docs/installation.md b/docs/docs/installation.md
@@ -286,7 +286,7 @@ fft_specialization<double, 7>(avx2): 0, 128, 3072, 0, 1, 0, 0, 0, 1, 0, 0
#### 2. Multiple architectures (best performance)
-Setting `KFR_ENABLE_DFT_MULTIARCH` to `ON` enables multiple architectures.
+Setting `KFR_ENABLE_MULTIARCH` to `ON` enables multiple architectures.
In this case instead of a single `libkfr_dft.a` multiple arch-specific libraries will be installed.
```
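-cmake -GNinja -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DKFR_ENABLE_DFT_MULTIARCH=ON ..
+cmake -GNinja -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DKFR_ENABLE_MULTIARCH=ON ..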
@@ -341,7 +341,7 @@ cmake -GNinja -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCM
ninja
ninja install # This installs kfr_dft_sse2.lib kfr_dft_sse41.lib kfr_dft_avx.lib kfr_dft_avx2.lib kfr_dft_avx512.lib to CMAKE_BINARY_DIR/install
```
-`KFR_ENABLE_DFT_MULTIARCH=ON` is the key option here.
+`KFR_ENABLE_MULTIARCH=ON` is the key option here.
```
/WHOLEARCHIVE:"PATH-TO-INSTALLED-KFR/lib/kfr_dft_sse2.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_sse41.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_avx.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_avx2.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_avx512.lib"
```
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -24,16 +24,16 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin)
file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/svg)
add_executable(biquads biquads.cpp)
-target_link_libraries(biquads kfr use_arch)
+target_link_libraries(biquads kfr kfr_dsp use_arch)
add_executable(iir iir.cpp)
-target_link_libraries(iir kfr use_arch)
+target_link_libraries(iir kfr kfr_dsp use_arch)
add_executable(window window.cpp)
target_link_libraries(window kfr use_arch)
add_executable(fir fir.cpp)
-target_link_libraries(fir kfr use_arch)
+target_link_libraries(fir kfr kfr_dsp use_arch)
if (KFR_ENABLE_DFT)
target_link_libraries(fir kfr_dft use_arch)
@@ -41,7 +41,7 @@ if (KFR_ENABLE_DFT)
endif ()
add_executable(sample_rate_conversion sample_rate_conversion.cpp)
-target_link_libraries(sample_rate_conversion kfr kfr_io use_arch)
+target_link_libraries(sample_rate_conversion kfr kfr_io kfr_dsp use_arch)
if (KFR_ENABLE_DFT)
add_executable(dft dft.cpp)
diff --git a/include/kfr/base/filter.hpp b/include/kfr/base/filter.hpp
@@ -113,6 +113,7 @@ public:
explicit expression_filter(expression_handle<T, 1> filter_expr) : filter_expr(std::move(filter_expr)) {}
protected:
+ expression_filter() = default;
void process_buffer(T* dest, const T* src, size_t size) override
{
substitute(filter_expr, to_handle(make_univector(src, size)));
diff --git a/include/kfr/capi.h b/include/kfr/capi.h
@@ -55,9 +55,13 @@
#else
#define KFR_API_SPEC KFR_CDECL __declspec(dllimport)
#endif
+#else // !WIN32
+#ifdef KFR_BUILDING_DLL
+#define KFR_API_SPEC KFR_CDECL __attribute__((visibility("default")))
#else
#define KFR_API_SPEC KFR_CDECL
#endif
+#endif
#ifdef __cplusplus
extern "C"
@@ -84,6 +88,8 @@ extern "C"
KFR_API_SPEC const char* kfr_enabled_archs();
KFR_API_SPEC int kfr_current_arch();
+ KFR_API_SPEC const char* kfr_last_error();
+
typedef float kfr_f32;
typedef double kfr_f64;
#if defined __STDC_IEC_559_COMPLEX__ && !defined KFR_NO_C_COMPLEX_TYPES
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -706,54 +706,6 @@ extern char* gets(char* __s);
#define CMT_NARGS2(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, ...) _10
#define CMT_NARGS(...) CMT_NARGS2(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#ifdef CMT_MULTI_ENABLED_AVX512
-#define CMT_IF_ENABLED_AVX512(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_AVX512(...)
-#endif
-
-#ifdef CMT_MULTI_ENABLED_AVX2
-#define CMT_IF_ENABLED_AVX2(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_AVX2(...)
-#endif
-
-#ifdef CMT_MULTI_ENABLED_AVX
-#define CMT_IF_ENABLED_AVX(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_AVX(...)
-#endif
-
-#ifdef CMT_MULTI_ENABLED_SSE42
-#define CMT_IF_ENABLED_SSE42(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_SSE42(...)
-#endif
-
-#ifdef CMT_MULTI_ENABLED_SSE41
-#define CMT_IF_ENABLED_SSE41(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_SSE41(...)
-#endif
-
-#ifdef CMT_MULTI_ENABLED_SSSE3
-#define CMT_IF_ENABLED_SSSE3(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_SSSE3(...)
-#endif
-
-#ifdef CMT_MULTI_ENABLED_SSE3
-#define CMT_IF_ENABLED_SSE3(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_SSE3(...)
-#endif
-
-#ifdef CMT_MULTI_ENABLED_SSE2
-#define CMT_IF_ENABLED_SSE2(...) __VA_ARGS__
-#else
-#define CMT_IF_ENABLED_SSE2(...)
-#endif
-
#define CMT_IF_IS_AVX512(...)
#define CMT_IF_IS_AVX2(...)
#define CMT_IF_IS_AVX(...)
@@ -789,46 +741,16 @@ extern char* gets(char* __s);
#define CMT_IF_IS_SSE2(...) __VA_ARGS__
#endif
-#ifdef CMT_MULTI
-
-#define CMT_MULTI_PROTO_GATE(...) \
- if (cpu == cpu_t::runtime) \
- cpu = get_cpu(); \
- switch (cpu) \
- { \
- case cpu_t::avx512: \
- CMT_IF_ENABLED_AVX512(return avx512::__VA_ARGS__;) \
- case cpu_t::avx2: \
- CMT_IF_ENABLED_AVX2(return avx2::__VA_ARGS__;) \
- case cpu_t::avx: \
- CMT_IF_ENABLED_AVX(return avx::__VA_ARGS__;) \
- case cpu_t::sse41: \
- CMT_IF_ENABLED_SSE41(return sse41::__VA_ARGS__;) \
- case cpu_t::ssse3: \
- CMT_IF_ENABLED_SSSE3(return ssse3::__VA_ARGS__;) \
- case cpu_t::sse3: \
- CMT_IF_ENABLED_SSE3(return sse3::__VA_ARGS__;) \
- case cpu_t::sse2: \
- CMT_IF_ENABLED_SSE2(return sse2::__VA_ARGS__;) \
- default: \
- return {}; \
- }
-#define CMT_MULTI_PROTO(...) \
- inline namespace CMT_ARCH_NAME \
+#ifdef CMT_COMPILER_GNU
+#define CMT_UNREACHABLE \
+ do \
{ \
- __VA_ARGS__ \
- } \
- CMT_IF_ENABLED_SSE2(CMT_IF_IS_SSE2(inline) namespace sse2{ __VA_ARGS__ }) \
- CMT_IF_ENABLED_SSE3(CMT_IF_IS_SSE3(inline) namespace sse3{ __VA_ARGS__ }) \
- CMT_IF_ENABLED_SSSE3(CMT_IF_IS_SSSE3(inline) namespace ssse3{ __VA_ARGS__ }) \
- CMT_IF_ENABLED_SSE41(CMT_IF_IS_SSE41(inline) namespace sse41{ __VA_ARGS__ }) \
- CMT_IF_ENABLED_AVX(CMT_IF_IS_AVX(inline) namespace avx{ __VA_ARGS__ }) \
- CMT_IF_ENABLED_AVX2(CMT_IF_IS_AVX2(inline) namespace avx2{ __VA_ARGS__ }) \
- CMT_IF_ENABLED_AVX512(CMT_IF_IS_AVX512(inline) namespace avx512{ __VA_ARGS__ })
-#else
-#define CMT_MULTI_PROTO(...) \
- inline namespace CMT_ARCH_NAME \
+ __builtin_unreachable(); \
+ } while (0)
+#elif defined(_MSC_VER)
+#define CMT_UNREACHABLE \
+ do \
{ \
- __VA_ARGS__ \
- }
+ __assume(false); \
+ } while (0)
#endif
diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp
@@ -42,41 +42,40 @@ CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
namespace kfr
{
-inline namespace CMT_ARCH_NAME
-{
-namespace intrinsics
+namespace internal_generic
{
template <typename T>
-univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2);
-template <typename T>
-univector<T> correlate(const univector_ref<const T>& src1, const univector_ref<const T>& src2);
-template <typename T>
-univector<T> autocorrelate(const univector_ref<const T>& src1);
-} // namespace intrinsics
+univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2,
+ bool correlate = false);
+}
/// @brief Convolution
-template <typename T, univector_tag Tag1, univector_tag Tag2>
-univector<T> convolve(const univector<T, Tag1>& src1, const univector<T, Tag2>& src2)
+template <typename T1, typename T2, univector_tag Tag1, univector_tag Tag2,
+ CMT_ENABLE_IF(std::is_same_v<std::remove_const_t<T1>, std::remove_const_t<T2>>)>
+univector<std::remove_const_t<T1>> convolve(const univector<T1, Tag1>& src1, const univector<T2, Tag2>& src2)
{
- return intrinsics::convolve(src1.slice(), src2.slice());
+ return internal_generic::convolve(src1.slice(), src2.slice());
}
/// @brief Correlation
-template <typename T, univector_tag Tag1, univector_tag Tag2>
-univector<T> correlate(const univector<T, Tag1>& src1, const univector<T, Tag2>& src2)
+template <typename T1, typename T2, univector_tag Tag1, univector_tag Tag2,
+ CMT_ENABLE_IF(std::is_same_v<std::remove_const_t<T1>, std::remove_const_t<T2>>)>
+univector<std::remove_const_t<T1>> correlate(const univector<T1, Tag1>& src1, const univector<T2, Tag2>& src2)
{
- return intrinsics::correlate(src1.slice(), src2.slice());
+ return internal_generic::convolve(src1.slice(), src2.slice(), true);
}
/// @brief Auto-correlation
template <typename T, univector_tag Tag1>
-univector<T> autocorrelate(const univector<T, Tag1>& src)
+univector<std::remove_const_t<T>> autocorrelate(const univector<T, Tag1>& src)
{
- return intrinsics::autocorrelate(src.slice());
+ univector<std::remove_const_t<T>> result = internal_generic::convolve(src.slice(), src.slice(), true);
+ result = result.slice(result.size() / 2);
+ return result;
}
-namespace internal
+namespace internal_generic
{
/// @brief Utility class to abstract real/complex differences
template <typename T>
@@ -94,7 +93,7 @@ struct dft_conv_plan<complex<T>> : public dft_plan<T>
size_t csize() const { return this->size; }
};
-} // namespace internal
+} // namespace internal_generic
/// @brief Convolution using Filter API
template <typename T>
@@ -118,7 +117,7 @@ protected:
using ST = subtype<T>;
constexpr static bool real_fft = !std::is_same_v<T, complex<ST>>;
- using plan_t = internal::dft_conv_plan<T>;
+ using plan_t = internal_generic::dft_conv_plan<T>;
// Length of filter data.
size_t data_size;
@@ -147,17 +146,6 @@ protected:
// Overlap saved from previous block to add into current block.
univector<T> overlap;
};
-} // namespace CMT_ARCH_NAME
-
-CMT_MULTI_PROTO(template <typename T>
- filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size);)
-#ifdef CMT_MULTI
-template <typename T>
-KFR_FUNCTION filter<T>* make_convolve_filter(cpu_t cpu, const univector_ref<const T>& taps, size_t block_size)
-{
- CMT_MULTI_PROTO_GATE(make_convolve_filter<T>(taps, block_size))
-}
-#endif
} // namespace kfr
CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -103,7 +103,7 @@ enum class dft_type
enum class dft_order
{
normal,
- internal, // possibly bit/digit-reversed, implementation-defined, faster to compute
+ internal, // possibly bit/digit-reversed, implementation-defined, may be faster to compute
};
enum class dft_pack_format
@@ -124,8 +124,10 @@ struct dft_stage;
template <typename T>
using dft_stage_ptr = std::unique_ptr<dft_stage<T>>;
-CMT_MULTI_PROTO(template <typename T> void dft_initialize(dft_plan<T>& plan);)
-CMT_MULTI_PROTO(template <typename T> void dft_real_initialize(dft_plan_real<T>& plan);)
+template <typename T>
+void dft_initialize(dft_plan<T>& plan);
+template <typename T>
+void dft_real_initialize(dft_plan_real<T>& plan);
/// @brief 1D DFT/FFT
template <typename T>
@@ -146,38 +148,17 @@ struct dft_plan
bool is_initialized() const { return size != 0; }
- explicit dft_plan(cpu_t cpu, size_t size, dft_order order = dft_order::normal)
- : size(size), temp_size(0), data_size(0), arblen(false)
+ [[deprecated("cpu parameter is deprecated. Runtime dispatch is used if built with "
+ "KFR_ENABLE_MULTIARCH")]] explicit dft_plan(cpu_t cpu, size_t size,
+ dft_order order = dft_order::normal)
+ : dft_plan(size, order)
{
-#ifdef KFR_DFT_MULTI
- if (cpu == cpu_t::runtime)
- cpu = get_cpu();
- switch (cpu)
- {
- case cpu_t::avx512:
- CMT_IF_ENABLED_AVX512(avx512::dft_initialize(*this); break;)
- case cpu_t::avx2:
- CMT_IF_ENABLED_AVX2(avx2::dft_initialize(*this); break;)
- case cpu_t::avx:
- CMT_IF_ENABLED_AVX(avx::dft_initialize(*this); break;)
- case cpu_t::sse42:
- case cpu_t::sse41:
- CMT_IF_ENABLED_SSE41(sse41::dft_initialize(*this); break;)
- case cpu_t::ssse3:
- CMT_IF_ENABLED_SSSE3(ssse3::dft_initialize(*this); break;)
- case cpu_t::sse3:
- CMT_IF_ENABLED_SSE3(sse3::dft_initialize(*this); break;)
- default:
- CMT_IF_ENABLED_SSE2(sse2::dft_initialize(*this); break;);
- }
-#else
(void)cpu;
- dft_initialize(*this);
-#endif
}
explicit dft_plan(size_t size, dft_order order = dft_order::normal)
- : dft_plan(cpu_t::runtime, size, order)
+ : size(size), temp_size(0), data_size(0), arblen(false)
{
+ dft_initialize(*this);
}
void dump() const
@@ -411,40 +392,19 @@ struct dft_plan_real : dft_plan<T>
bool is_initialized() const { return size != 0; }
- explicit dft_plan_real(cpu_t cpu, size_t size, dft_pack_format fmt = dft_pack_format::CCs)
- : dft_plan<T>(typename dft_plan<T>::noinit{}, size / 2), size(size), fmt(fmt)
+ [[deprecated("cpu parameter is deprecated. Runtime dispatch is used if built with "
+ "KFR_ENABLE_MULTIARCH")]] explicit dft_plan_real(cpu_t cpu, size_t size,
+ dft_pack_format fmt = dft_pack_format::CCs)
+ : dft_plan_real(size, fmt)
{
- KFR_LOGIC_CHECK(is_even(size), "dft_plan_real requires size to be even");
-#ifdef KFR_DFT_MULTI
- if (cpu == cpu_t::runtime)
- cpu = get_cpu();
- switch (cpu)
- {
- case cpu_t::avx512:
- CMT_IF_ENABLED_AVX512(avx512::dft_real_initialize(*this); break;)
- case cpu_t::avx2:
- CMT_IF_ENABLED_AVX2(avx2::dft_real_initialize(*this); break;)
- case cpu_t::avx:
- CMT_IF_ENABLED_AVX(avx::dft_real_initialize(*this); break;)
- case cpu_t::sse42:
- case cpu_t::sse41:
- CMT_IF_ENABLED_SSE41(sse41::dft_real_initialize(*this); break;)
- case cpu_t::ssse3:
- CMT_IF_ENABLED_SSSE3(ssse3::dft_real_initialize(*this); break;)
- case cpu_t::sse3:
- CMT_IF_ENABLED_SSE3(sse3::dft_real_initialize(*this); break;)
- default:
- CMT_IF_ENABLED_SSE2(sse2::dft_real_initialize(*this); break;);
- }
-#else
(void)cpu;
- dft_real_initialize(*this);
-#endif
}
explicit dft_plan_real(size_t size, dft_pack_format fmt = dft_pack_format::CCs)
- : dft_plan_real(cpu_t::runtime, size, fmt)
+ : dft_plan<T>(typename dft_plan<T>::noinit{}, size / 2), size(size), fmt(fmt)
{
+ KFR_LOGIC_CHECK(is_even(size), "dft_plan_real requires size to be even");
+ dft_real_initialize(*this);
}
void execute(complex<T>*, const complex<T>*, u8*, bool = false) const = delete;
@@ -501,9 +461,10 @@ struct dct_plan : dft_plan<T>
{
dct_plan(size_t size) : dft_plan<T>(size) { this->temp_size += sizeof(complex<T>) * size * 2; }
- dct_plan(cpu_t cpu, size_t size) : dft_plan<T>(cpu, size)
+ [[deprecated("cpu parameter is deprecated. Runtime dispatch is used if built with "
+ "KFR_ENABLE_MULTIARCH")]] dct_plan(cpu_t cpu, size_t size)
+ : dct_plan(size)
{
- this->temp_size += sizeof(complex<T>) * size * 2;
}
KFR_MEM_INTRINSIC void execute(T* out, const T* in, u8* temp, bool inverse = false) const
diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp
@@ -89,9 +89,6 @@ struct biquad_params
biquad_params<T> normalized_all() const { return normalized_a0().normalized_b0(); }
};
-inline namespace CMT_ARCH_NAME
-{
-
template <typename T, size_t filters>
struct biquad_state
{
@@ -139,6 +136,9 @@ struct biquad_block
}
};
+inline namespace CMT_ARCH_NAME
+{
+
template <size_t filters, typename T, typename E1>
struct expression_biquads_l : public expression_with_traits<E1>
{
@@ -170,13 +170,14 @@ struct expression_biquads : expression_with_traits<E1>
};
template <size_t filters, typename T>
-KFR_INTRINSIC vec<T, filters> biquad_process(const biquad_block<T, filters>& bq,
- biquad_state<T, filters>& state, const vec<T, filters>& in)
+KFR_INTRINSIC T biquad_process(vec<T, filters>& out, const biquad_block<T, filters>& bq,
+ biquad_state<T, filters>& state, T in0, const vec<T, filters>& delayline)
{
- const vec<T, filters> out = bq.b0 * in + state.s1;
- state.s1 = state.s2 + bq.b1 * in - bq.a1 * out;
- state.s2 = bq.b2 * in - bq.a2 * out;
- return out;
+ vec<T, filters> in = insertleft(in0, delayline);
+ out = bq.b0 * in + state.s1;
+ state.s1 = state.s2 + bq.b1 * in - bq.a1 * out;
+ state.s2 = bq.b2 * in - bq.a2 * out;
+ return out[filters - 1];
}
template <size_t filters, typename T, typename E1, size_t N>
@@ -189,8 +190,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads_l<filters, T, E1>&
CMT_LOOP_UNROLL
for (size_t i = 0; i < N; i++)
{
- self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out));
- out[i] = self.state.out[filters - 1];
+ out[i] = biquad_process(self.state.out, self.bq, self.state, in[i], self.state.out);
}
return out;
@@ -204,7 +204,7 @@ KFR_INTRINSIC void begin_pass(const expression_biquads<filters, T, E1>& self, sh
for (index_t i = 0; i < filters - 1; i++)
{
const vec<T, 1> in = i < size ? get_elements(self.first(), shape<1>{ i }, axis_params_v<0, 1>) : 0;
- self.state.out = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out));
+ biquad_process(self.state.out, self.bq, self.state, in[0], self.state.out);
}
}
template <size_t filters, typename T, typename E1>
@@ -226,8 +226,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& s
CMT_LOOP_UNROLL
for (size_t i = 0; i < N; i++)
{
- self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out));
- out[i] = self.state.out[filters - 1];
+ out[i] = biquad_process(self.state.out, self.bq, self.state, in[i], self.state.out);
}
if (index.front() + N == self.block_end)
self.saved_state = self.state;
@@ -237,8 +236,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& s
CMT_LOOP_UNROLL
for (size_t i = 0; i < N; i++)
{
- self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out));
- out[i] = self.state.out[filters - 1];
+ out[i] = biquad_process(self.state.out, self.bq, self.state, T(0), self.state.out);
}
}
else
@@ -248,14 +246,12 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& s
{
const vec<T, 1> in =
get_elements(self.first(), index.add_at(i, cval<index_t, 0>), axis_params_v<0, 1>);
- self.state.out = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out));
- out[i] = self.state.out[filters - 1];
+ out[i] = biquad_process(self.state.out, self.bq, self.state, in[0], self.state.out);
}
self.saved_state = self.state;
for (; i < N; i++)
{
- self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out));
- out[i] = self.state.out[filters - 1];
+ out[i] = biquad_process(self.state.out, self.bq, self.state, T(0), self.state.out);
}
}
return out;
@@ -324,14 +320,13 @@ KFR_FUNCTION expression_handle<T, 1> biquad(const std::vector<biquad_params<T>>&
return biquad<maxfiltercount>(bq.data(), bq.size(), std::forward<E1>(e1));
}
-template <typename T, size_t maxfiltercount = 4>
+} // namespace CMT_ARCH_NAME
+
+template <typename T>
class biquad_filter : public expression_filter<T>
{
public:
- biquad_filter(const biquad_params<T>* bq, size_t count)
- : expression_filter<T>(biquad<maxfiltercount>(bq, count, placeholder<T>()))
- {
- }
+ biquad_filter(const biquad_params<T>* bq, size_t count);
template <size_t N>
biquad_filter(const biquad_params<T> (&bq)[N]) : biquad_filter(bq, N)
@@ -340,17 +335,4 @@ public:
biquad_filter(const std::vector<biquad_params<T>>& bq) : biquad_filter(bq.data(), bq.size()) {}
};
-
-} // namespace CMT_ARCH_NAME
-
-CMT_MULTI_PROTO(template <typename T, size_t maxfiltercount>
- filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count);)
-
-#ifdef CMT_MULTI
-template <typename T, size_t maxfiltercount>
-KFR_FUNCTION filter<T>* make_biquad_filter(cpu_t cpu, const biquad_params<T>* bq, size_t count)
-{
- CMT_MULTI_PROTO_GATE(make_biquad_filter<T, maxfiltercount>(bq, count))
-}
-#endif
} // namespace kfr
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -39,8 +39,6 @@ CMT_PRAGMA_MSVC(warning(disable : 4244))
namespace kfr
{
-inline namespace CMT_ARCH_NAME
-{
template <typename T, size_t Size>
using fir_taps = univector<T, Size>;
@@ -90,6 +88,9 @@ struct moving_sum_state<U, tag_dynamic_vector>
mutable size_t head_cursor, tail_cursor;
};
+inline namespace CMT_ARCH_NAME
+{
+
template <size_t tapcount, typename T, typename U, typename E1, bool stateless = false>
struct expression_short_fir : expression_with_traits<E1>
{
@@ -281,6 +282,8 @@ short_fir(short_fir_state<next_poweroftwo(TapCount - 1) + 1, T, U>& state, E1&&
std::forward<E1>(e1), state);
}
+} // namespace CMT_ARCH_NAME
+
template <typename T, typename U = T>
class fir_filter : public filter<U>
{
@@ -297,34 +300,15 @@ public:
}
protected:
- void process_buffer(U* dest, const U* src, size_t size) final
- {
- make_univector(dest, size) = fir(state, make_univector(src, size));
- }
- void process_expression(U* dest, const expression_handle<U, 1>& src, size_t size) final
- {
- make_univector(dest, size) = fir(state, src);
- }
+ void process_buffer(U* dest, const U* src, size_t size) final;
+ void process_expression(U* dest, const expression_handle<U, 1>& src, size_t size) final;
-private:
fir_state<T, U> state;
};
template <typename T, typename U = T>
using filter_fir = fir_filter<T, U>;
-} // namespace CMT_ARCH_NAME
-
-CMT_MULTI_PROTO(template <typename U, typename T>
- filter<U>* make_fir_filter(const univector_ref<const T>& taps);)
-
-#ifdef CMT_MULTI
-template <typename U, typename T>
-KFR_FUNCTION filter<U>* make_fir_filter(cpu_t cpu, const univector_ref<const T>& taps)
-{
- CMT_MULTI_PROTO_GATE(make_fir_filter<U>(taps))
-}
-#endif
} // namespace kfr
CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/dsp/sample_rate_conversion.hpp b/include/kfr/dsp/sample_rate_conversion.hpp
@@ -46,9 +46,6 @@ enum class sample_rate_conversion_quality : int
perfect = 12,
};
-inline namespace CMT_ARCH_NAME
-{
-
using resample_quality = sample_rate_conversion_quality;
/// @brief Sample Rate converter
@@ -58,7 +55,7 @@ struct samplerate_converter
using itype = i64;
using ftype = subtype<T>;
-private:
+protected:
KFR_MEM_INTRINSIC ftype window(ftype n) const
{
return modzerobessel(kaiser_beta * sqrt(1 - sqr(2 * n - 1))) * reciprocal(modzerobessel(kaiser_beta));
@@ -95,40 +92,7 @@ public:
}
samplerate_converter(sample_rate_conversion_quality quality, itype interpolation_factor,
- itype decimation_factor, ftype scale = ftype(1), ftype cutoff = 0.5f)
- : kaiser_beta(window_param(quality)), depth(static_cast<itype>(filter_order(quality))),
- input_position(0), output_position(0)
- {
- const i64 gcf = gcd(interpolation_factor, decimation_factor);
- interpolation_factor /= gcf;
- decimation_factor /= gcf;
-
- taps = depth * interpolation_factor;
- order = size_t(depth * interpolation_factor - 1);
-
- this->interpolation_factor = interpolation_factor;
- this->decimation_factor = decimation_factor;
-
- const itype halftaps = taps / 2;
- filter = univector<T>(size_t(taps), T());
- delay = univector<T>(size_t(depth), T());
-
- cutoff = cutoff - transition_width() / c_pi<ftype, 4>;
-
- cutoff = cutoff / std::max(decimation_factor, interpolation_factor);
-
- for (itype j = 0, jj = 0; j < taps; j++)
- {
- filter[size_t(j)] =
- sinc((jj - halftaps) * cutoff * c_pi<ftype, 2>) * window(ftype(jj) / ftype(taps - 1));
- jj += size_t(interpolation_factor);
- if (jj >= taps)
- jj = jj - taps + 1;
- }
-
- const T s = reciprocal(sum(filter)) * static_cast<ftype>(interpolation_factor * scale);
- filter = filter * s;
- }
+ itype decimation_factor, ftype scale = ftype(1), ftype cutoff = 0.5f);
KFR_MEM_INTRINSIC itype input_position_to_intermediate(itype in_pos) const
{
@@ -186,56 +150,9 @@ public:
template <univector_tag Tag>
size_t process(univector<T, Tag>& output, univector_ref<const T> input)
{
- const itype required_input_size = input_size_for_output(output.size());
-
- const itype input_size = input.size();
- for (size_t i = 0; i < output.size(); i++)
- {
- const itype intermediate_index =
- output_position_to_intermediate(static_cast<itype>(i) + output_position);
- const itype intermediate_start = intermediate_index - taps + 1;
- const std::lldiv_t input_pos =
- floor_div(intermediate_start + interpolation_factor - 1, interpolation_factor);
- const itype input_start = input_pos.quot; // first input sample
- const itype tap_start = interpolation_factor - 1 - input_pos.rem;
- const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(tap_start * depth));
-
- if (input_start >= input_position + input_size)
- {
- output[i] = T(0);
- }
- else if (input_start >= input_position)
- {
- output[i] =
- dotproduct(truncate(padded(input.slice(input_start - input_position, depth)), depth),
- tap_ptr.truncate(depth));
- }
- else
- {
- const itype prev_count = input_position - input_start;
- output[i] =
- dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr.truncate(prev_count)) +
- dotproduct(truncate(padded(input.truncate(size_t(depth - prev_count))),
- size_t(depth - prev_count)),
- tap_ptr.slice(size_t(prev_count), size_t(depth - prev_count)));
- }
- }
-
- if (required_input_size >= depth)
- {
- delay.slice(0, delay.size()) = padded(input.slice(size_t(required_input_size - depth)));
- }
- else
- {
- delay.truncate(size_t(depth - required_input_size)) = delay.slice(size_t(required_input_size));
- delay.slice(size_t(depth - required_input_size)) = padded(input);
- }
-
- input_position += required_input_size;
- output_position += output.size();
-
- return required_input_size;
+ return process_impl(output.slice(), input);
}
+
KFR_MEM_INTRINSIC double get_fractional_delay() const { return (taps - 1) * 0.5 / decimation_factor; }
KFR_MEM_INTRINSIC size_t get_delay() const { return static_cast<size_t>(get_fractional_delay()); }
@@ -247,10 +164,17 @@ public:
itype decimation_factor;
univector<T> filter;
univector<T> delay;
+
+protected:
itype input_position;
itype output_position;
+
+ size_t process_impl(univector_ref<T> output, univector_ref<const T> input);
};
+inline namespace CMT_ARCH_NAME
+{
+
namespace internal
{
diff --git a/include/kfr/kfr.h b/include/kfr/kfr.h
@@ -68,9 +68,16 @@
#define KFR_BUILD_DETAILS_2 ""
#endif
+#ifdef KFR_ENABLED_ARCHS
+#define KFR_ENABLED_ARCHS_LIST "[" KFR_ENABLED_ARCHS "] "
+#else
+#define KFR_ENABLED_ARCHS_LIST ""
+#endif
+
#define KFR_VERSION_FULL \
"KFR " KFR_VERSION_STRING KFR_DEBUG_STR \
- " " CMT_STRINGIFY(CMT_ARCH_NAME) " " CMT_ARCH_BITNESS_NAME " (" CMT_COMPILER_FULL_NAME "/" CMT_OS_NAME \
+ " " CMT_STRINGIFY(CMT_ARCH_NAME) " " KFR_ENABLED_ARCHS_LIST CMT_ARCH_BITNESS_NAME \
+ " (" CMT_COMPILER_FULL_NAME "/" CMT_OS_NAME \
")" KFR_BUILD_DETAILS_1 KFR_BUILD_DETAILS_2
#ifdef __cplusplus
diff --git a/include/kfr/multiarch.h b/include/kfr/multiarch.h
@@ -0,0 +1,196 @@
+#include "cident.h"
+
+#ifdef CMT_ARCH_X86
+
+// x86
+// CMT_IF_ENABLED_<ARCH>(...) expands to its arguments when CMT_MULTI_ENABLED_<ARCH> is defined, else to nothing.
+#ifdef CMT_MULTI_ENABLED_AVX512
+#define CMT_IF_ENABLED_AVX512(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_AVX512(...)
+#endif
+
+#ifdef CMT_MULTI_ENABLED_AVX2
+#define CMT_IF_ENABLED_AVX2(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_AVX2(...)
+#endif
+
+#ifdef CMT_MULTI_ENABLED_AVX
+#define CMT_IF_ENABLED_AVX(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_AVX(...)
+#endif
+
+#ifdef CMT_MULTI_ENABLED_SSE42
+#define CMT_IF_ENABLED_SSE42(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_SSE42(...)
+#endif
+
+#ifdef CMT_MULTI_ENABLED_SSE41
+#define CMT_IF_ENABLED_SSE41(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_SSE41(...)
+#endif
+
+#ifdef CMT_MULTI_ENABLED_SSSE3
+#define CMT_IF_ENABLED_SSSE3(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_SSSE3(...)
+#endif
+
+#ifdef CMT_MULTI_ENABLED_SSE3
+#define CMT_IF_ENABLED_SSE3(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_SSE3(...)
+#endif
+
+#ifdef CMT_MULTI_ENABLED_SSE2
+#define CMT_IF_ENABLED_SSE2(...) __VA_ARGS__
+#else
+#define CMT_IF_ENABLED_SSE2(...)
+#endif
+
+#ifdef CMT_MULTI
+// Dispatcher body using a local `cpu`: returns <arch>::__VA_ARGS__; a disabled arch's case is empty and falls through to the next lower one.
+#define CMT_MULTI_PROTO_GATE(...) \
+ if (cpu == cpu_t::runtime) \
+ cpu = get_cpu(); \
+ switch (cpu) \
+ { \
+ case cpu_t::avx512: \
+ CMT_IF_ENABLED_AVX512(return avx512::__VA_ARGS__;) \
+ case cpu_t::avx2: \
+ CMT_IF_ENABLED_AVX2(return avx2::__VA_ARGS__;) \
+ case cpu_t::avx: \
+ CMT_IF_ENABLED_AVX(return avx::__VA_ARGS__;) \
+ case cpu_t::sse42: \
+ CMT_IF_ENABLED_SSE42(return sse42::__VA_ARGS__;) \
+ case cpu_t::sse41: \
+ CMT_IF_ENABLED_SSE41(return sse41::__VA_ARGS__;) \
+ case cpu_t::ssse3: \
+ CMT_IF_ENABLED_SSSE3(return ssse3::__VA_ARGS__;) \
+ case cpu_t::sse3: \
+ CMT_IF_ENABLED_SSE3(return sse3::__VA_ARGS__;) \
+ case cpu_t::sse2: \
+ CMT_IF_ENABLED_SSE2(return sse2::__VA_ARGS__;) \
+ default: \
+ CMT_UNREACHABLE; \
+ }
+// Statement form: runs __VA_ARGS__ with `ns` aliased to the kfr namespace picked from get_cpu(); disabled archs fall through to the next lower one.
+#define CMT_MULTI_GATE(...) \
+ switch (get_cpu()) \
+ { \
+ case cpu_t::avx512: \
+ CMT_IF_ENABLED_AVX512({ \
+ namespace ns = kfr::avx512; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ case cpu_t::avx2: \
+ CMT_IF_ENABLED_AVX2({ \
+ namespace ns = kfr::avx2; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ case cpu_t::avx: \
+ CMT_IF_ENABLED_AVX({ \
+ namespace ns = kfr::avx; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ case cpu_t::sse42: \
+ CMT_IF_ENABLED_SSE42({ \
+ namespace ns = kfr::sse42; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ case cpu_t::sse41: \
+ CMT_IF_ENABLED_SSE41({ \
+ namespace ns = kfr::sse41; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ case cpu_t::ssse3: \
+ CMT_IF_ENABLED_SSSE3({ \
+ namespace ns = kfr::ssse3; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ case cpu_t::sse3: \
+ CMT_IF_ENABLED_SSE3({ \
+ namespace ns = kfr::sse3; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ case cpu_t::sse2: \
+ CMT_IF_ENABLED_SSE2({ \
+ namespace ns = kfr::sse2; \
+ __VA_ARGS__; \
+ break; \
+ }) \
+ default: \
+ CMT_UNREACHABLE; \
+ }
+// Repeats the declarations in one namespace per enabled arch, each prefixed by CMT_IF_IS_<ARCH>(inline) (macro defined outside this header).
+#define CMT_MULTI_PROTO(...) \
+ CMT_IF_ENABLED_SSE2(CMT_IF_IS_SSE2(inline) namespace sse2{ __VA_ARGS__ }) \
+ CMT_IF_ENABLED_SSE3(CMT_IF_IS_SSE3(inline) namespace sse3{ __VA_ARGS__ }) \
+ CMT_IF_ENABLED_SSSE3(CMT_IF_IS_SSSE3(inline) namespace ssse3{ __VA_ARGS__ }) \
+ CMT_IF_ENABLED_SSE42(CMT_IF_IS_SSE42(inline) namespace sse42{ __VA_ARGS__ }) \
+ CMT_IF_ENABLED_SSE41(CMT_IF_IS_SSE41(inline) namespace sse41{ __VA_ARGS__ }) \
+ CMT_IF_ENABLED_AVX(CMT_IF_IS_AVX(inline) namespace avx{ __VA_ARGS__ }) \
+ CMT_IF_ENABLED_AVX2(CMT_IF_IS_AVX2(inline) namespace avx2{ __VA_ARGS__ }) \
+ CMT_IF_ENABLED_AVX512(CMT_IF_IS_AVX512(inline) namespace avx512{ __VA_ARGS__ })
+#else // CMT_MULTI not defined: both macros resolve to the single CMT_ARCH_NAME namespace
+#define CMT_MULTI_GATE(...) \
+ do \
+ { \
+ namespace ns = kfr::CMT_ARCH_NAME; \
+ __VA_ARGS__; \
+ break; \
+ } while (0)
+
+#define CMT_MULTI_PROTO(...) \
+ inline namespace CMT_ARCH_NAME \
+ { \
+ __VA_ARGS__ \
+ }
+#endif
+
+// CMT_MULTI_NEEDS_GATE is defined for the CMT_BASE_ARCH build and for any build without CMT_MULTI.
+#if !defined(CMT_MULTI) || defined(CMT_BASE_ARCH)
+#define CMT_MULTI_NEEDS_GATE
+#endif
+
+#else
+
+// Non-x86 targets (e.g. ARM): no runtime dispatch, CMT_ARCH_NAME is the only architecture
+// Same macro names as the x86 branch; every one resolves directly to the CMT_ARCH_NAME namespace.
+#define CMT_MULTI_PROTO_GATE(...) \
+ do \
+ { \
+ return CMT_ARCH_NAME::__VA_ARGS__; \
+ } while (0)
+
+#define CMT_MULTI_GATE(...) \
+ do \
+ { \
+ namespace ns = kfr::CMT_ARCH_NAME; \
+ __VA_ARGS__; \
+ break; \
+ } while (0)
+
+#define CMT_MULTI_PROTO(...) \
+ inline namespace CMT_ARCH_NAME \
+ { \
+ __VA_ARGS__ \
+ }
+
+// CMT_MULTI_NEEDS_GATE is defined for the CMT_BASE_ARCH build and for any build without CMT_MULTI.
+#if !defined(CMT_MULTI) || defined(CMT_BASE_ARCH)
+#define CMT_MULTI_NEEDS_GATE
+#endif
+
+#endif
diff --git a/sources.cmake b/sources.cmake
@@ -20,6 +20,7 @@ set(
${PROJECT_SOURCE_DIR}/include/kfr/cident.h
${PROJECT_SOURCE_DIR}/include/kfr/config.h
${PROJECT_SOURCE_DIR}/include/kfr/kfr.h
+ ${PROJECT_SOURCE_DIR}/include/kfr/multiarch.h
${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/endianness.hpp
@@ -318,6 +319,7 @@ set(
${PROJECT_SOURCE_DIR}/include/kfr/cident.h
${PROJECT_SOURCE_DIR}/include/kfr/config.h
${PROJECT_SOURCE_DIR}/include/kfr/kfr.h
+ ${PROJECT_SOURCE_DIR}/include/kfr/multiarch.h
${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp
${PROJECT_SOURCE_DIR}/include/kfr/base/endianness.hpp
@@ -457,12 +459,21 @@ set(
${PROJECT_SOURCE_DIR}/src/dft/convolution-impl.cpp
${PROJECT_SOURCE_DIR}/src/dft/dft-impl-f32.cpp
${PROJECT_SOURCE_DIR}/src/dft/dft-impl-f64.cpp
+ ${PROJECT_SOURCE_DIR}/src/dft/dft.cpp
${PROJECT_SOURCE_DIR}/src/dft/fft-impl-f32.cpp
${PROJECT_SOURCE_DIR}/src/dft/fft-impl-f64.cpp
)
set(
+ KFR_DSP_SRC
+ ${PROJECT_SOURCE_DIR}/src/dsp/biquad.cpp
+ ${PROJECT_SOURCE_DIR}/src/dsp/fir.cpp
+ ${PROJECT_SOURCE_DIR}/src/dsp/sample_rate_conversion.cpp
+)
+
+
+set(
KFR_IO_SRC
${PROJECT_SOURCE_DIR}/src/io/audiofile-impl.cpp
)
diff --git a/src/capi/CMakeLists.txt b/src/capi/CMakeLists.txt
@@ -16,118 +16,41 @@
cmake_minimum_required(VERSION 3.12)
-if (WIN32)
- set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-endif ()
-
-if (X86)
- set(CAPI_ARCHS sse2 sse41 avx avx2 avx512)
-else ()
- set(CAPI_ARCHS ${KFR_ARCH})
+if (NOT WIN32)
+ set(CMAKE_CXX_VISIBILITY_PRESET "hidden")
+ set(CMAKE_C_VISIBILITY_PRESET "hidden")
endif ()
-set(CMAKE_CXX_VISIBILITY_PRESET "default")
-set(CMAKE_C_VISIBILITY_PRESET "default")
-
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-fdiagnostics-absolute-paths)
endif ()
-if (MSVC)
- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
- set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
-endif ()
-
-if (APPLE)
- add_compile_options(-mmacosx-version-min=10.9)
-endif ()
-
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin)
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/bin)
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin)
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib)
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib)
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib)
-
-add_library(kfr_capi_all INTERFACE)
-target_link_libraries(kfr_capi_all INTERFACE kfr)
-if (NOT WIN32)
- add_library(kfr_capi_all_pic INTERFACE)
- target_link_libraries(kfr_capi_all_pic INTERFACE kfr)
-endif ()
-
-function (add_c_library ARCH)
- add_library(kfr_capi_${ARCH} STATIC ${KFR_DFT_SRC} dsp.cpp)
- target_link_libraries(kfr_capi_${ARCH} kfr)
- target_set_arch(kfr_capi_${ARCH} PRIVATE ${ARCH})
- target_link_libraries(kfr_capi_all INTERFACE kfr_capi_${ARCH})
- dft_compile_options(kfr_capi_${ARCH})
-
- if (NOT WIN32)
- add_library(kfr_capi_${ARCH}_pic STATIC ${KFR_DFT_SRC} dsp.cpp)
- set_property(TARGET kfr_capi_${ARCH}_pic
- PROPERTY POSITION_INDEPENDENT_CODE 1)
- target_link_libraries(kfr_capi_${ARCH}_pic kfr)
- target_set_arch(kfr_capi_${ARCH}_pic PRIVATE ${ARCH})
-
- target_link_libraries(kfr_capi_all_pic INTERFACE kfr_capi_${ARCH}_pic)
- dft_compile_options(kfr_capi_${ARCH}_pic)
- endif ()
-
-endfunction ()
-
add_library(kfr_capi SHARED capi.cpp)
-
-foreach (A IN LISTS CAPI_ARCHS)
- add_c_library(${A})
-endforeach()
-
-list(GET CAPI_ARCHS 0 FIRST_ARCH)
-
-link_as_whole(kfr_capi_all INTERFACE kfr_capi_${FIRST_ARCH})
-if (NOT WIN32)
- link_as_whole(kfr_capi_all_pic INTERFACE kfr_capi_${FIRST_ARCH}_pic)
-endif ()
-
-target_compile_definitions(
- kfr_capi
- PRIVATE -DKFR_DFT_MULTI=1
- -DCMT_MULTI=1
- -DCMT_MULTI_ENABLED_SSE2=1
- -DCMT_MULTI_ENABLED_SSE41=1
- -DCMT_MULTI_ENABLED_AVX=1
- -DCMT_MULTI_ENABLED_AVX2=1
- -DCMT_MULTI_ENABLED_AVX512=1
- -DKFR_BUILDING_DLL=1)
-
-target_set_arch(kfr_capi PRIVATE ${FIRST_ARCH})
-
-if (WIN32)
- target_link_libraries(kfr_capi PRIVATE kfr kfr_capi_all)
-else ()
- target_link_libraries(kfr_capi PRIVATE kfr kfr_capi_all_pic)
-
- if (APPLE)
- message(
- STATUS
- "Minimum macOS version is set to ${CMAKE_OSX_DEPLOYMENT_TARGET}"
- )
- message(STATUS "Set CMAKE_OSX_DEPLOYMENT_TARGET variable to change")
- else ()
- set_property(
- TARGET kfr_capi
- APPEND
- PROPERTY LINK_LIBRARIES
- -nodefaultlibs
- -Wl,-Bdynamic
- -lm
- -lc
- -Wl,-Bstatic
- -lstdc++
- -lgcc
- -s)
+target_link_libraries(kfr_capi PRIVATE kfr_dft kfr_dsp)
+
+target_compile_definitions(kfr_capi PRIVATE KFR_BUILDING_DLL=1)
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ set_property(
+ TARGET kfr_capi
+ APPEND
+ PROPERTY LINK_LIBRARIES
+ -nodefaultlibs # no implicit system libraries; the ones needed are listed explicitly below
+ -Wl,-Bdynamic # libm and libc are linked dynamically
+ -lm
+ -lc
+ -Wl,-Bstatic # libstdc++ and libgcc are linked statically into kfr_capi
+ -lstdc++
+ -lgcc
+ -s) # strip symbol information from the output
+endif ()
+
+if (KFR_INSTALL_LIBRARIES)
+ if (KFR_ENABLE_CAPI_BUILD)
+ install(
+ TARGETS kfr_capi
+ ARCHIVE DESTINATION lib
+ LIBRARY DESTINATION lib
+ RUNTIME DESTINATION bin)
endif ()
endif ()
diff --git a/src/capi/capi.cpp b/src/capi/capi.cpp
@@ -28,349 +28,452 @@
#include <kfr/capi.h>
#include <kfr/dft.hpp>
#include <kfr/dsp.hpp>
+#include <kfr/multiarch.h>
namespace kfr
{
+static thread_local std::array<char, 256> error; // per-thread NUL-terminated text of the last error; all zeros when none
+
+void reset_error() { error.fill(0); } // clears the thread-local error text
+// Copies at most error.size() - 1 characters of s into the thread-local buffer and zero-fills the rest.
+void set_error(std::string_view s)
+{
+    const auto tail = std::copy_n(s.begin(), std::min(s.size(), error.size() - 1), error.begin());
+    std::fill(tail, error.end(), 0);
+}
+
+// Returns fn() and clears the error text; if fn throws, records the message and returns `fallback` instead.
+template <typename Fn, typename R = std::invoke_result_t<Fn>, typename T>
+static R try_fn(Fn&& fn, T fallback)
+{
+    try
+    {
+        auto result = fn();
+        reset_error();
+        return result;
+    }
+    catch (const std::exception& e) // caught by const reference: the exception is only read
+    {
+        set_error(e.what());
+    }
+    catch (...)
+    {
+        set_error("(unknown exception)");
+    }
+    return fallback; // reached only when fn() threw
+}
+
+template <typename Fn>
+static void try_fn(Fn&& fn) // void overload: runs fn(); an exception is recorded via set_error(), not propagated
+{
+    try
+    {
+        fn();
+        reset_error();
+    }
+    catch (const std::exception& e) // caught by const reference: the exception is only read
+    {
+        set_error(e.what());
+    }
+    catch (...)
+    {
+        set_error("(unknown exception)");
+    }
+}
extern "C"
{
-#define KFR_ENABLED_ARCHS "sse2,sse3,ssse3,sse4.1,avx,avx2,avx512"
- const char* kfr_version_string()
+ KFR_API_SPEC const char* kfr_version_string()
{
- return "KFR " KFR_VERSION_STRING KFR_DEBUG_STR " " KFR_ENABLED_ARCHS " " CMT_ARCH_BITNESS_NAME
+        return "KFR " KFR_VERSION_STRING KFR_DEBUG_STR " " KFR_ENABLED_ARCHS_LIST CMT_ARCH_BITNESS_NAME // list ends with its own space
" (" CMT_COMPILER_FULL_NAME "/" CMT_OS_NAME ")" KFR_BUILD_DETAILS_1 KFR_BUILD_DETAILS_2;
}
- uint32_t kfr_version() { return KFR_VERSION; }
- const char* kfr_enabled_archs() { return KFR_ENABLED_ARCHS; }
- int kfr_current_arch() { return static_cast<int>(get_cpu()); }
+ KFR_API_SPEC uint32_t kfr_version() { return KFR_VERSION; }
+ KFR_API_SPEC const char* kfr_enabled_archs() { return KFR_ENABLED_ARCHS_LIST; }
+ KFR_API_SPEC int kfr_current_arch() { return static_cast<int>(get_cpu()); }
- void* kfr_allocate(size_t size) { return details::aligned_malloc(size, KFR_DEFAULT_ALIGNMENT); }
- void* kfr_allocate_aligned(size_t size, size_t alignment)
+ KFR_API_SPEC const char* kfr_last_error() { return error.data(); }
+
+ KFR_API_SPEC void* kfr_allocate(size_t size)
+ {
+ return details::aligned_malloc(size, KFR_DEFAULT_ALIGNMENT);
+ }
+ KFR_API_SPEC void* kfr_allocate_aligned(size_t size, size_t alignment)
{
return details::aligned_malloc(size, alignment);
}
- void kfr_deallocate(void* ptr) { return details::aligned_free(ptr); }
- size_t kfr_allocated_size(void* ptr) { return details::aligned_size(ptr); }
+ KFR_API_SPEC void kfr_deallocate(void* ptr) { return details::aligned_free(ptr); }
+ KFR_API_SPEC size_t kfr_allocated_size(void* ptr) { return details::aligned_size(ptr); }
- void* kfr_add_ref(void* ptr)
+ KFR_API_SPEC void* kfr_add_ref(void* ptr)
{
details::aligned_add_ref(ptr);
return ptr;
}
- void kfr_release(void* ptr) { details::aligned_release(ptr); }
+ KFR_API_SPEC void kfr_release(void* ptr) { details::aligned_release(ptr); }
- void* kfr_reallocate(void* ptr, size_t new_size)
+ KFR_API_SPEC void* kfr_reallocate(void* ptr, size_t new_size)
{
return details::aligned_reallocate(ptr, new_size, KFR_DEFAULT_ALIGNMENT);
}
- void* kfr_reallocate_aligned(void* ptr, size_t new_size, size_t alignment)
+ KFR_API_SPEC void* kfr_reallocate_aligned(void* ptr, size_t new_size, size_t alignment)
{
return details::aligned_reallocate(ptr, new_size, alignment);
}
- KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size)
+ KFR_API_SPEC KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size)
{
- if (size < 2)
- return nullptr;
- if (size > 16777216)
- return nullptr;
- return reinterpret_cast<KFR_DFT_PLAN_F32*>(new kfr::dft_plan<float>(cpu_t::runtime, size));
+ return try_fn([&]() { return reinterpret_cast<KFR_DFT_PLAN_F32*>(new kfr::dft_plan<float>(size)); },
+ nullptr);
}
- KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size)
+ KFR_API_SPEC KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size)
{
- if (size < 2)
- return nullptr;
- if (size > 16777216)
- return nullptr;
- return reinterpret_cast<KFR_DFT_PLAN_F64*>(new kfr::dft_plan<double>(cpu_t::runtime, size));
+ return try_fn([&]() { return reinterpret_cast<KFR_DFT_PLAN_F64*>(new kfr::dft_plan<double>(size)); },
+ nullptr);
}
- void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan) { reinterpret_cast<kfr::dft_plan<float>*>(plan)->dump(); }
- void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan) { reinterpret_cast<kfr::dft_plan<double>*>(plan)->dump(); }
+ KFR_API_SPEC void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan)
+ {
+ try_fn([&] { reinterpret_cast<kfr::dft_plan<float>*>(plan)->dump(); });
+ }
+ KFR_API_SPEC void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan)
+ {
+ try_fn([&] { reinterpret_cast<kfr::dft_plan<double>*>(plan)->dump(); });
+ }
- size_t kfr_dft_get_size_f32(KFR_DFT_PLAN_F32* plan)
+ KFR_API_SPEC size_t kfr_dft_get_size_f32(KFR_DFT_PLAN_F32* plan)
{
- return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size;
+ return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; }, 0);
}
- size_t kfr_dft_get_size_f64(KFR_DFT_PLAN_F64* plan)
+ KFR_API_SPEC size_t kfr_dft_get_size_f64(KFR_DFT_PLAN_F64* plan)
{
- return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size;
+ return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; }, 0);
}
- size_t kfr_dft_get_temp_size_f32(KFR_DFT_PLAN_F32* plan)
+ KFR_API_SPEC size_t kfr_dft_get_temp_size_f32(KFR_DFT_PLAN_F32* plan)
{
- return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size;
+ return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; }, 0);
}
- size_t kfr_dft_get_temp_size_f64(KFR_DFT_PLAN_F64* plan)
+ KFR_API_SPEC size_t kfr_dft_get_temp_size_f64(KFR_DFT_PLAN_F64* plan)
{
- return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size;
+ return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; }, 0);
}
- void kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute(
- reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in),
- temp, kfr::cfalse);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute(
+ reinterpret_cast<kfr::complex<float>*>(out),
+ reinterpret_cast<const kfr::complex<float>*>(in), temp, kfr::cfalse);
+ });
}
- void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute(
- reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in),
- temp, kfr::cfalse);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute(
+ reinterpret_cast<kfr::complex<double>*>(out),
+ reinterpret_cast<const kfr::complex<double>*>(in), temp, kfr::cfalse);
+ });
}
- void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute(
- reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in),
- temp, kfr::ctrue);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute(
+ reinterpret_cast<kfr::complex<float>*>(out),
+ reinterpret_cast<const kfr::complex<float>*>(in), temp, kfr::ctrue);
+ });
}
- void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute(
- reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in),
- temp, kfr::ctrue);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute(
+ reinterpret_cast<kfr::complex<double>*>(out),
+ reinterpret_cast<const kfr::complex<double>*>(in), temp, kfr::ctrue);
+ });
}
- void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan)
+ KFR_API_SPEC void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan)
{
- delete reinterpret_cast<kfr::dft_plan<float>*>(plan);
+ try_fn([&]() { delete reinterpret_cast<kfr::dft_plan<float>*>(plan); });
}
- void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan)
+ KFR_API_SPEC void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan)
{
- delete reinterpret_cast<kfr::dft_plan<double>*>(plan);
+ try_fn([&]() { delete reinterpret_cast<kfr::dft_plan<double>*>(plan); });
}
// Real DFT plans
- KFR_DFT_REAL_PLAN_F32* kfr_dft_real_create_plan_f32(size_t size, KFR_DFT_PACK_FORMAT pack_format)
+ KFR_API_SPEC KFR_DFT_REAL_PLAN_F32* kfr_dft_real_create_plan_f32(size_t size,
+ KFR_DFT_PACK_FORMAT pack_format)
{
- if (size < 4)
- return nullptr;
- if (size > 16777216)
- return nullptr;
- return reinterpret_cast<KFR_DFT_REAL_PLAN_F32*>(
- new kfr::dft_plan_real<float>(cpu_t::runtime, size, static_cast<dft_pack_format>(pack_format)));
+ return try_fn(
+ [&]()
+ {
+ return reinterpret_cast<KFR_DFT_REAL_PLAN_F32*>(
+ new kfr::dft_plan_real<float>(size, static_cast<dft_pack_format>(pack_format)));
+ },
+ nullptr);
}
- KFR_DFT_REAL_PLAN_F64* kfr_dft_real_create_plan_f64(size_t size, KFR_DFT_PACK_FORMAT pack_format)
+ KFR_API_SPEC KFR_DFT_REAL_PLAN_F64* kfr_dft_real_create_plan_f64(size_t size,
+ KFR_DFT_PACK_FORMAT pack_format)
{
- if (size < 4)
- return nullptr;
- if (size > 16777216)
- return nullptr;
- return reinterpret_cast<KFR_DFT_REAL_PLAN_F64*>(
- new kfr::dft_plan_real<double>(cpu_t::runtime, size, static_cast<dft_pack_format>(pack_format)));
+ return try_fn(
+ [&]()
+ {
+ return reinterpret_cast<KFR_DFT_REAL_PLAN_F64*>(
+ new kfr::dft_plan_real<double>(size, static_cast<dft_pack_format>(pack_format)));
+ },
+ nullptr);
}
- void kfr_dft_real_dump_f32(KFR_DFT_REAL_PLAN_F32* plan)
+ KFR_API_SPEC void kfr_dft_real_dump_f32(KFR_DFT_REAL_PLAN_F32* plan)
{
- reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->dump();
+ try_fn([&]() { reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->dump(); });
}
- void kfr_dft_real_dump_f64(KFR_DFT_REAL_PLAN_F64* plan)
+ KFR_API_SPEC void kfr_dft_real_dump_f64(KFR_DFT_REAL_PLAN_F64* plan)
{
- reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->dump();
+ try_fn([&]() { reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->dump(); });
}
- size_t kfr_dft_real_get_size_f32(KFR_DFT_REAL_PLAN_F32* plan)
+ KFR_API_SPEC size_t kfr_dft_real_get_size_f32(KFR_DFT_REAL_PLAN_F32* plan)
{
- return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->size; }, 0);
}
- size_t kfr_dft_real_get_size_f64(KFR_DFT_REAL_PLAN_F64* plan)
+ KFR_API_SPEC size_t kfr_dft_real_get_size_f64(KFR_DFT_REAL_PLAN_F64* plan)
{
- return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->size; }, 0);
}
- size_t kfr_dft_real_get_temp_size_f32(KFR_DFT_REAL_PLAN_F32* plan)
+ KFR_API_SPEC size_t kfr_dft_real_get_temp_size_f32(KFR_DFT_REAL_PLAN_F32* plan)
{
- return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->temp_size; }, 0);
}
- size_t kfr_dft_real_get_temp_size_f64(KFR_DFT_REAL_PLAN_F64* plan)
+ KFR_API_SPEC size_t kfr_dft_real_get_temp_size_f64(KFR_DFT_REAL_PLAN_F64* plan)
{
- return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->temp_size; }, 0);
}
- void kfr_dft_real_execute_f32(KFR_DFT_REAL_PLAN_F32* plan, kfr_c32* out, const float* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_real_execute_f32(KFR_DFT_REAL_PLAN_F32* plan, kfr_c32* out, const float* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute(
- reinterpret_cast<kfr::complex<float>*>(out), in, temp);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute(
+ reinterpret_cast<kfr::complex<float>*>(out), in, temp);
+ });
}
- void kfr_dft_real_execute_f64(KFR_DFT_REAL_PLAN_F64* plan, kfr_c64* out, const double* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_real_execute_f64(KFR_DFT_REAL_PLAN_F64* plan, kfr_c64* out, const double* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute(
- reinterpret_cast<kfr::complex<double>*>(out), in, temp);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute(
+ reinterpret_cast<kfr::complex<double>*>(out), in, temp);
+ });
}
- void kfr_dft_real_execute_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, const kfr_c32* in,
- uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_real_execute_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out,
+ const kfr_c32* in, uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute(
- out, reinterpret_cast<const kfr::complex<float>*>(in), temp);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute(
+ out, reinterpret_cast<const kfr::complex<float>*>(in), temp);
+ });
}
- void kfr_dft_real_execute_inverse_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, const kfr_c64* in,
- uint8_t* temp)
+ KFR_API_SPEC void kfr_dft_real_execute_inverse_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out,
+ const kfr_c64* in, uint8_t* temp)
{
- reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute(
- out, reinterpret_cast<const kfr::complex<double>*>(in), temp);
+ try_fn(
+ [&]()
+ {
+ reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute(
+ out, reinterpret_cast<const kfr::complex<double>*>(in), temp);
+ });
}
- void kfr_dft_real_delete_plan_f32(KFR_DFT_REAL_PLAN_F32* plan)
+ KFR_API_SPEC void kfr_dft_real_delete_plan_f32(KFR_DFT_REAL_PLAN_F32* plan)
{
- delete reinterpret_cast<kfr::dft_plan_real<float>*>(plan);
+ try_fn([&]() { delete reinterpret_cast<kfr::dft_plan_real<float>*>(plan); });
}
- void kfr_dft_real_delete_plan_f64(KFR_DFT_REAL_PLAN_F64* plan)
+ KFR_API_SPEC void kfr_dft_real_delete_plan_f64(KFR_DFT_REAL_PLAN_F64* plan)
{
- delete reinterpret_cast<kfr::dft_plan_real<double>*>(plan);
+ try_fn([&]() { delete reinterpret_cast<kfr::dft_plan_real<double>*>(plan); });
}
// Discrete Cosine Transform
- KFR_DCT_PLAN_F32* kfr_dct_create_plan_f32(size_t size)
+ KFR_API_SPEC KFR_DCT_PLAN_F32* kfr_dct_create_plan_f32(size_t size)
{
- if (size < 4)
- return nullptr;
- if (size > 16777216)
- return nullptr;
- return reinterpret_cast<KFR_DCT_PLAN_F32*>(new kfr::dct_plan<float>(cpu_t::runtime, size));
+ return try_fn([&]() { return reinterpret_cast<KFR_DCT_PLAN_F32*>(new kfr::dct_plan<float>(size)); },
+ nullptr);
}
- KFR_DCT_PLAN_F64* kfr_dct_create_plan_f64(size_t size)
+ KFR_API_SPEC KFR_DCT_PLAN_F64* kfr_dct_create_plan_f64(size_t size)
{
- if (size < 4)
- return nullptr;
- if (size > 16777216)
- return nullptr;
- return reinterpret_cast<KFR_DCT_PLAN_F64*>(new kfr::dct_plan<double>(cpu_t::runtime, size));
+ return try_fn([&]() { return reinterpret_cast<KFR_DCT_PLAN_F64*>(new kfr::dct_plan<double>(size)); },
+ nullptr);
}
- void kfr_dct_dump_f32(KFR_DCT_PLAN_F32* plan) { reinterpret_cast<kfr::dct_plan<float>*>(plan)->dump(); }
- void kfr_dct_dump_f64(KFR_DCT_PLAN_F64* plan) { reinterpret_cast<kfr::dct_plan<double>*>(plan)->dump(); }
+ KFR_API_SPEC void kfr_dct_dump_f32(KFR_DCT_PLAN_F32* plan)
+ {
+ try_fn([&]() { reinterpret_cast<kfr::dct_plan<float>*>(plan)->dump(); });
+ }
+ KFR_API_SPEC void kfr_dct_dump_f64(KFR_DCT_PLAN_F64* plan)
+ {
+ try_fn([&]() { reinterpret_cast<kfr::dct_plan<double>*>(plan)->dump(); });
+ }
- size_t kfr_dct_get_size_f32(KFR_DCT_PLAN_F32* plan)
+ KFR_API_SPEC size_t kfr_dct_get_size_f32(KFR_DCT_PLAN_F32* plan)
{
- return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dct_plan<float>*>(plan)->size; }, 0);
}
- size_t kfr_dct_get_size_f64(KFR_DCT_PLAN_F64* plan)
+ KFR_API_SPEC size_t kfr_dct_get_size_f64(KFR_DCT_PLAN_F64* plan)
{
- return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dct_plan<double>*>(plan)->size; }, 0);
}
- size_t kfr_dct_get_temp_size_f32(KFR_DCT_PLAN_F32* plan)
+ KFR_API_SPEC size_t kfr_dct_get_temp_size_f32(KFR_DCT_PLAN_F32* plan)
{
- return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dct_plan<float>*>(plan)->temp_size; }, 0);
}
- size_t kfr_dct_get_temp_size_f64(KFR_DCT_PLAN_F64* plan)
+ KFR_API_SPEC size_t kfr_dct_get_temp_size_f64(KFR_DCT_PLAN_F64* plan)
{
- return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size;
+        return try_fn([&]() { return reinterpret_cast<kfr::dct_plan<double>*>(plan)->temp_size; }, 0);
}
- void kfr_dct_execute_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dct_execute_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp)
{
- reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::cfalse);
+ try_fn([&]() { reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::cfalse); });
}
- void kfr_dct_execute_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dct_execute_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::cfalse);
+ try_fn([&]()
+ { reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::cfalse); });
}
- void kfr_dct_execute_inverse_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dct_execute_inverse_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::ctrue);
+ try_fn([&]() { reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::ctrue); });
}
- void kfr_dct_execute_inverse_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp)
+ KFR_API_SPEC void kfr_dct_execute_inverse_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in,
+ uint8_t* temp)
{
- reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::ctrue);
+ try_fn([&]() { reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::ctrue); });
}
- void kfr_dct_delete_plan_f32(KFR_DCT_PLAN_F32* plan)
+ KFR_API_SPEC void kfr_dct_delete_plan_f32(KFR_DCT_PLAN_F32* plan)
{
- delete reinterpret_cast<kfr::dct_plan<float>*>(plan);
+ try_fn([&]() { delete reinterpret_cast<kfr::dct_plan<float>*>(plan); });
}
- void kfr_dct_delete_plan_f64(KFR_DCT_PLAN_F64* plan)
+ KFR_API_SPEC void kfr_dct_delete_plan_f64(KFR_DCT_PLAN_F64* plan)
{
- delete reinterpret_cast<kfr::dct_plan<double>*>(plan);
+ try_fn([&]() { delete reinterpret_cast<kfr::dct_plan<double>*>(plan); });
}
// Filters
- KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size)
+ // Creates a single-precision FIR filter from `size` taps and returns it as an opaque handle.
+ // The new object is converted to its filter<float> base before being cast to the handle type,
+ // because kfr_filter_process_f32 / kfr_filter_reset_f32 / kfr_filter_delete_plan_f32 cast the
+ // handle back to filter<float>*; the round trip must start from that same pointer type.
+ // The trailing nullptr is handed to try_fn. NOTE(review): presumably the value returned when the
+ // lambda fails - confirm against try_fn's definition.
+ KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size)
{
-#ifndef CMT_MULTI
- return reinterpret_cast<KFR_FILTER_F32*>(make_fir_filter<float>(make_univector(taps, size)));
-#else
- return reinterpret_cast<KFR_FILTER_F32*>(
- make_fir_filter<float>(cpu_t::runtime, make_univector(taps, size)));
-#endif
+ return try_fn(
+ [&]()
+ {
+ filter<float>* base = new fir_filter<float>(make_univector(taps, size));
+ return reinterpret_cast<KFR_FILTER_F32*>(base);
+ },
+ nullptr);
}
- KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t size)
+ // Double-precision counterpart of kfr_filter_create_fir_plan_f32 (handle wraps a filter<double>*).
+ KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t size)
{
-#ifndef CMT_MULTI
- return reinterpret_cast<KFR_FILTER_F64*>(make_fir_filter<double>(make_univector(taps, size)));
-#else
- return reinterpret_cast<KFR_FILTER_F64*>(
- make_fir_filter<double>(cpu_t::runtime, make_univector(taps, size)));
-#endif
+ return try_fn(
+ [&]()
+ {
+ filter<double>* base = new fir_filter<double>(make_univector(taps, size));
+ return reinterpret_cast<KFR_FILTER_F64*>(base);
+ },
+ nullptr);
}
- KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size,
- size_t block_size)
+ // Creates a single-precision convolution filter from `size` taps and returns it as an opaque handle.
+ // A block_size of 0 selects the default of 1024.
+ // The new object is converted to its filter<float> base before being cast to the handle type,
+ // because the process/reset/delete entry points cast the handle back to filter<float>*.
+ KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size,
+ size_t block_size)
{
-#ifndef CMT_MULTI
- return reinterpret_cast<KFR_FILTER_F32*>(
- make_convolve_filter<float>(make_univector(taps, size), block_size ? block_size : 1024));
-#else
- return reinterpret_cast<KFR_FILTER_F32*>(make_convolve_filter<float>(
- cpu_t::runtime, make_univector(taps, size), block_size ? block_size : 1024));
-#endif
+ return try_fn(
+ [&]()
+ {
+ filter<float>* base =
+ new convolve_filter<float>(make_univector(taps, size), block_size ? block_size : 1024);
+ return reinterpret_cast<KFR_FILTER_F32*>(base);
+ },
+ nullptr);
}
- KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size,
- size_t block_size)
+ // Double-precision counterpart of kfr_filter_create_convolution_plan_f32.
+ KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size,
+ size_t block_size)
{
-#ifndef CMT_MULTI
- return reinterpret_cast<KFR_FILTER_F64*>(
- make_convolve_filter<double>(make_univector(taps, size), block_size ? block_size : 1024));
-#else
- return reinterpret_cast<KFR_FILTER_F64*>(make_convolve_filter<double>(
- cpu_t::runtime, make_univector(taps, size), block_size ? block_size : 1024));
-#endif
+ return try_fn(
+ [&]()
+ {
+ filter<double>* base =
+ new convolve_filter<double>(make_univector(taps, size), block_size ? block_size : 1024);
+ return reinterpret_cast<KFR_FILTER_F64*>(base);
+ },
+ nullptr);
}
- KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count)
+ // Creates a single-precision biquad (IIR) filter from `sos_count` sections and returns it as an
+ // opaque handle. `sos` is reinterpreted as an array of biquad_params<float>.
+ // The explicit sos_count range check of the previous version is gone; NOTE(review): out-of-range
+ // counts are presumably rejected inside biquad_filter's constructor and surfaced via try_fn - confirm.
+ // The new object is converted to its filter<float> base before being cast to the handle type,
+ // because the process/reset/delete entry points cast the handle back to filter<float>*.
+ KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count)
{
- if (sos_count < 1 || sos_count > 64)
- return nullptr;
-
-#ifndef CMT_MULTI
- return reinterpret_cast<KFR_FILTER_F32*>(
- make_biquad_filter<float, 64>(reinterpret_cast<const biquad_params<float>*>(sos), sos_count));
-#else
- return reinterpret_cast<KFR_FILTER_F32*>(make_biquad_filter<float, 64>(
- cpu_t::runtime, reinterpret_cast<const biquad_params<float>*>(sos), sos_count));
-#endif
+ return try_fn(
+ [&]()
+ {
+ filter<float>* base =
+ new biquad_filter<float>(reinterpret_cast<const biquad_params<float>*>(sos), sos_count);
+ return reinterpret_cast<KFR_FILTER_F32*>(base);
+ },
+ nullptr);
}
- KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count)
+ // Double-precision counterpart of kfr_filter_create_iir_plan_f32.
+ KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count)
{
- if (sos_count < 1 || sos_count > 64)
- return nullptr;
-
-#ifndef CMT_MULTI
- return reinterpret_cast<KFR_FILTER_F64*>(
- make_biquad_filter<double, 64>(reinterpret_cast<const biquad_params<double>*>(sos), sos_count));
-#else
- return reinterpret_cast<KFR_FILTER_F64*>(make_biquad_filter<double, 64>(
- cpu_t::runtime, reinterpret_cast<const biquad_params<double>*>(sos), sos_count));
-#endif
+ return try_fn(
+ [&]()
+ {
+ filter<double>* base =
+ new biquad_filter<double>(reinterpret_cast<const biquad_params<double>*>(sos), sos_count);
+ return reinterpret_cast<KFR_FILTER_F64*>(base);
+ },
+ nullptr);
}
- void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input, size_t size)
+ // Runs the filter behind `plan`: the handle is cast back to filter<float>* and
+ // apply(output, input, size) is called inside try_fn.
+ KFR_API_SPEC void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input,
+ size_t size)
{
- reinterpret_cast<filter<float>*>(plan)->apply(output, input, size);
+ try_fn([&]() { reinterpret_cast<filter<float>*>(plan)->apply(output, input, size); });
}
- void kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input, size_t size)
+ // Double-precision counterpart of kfr_filter_process_f32.
+ KFR_API_SPEC void kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input,
+ size_t size)
{
- reinterpret_cast<filter<double>*>(plan)->apply(output, input, size);
+ try_fn([&]() { reinterpret_cast<filter<double>*>(plan)->apply(output, input, size); });
}
- void kfr_filter_reset_f32(KFR_FILTER_F32* plan) { reinterpret_cast<filter<float>*>(plan)->reset(); }
- void kfr_filter_reset_f64(KFR_FILTER_F64* plan) { reinterpret_cast<filter<double>*>(plan)->reset(); }
+ // Calls reset() on the filter<float> behind `plan`, inside try_fn.
+ KFR_API_SPEC void kfr_filter_reset_f32(KFR_FILTER_F32* plan)
+ {
+ try_fn([&]() { reinterpret_cast<filter<float>*>(plan)->reset(); });
+ }
+ // Calls reset() on the filter<double> behind `plan`, inside try_fn.
+ KFR_API_SPEC void kfr_filter_reset_f64(KFR_FILTER_F64* plan)
+ {
+ try_fn([&]() { reinterpret_cast<filter<double>*>(plan)->reset(); });
+ }
- void kfr_filter_delete_plan_f32(KFR_FILTER_F32* plan) { delete reinterpret_cast<filter<f32>*>(plan); }
- void kfr_filter_delete_plan_f64(KFR_FILTER_F64* plan) { delete reinterpret_cast<filter<f64>*>(plan); }
+ // Deletes the filter behind `plan` through a filter<f32>* pointer, inside try_fn.
+ KFR_API_SPEC void kfr_filter_delete_plan_f32(KFR_FILTER_F32* plan)
+ {
+ try_fn([&]() { delete reinterpret_cast<filter<f32>*>(plan); });
+ }
+ // Deletes the filter behind `plan` through a filter<f64>* pointer, inside try_fn.
+ KFR_API_SPEC void kfr_filter_delete_plan_f64(KFR_FILTER_F64* plan)
+ {
+ try_fn([&]() { delete reinterpret_cast<filter<f64>*>(plan); });
+ }
}
} // namespace kfr
diff --git a/src/capi/dsp.cpp b/src/capi/dsp.cpp
@@ -1,28 +0,0 @@
-#include <kfr/dsp/biquad.hpp>
-#include <kfr/dsp/fir.hpp>
-
-namespace kfr
-{
-inline namespace CMT_ARCH_NAME
-{
-template <typename U, typename T>
-filter<U>* make_fir_filter(const univector_ref<const T>& taps)
-{
- return new fir_filter<T, U>(taps);
-}
-
-template filter<float>* make_fir_filter<float, float>(const univector_ref<const float>&);
-template filter<double>* make_fir_filter<double, double>(const univector_ref<const double>&);
-template filter<float>* make_fir_filter<float, double>(const univector_ref<const double>&);
-
-template <typename T, size_t maxfiltercount>
-KFR_FUNCTION filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count)
-{
- return new biquad_filter<T, maxfiltercount>(bq, count);
-}
-
-template filter<float>* make_biquad_filter<float, 64>(const biquad_params<float>* bq, size_t count);
-template filter<double>* make_biquad_filter<double, 64>(const biquad_params<double>* bq, size_t count);
-
-} // namespace CMT_ARCH_NAME
-} // namespace kfr
diff --git a/src/dft/CMakeLists.txt b/src/dft/CMakeLists.txt
@@ -1,47 +1,6 @@
cmake_minimum_required(VERSION 3.12)
-set(DFT_LIBS)
-
-if (KFR_ENABLE_DFT_MULTIARCH)
- add_library(kfr_dft INTERFACE)
- add_library(kfr_dft_all INTERFACE)
- target_link_libraries(kfr_dft INTERFACE kfr kfr_dft_all)
- target_compile_definitions(
- kfr_dft
- INTERFACE -DKFR_DFT_MULTI=1
- -DCMT_MULTI=1
- -DCMT_MULTI_ENABLED_SSE2=1
- -DCMT_MULTI_ENABLED_SSE41=1
- -DCMT_MULTI_ENABLED_AVX=1
- -DCMT_MULTI_ENABLED_AVX2=1
- -DCMT_MULTI_ENABLED_AVX512=1)
-
- add_arch_library(kfr_dft sse2 "${KFR_DFT_SRC}" "")
- add_arch_library(kfr_dft sse41 "${KFR_DFT_SRC}" "")
- add_arch_library(kfr_dft avx "${KFR_DFT_SRC}" "")
- add_arch_library(kfr_dft avx2 "${KFR_DFT_SRC}" "")
- add_arch_library(kfr_dft avx512 "${KFR_DFT_SRC}" "")
- list(
- APPEND
- DFT_LIBS
- kfr_dft_sse2
- kfr_dft_sse41
- kfr_dft_avx
- kfr_dft_avx2
- kfr_dft_avx512)
-
- link_as_whole(kfr_dft_all INTERFACE kfr_dft_sse2)
-
-else ()
- add_library(kfr_dft ${KFR_DFT_SRC})
- target_link_libraries(kfr_dft kfr use_arch)
- if (KFR_ENABLE_DFT_NP)
- target_compile_definitions(kfr_dft PUBLIC -DKFR_DFT_NPo2)
- else ()
- target_compile_definitions(kfr_dft PUBLIC -DKFR_DFT_NO_NPo2)
- endif ()
- list(APPEND DFT_LIBS kfr_dft)
-endif ()
+# Replaces the hand-written single-arch / multi-arch target setup removed above with one call.
+# NOTE(review): add_kfr_library presumably comes from cmake/add_kfr_library.cmake and defines
+# kfr_dft_LIBS, which the foreach and install rules below consume - confirm.
+add_kfr_library(NAME kfr_dft MULTIARCH SOURCES ${KFR_DFT_SRC})
function (dft_compile_options LIB)
if (MSVC AND CLANG)
@@ -52,23 +11,14 @@ function (dft_compile_options LIB)
endif ()
endfunction ()
-foreach (LIB IN LISTS DFT_LIBS)
+# Apply dft_compile_options to every target named in kfr_dft_LIBS.
+foreach (LIB IN LISTS kfr_dft_LIBS)
dft_compile_options(${LIB})
endforeach ()
if (KFR_INSTALL_LIBRARIES)
- if (KFR_ENABLE_DFT_MULTIARCH)
- install(
- TARGETS kfr_dft_sse2 kfr_dft_sse41 kfr_dft_avx kfr_dft_avx2
- kfr_dft_avx512
- ARCHIVE DESTINATION lib
- LIBRARY DESTINATION lib
- RUNTIME DESTINATION bin)
- else ()
- install(
- TARGETS kfr_dft
- ARCHIVE DESTINATION lib
- LIBRARY DESTINATION lib
- RUNTIME DESTINATION bin)
- endif ()
+ # One install rule for whatever targets kfr_dft_LIBS lists, replacing the separate
+ # multi-arch and single-arch rules: archives and libraries go to lib, runtime files to bin.
+ install(
+ TARGETS ${kfr_dft_LIBS}
+ ARCHIVE DESTINATION lib
+ LIBRARY DESTINATION lib
+ RUNTIME DESTINATION bin)
endif ()
diff --git a/src/dft/convolution-impl.cpp b/src/dft/convolution-impl.cpp
@@ -24,65 +24,12 @@
See https://www.kfrlib.com for details.
*/
#include <kfr/base/simd_expressions.hpp>
-#include <kfr/simd/complex.hpp>
#include <kfr/dft/convolution.hpp>
+#include <kfr/simd/complex.hpp>
+#include <kfr/multiarch.h>
namespace kfr
{
-inline namespace CMT_ARCH_NAME
-{
-
-namespace intrinsics
-{
-
-template <typename T>
-univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2)
-{
- using ST = subtype<T>;
- const size_t size = next_poweroftwo(src1.size() + src2.size() - 1);
- univector<complex<ST>> src1padded = src1;
- univector<complex<ST>> src2padded = src2;
- src1padded.resize(size);
- src2padded.resize(size);
-
- dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size);
- univector<u8> temp(dft->temp_size);
- dft->execute(src1padded, src1padded, temp);
- dft->execute(src2padded, src2padded, temp);
- src1padded = src1padded * src2padded;
- dft->execute(src1padded, src1padded, temp, true);
- const ST invsize = reciprocal<ST>(static_cast<ST>(size));
- return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize;
-}
-
-template <typename T>
-univector<T> correlate(const univector_ref<const T>& src1, const univector_ref<const T>& src2)
-{
- using ST = subtype<T>;
- const size_t size = next_poweroftwo(src1.size() + src2.size() - 1);
- univector<complex<ST>> src1padded = src1;
- univector<complex<ST>> src2padded = reverse(src2);
- src1padded.resize(size);
- src2padded.resize(size);
- dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size);
- univector<u8> temp(dft->temp_size);
- dft->execute(src1padded, src1padded, temp);
- dft->execute(src2padded, src2padded, temp);
- src1padded = src1padded * src2padded;
- dft->execute(src1padded, src1padded, temp, true);
- const ST invsize = reciprocal<ST>(static_cast<ST>(size));
- return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize;
-}
-
-template <typename T>
-univector<T> autocorrelate(const univector_ref<const T>& src1)
-{
- univector<T> result = correlate(src1, src1);
- result = result.slice(result.size() / 2);
- return result;
-}
-
-} // namespace intrinsics
template <typename T>
convolve_filter<T>::convolve_filter(size_t size_, size_t block_size_)
@@ -121,7 +68,68 @@ void convolve_filter<T>::set_data(const univector_ref<const T>& data)
}
+// Resets the filter's running state: every entry of `segments`, plus `saved_input` and `overlap`,
+// is overwritten via process(x, zeros()), and both `position` and `input_position` return to 0.
template <typename T>
-void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size)
+void convolve_filter<T>::reset()
+{
+ for (auto& segment : segments)
+ {
+ process(segment, zeros());
+ }
+ position = 0;
+ process(saved_input, zeros());
+ input_position = 0;
+ process(overlap, zeros());
+}
+
+//-------------------------------------------------------------------------------------
+
+// Prototypes of the architecture-specific pieces defined further down in `impl`:
+// the free function convolve() and a convolve_filter subclass that only adds process_buffer_impl().
+// NOTE(review): CMT_MULTI_PROTO comes from <kfr/multiarch.h>; presumably it repeats these
+// declarations for each enabled architecture namespace - confirm.
+CMT_MULTI_PROTO(namespace impl {
+ template <typename T>
+ univector<T> convolve(const univector_ref<const T>&, const univector_ref<const T>&, bool);
+
+ template <typename T>
+ class convolve_filter : public kfr::convolve_filter<T>
+ {
+ public:
+ void process_buffer_impl(T* output, const T* input, size_t size);
+ };
+})
+
+inline namespace CMT_ARCH_NAME
+{
+
+namespace impl
+{
+
+// DFT-based convolution of src1 with src2; when `correlate` is true, src2 is reversed first, which
+// turns the operation into cross-correlation. Both inputs are copied into complex buffers, resized to
+// the next power of two >= src1.size() + src2.size() - 1, transformed, multiplied element-wise and
+// transformed back (last execute argument `true` = inverse). The real part, truncated to
+// src1.size() + src2.size() - 1 samples and scaled by 1/size, is returned.
+template <typename T>
+univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2, bool correlate)
+{
+ using ST = subtype<T>;
+ // The result holds src1.size() + src2.size() - 1 samples. With fewer than two input samples in
+ // total that length is zero, and with two empty inputs the subtraction would wrap around in
+ // size_t, so return an empty vector before computing any sizes.
+ if (src1.size() + src2.size() < 2)
+ return univector<T>();
+ const size_t size = next_poweroftwo(src1.size() + src2.size() - 1);
+ univector<complex<ST>> src1padded = src1;
+ univector<complex<ST>> src2padded;
+ if (correlate)
+ src2padded = reverse(src2);
+ else
+ src2padded = src2;
+ src1padded.resize(size);
+ src2padded.resize(size);
+
+ dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size);
+ univector<u8> temp(dft->temp_size);
+ dft->execute(src1padded, src1padded, temp);
+ dft->execute(src2padded, src2padded, temp);
+ src1padded = src1padded * src2padded;
+ dft->execute(src1padded, src1padded, temp, true);
+ const ST invsize = reciprocal<ST>(static_cast<ST>(size));
+ return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize;
+}
+template univector<f32> convolve<f32>(const univector_ref<const f32>&, const univector_ref<const f32>&, bool);
+template univector<f64> convolve<f64>(const univector_ref<const f64>&, const univector_ref<const f64>&, bool);
+template univector<c32> convolve<c32>(const univector_ref<const c32>&, const univector_ref<const c32>&, bool);
+template univector<c64> convolve<c64>(const univector_ref<const c64>&, const univector_ref<const c64>&, bool);
+
+template <typename T>
+void convolve_filter<T>::process_buffer_impl(T* output, const T* input, size_t size)
{
// Note that the conditionals in the following algorithm are meant to
// reduce complexity in the common cases of either processing complete
@@ -134,34 +142,35 @@ void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size)
while (processed < size)
{
// Calculate how many samples to process this iteration.
- auto const processing = std::min(size - processed, block_size - input_position);
+ auto const processing = std::min(size - processed, this->block_size - this->input_position);
// Prepare input to forward FFT:
- if (processing == block_size)
+ if (processing == this->block_size)
{
// No need to work with saved_input.
- builtin_memcpy(scratch1.data(), input + processed, processing * sizeof(T));
+ builtin_memcpy(this->scratch1.data(), input + processed, processing * sizeof(T));
}
else
{
// Append this iteration's input to the saved_input current block.
- builtin_memcpy(saved_input.data() + input_position, input + processed, processing * sizeof(T));
- builtin_memcpy(scratch1.data(), saved_input.data(), block_size * sizeof(T));
+ builtin_memcpy(this->saved_input.data() + this->input_position, input + processed,
+ processing * sizeof(T));
+ builtin_memcpy(this->scratch1.data(), this->saved_input.data(), this->block_size * sizeof(T));
}
// Forward FFT saved_input block.
- fft.execute(segments[position], scratch1, temp);
+ this->fft.execute(this->segments[this->position], this->scratch1, this->temp);
- if (segments.size() == 1)
+ if (this->segments.size() == 1)
{
// Just one segment/block of history.
// Y_k = H * X_k
- fft_multiply(cscratch, ir_segments[0], segments[0], fft_multiply_pack);
+ fft_multiply(this->cscratch, this->ir_segments[0], this->segments[0], fft_multiply_pack);
}
else
{
// More than one segment/block of history so this is more involved.
- if (input_position == 0)
+ if (this->input_position == 0)
{
// At the start of an input block, we premultiply the history from
// previous input blocks with the extended filter blocks.
@@ -169,139 +178,88 @@ void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size)
// Y_(k-i,i) = H_i * X_(k-i)
// premul += Y_(k-i,i) for i=1,...,N
- fft_multiply(premul, ir_segments[1], segments[(position + 1) % segments.size()],
- fft_multiply_pack);
- for (size_t i = 2; i < segments.size(); i++)
+ fft_multiply(this->premul, this->ir_segments[1],
+ this->segments[(this->position + 1) % this->segments.size()], fft_multiply_pack);
+ for (size_t i = 2; i < this->segments.size(); i++)
{
- const size_t n = (position + i) % segments.size();
- fft_multiply_accumulate(premul, ir_segments[i], segments[n], fft_multiply_pack);
+ const size_t n = (this->position + i) % this->segments.size();
+ fft_multiply_accumulate(this->premul, this->ir_segments[i], this->segments[n],
+ fft_multiply_pack);
}
}
// Y_(k,0) = H_0 * X_k
// Y_k = premul + Y_(k,0)
- fft_multiply_accumulate(cscratch, premul, ir_segments[0], segments[position], fft_multiply_pack);
+ fft_multiply_accumulate(this->cscratch, this->premul, this->ir_segments[0],
+ this->segments[this->position], fft_multiply_pack);
}
// y_k = IFFT( Y_k )
- fft.execute(scratch2, cscratch, temp, cinvert_t{});
+ this->fft.execute(this->scratch2, this->cscratch, this->temp, cinvert_t{});
// z_k = y_k + overlap
process(make_univector(output + processed, processing),
- scratch2.slice(input_position, processing) + overlap.slice(input_position, processing));
+ this->scratch2.slice(this->input_position, processing) +
+ this->overlap.slice(this->input_position, processing));
- input_position += processing;
+ this->input_position += processing;
processed += processing;
// If a whole block was processed, prepare for next block.
- if (input_position == block_size)
+ if (this->input_position == this->block_size)
{
// Input block k is complete. Move to (k+1)-th input block.
- input_position = 0;
+ this->input_position = 0;
// Zero out the saved_input if it will be used in the next iteration.
auto const remaining = size - processed;
- if (remaining < block_size && remaining > 0)
+ if (remaining < this->block_size && remaining > 0)
{
- process(saved_input, zeros());
+ process(this->saved_input, zeros());
}
- builtin_memcpy(overlap.data(), scratch2.data() + block_size, block_size * sizeof(T));
+ builtin_memcpy(this->overlap.data(), this->scratch2.data() + this->block_size,
+ this->block_size * sizeof(T));
- position = position > 0 ? position - 1 : segments.size() - 1;
+ this->position = this->position > 0 ? this->position - 1 : this->segments.size() - 1;
}
}
}
-template <typename T>
-void convolve_filter<T>::reset()
-{
- for (auto& segment : segments)
- {
- process(segment, zeros());
- }
- position = 0;
- process(saved_input, zeros());
- input_position = 0;
- process(overlap, zeros());
-}
-
-namespace intrinsics
-{
-
-template univector<float> convolve<float>(const univector_ref<const float>&,
- const univector_ref<const float>&);
-template univector<complex<float>> convolve<complex<float>>(const univector_ref<const complex<float>>&,
- const univector_ref<const complex<float>>&);
-template univector<float> correlate<float>(const univector_ref<const float>&,
- const univector_ref<const float>&);
-template univector<complex<float>> correlate<complex<float>>(const univector_ref<const complex<float>>&,
- const univector_ref<const complex<float>>&);
-
-template univector<float> autocorrelate<float>(const univector_ref<const float>&);
-template univector<complex<float>> autocorrelate<complex<float>>(const univector_ref<const complex<float>>&);
-
-} // namespace intrinsics
-
-template convolve_filter<float>::convolve_filter(size_t, size_t);
-template convolve_filter<complex<float>>::convolve_filter(size_t, size_t);
+template class convolve_filter<float>;
+template class convolve_filter<double>;
+template class convolve_filter<complex<float>>;
+template class convolve_filter<complex<double>>;
-template convolve_filter<float>::convolve_filter(const univector_ref<const float>&, size_t);
-template convolve_filter<complex<float>>::convolve_filter(const univector_ref<const complex<float>>&, size_t);
+} // namespace impl
-template void convolve_filter<float>::set_data(const univector_ref<const float>&);
-template void convolve_filter<complex<float>>::set_data(const univector_ref<const complex<float>>&);
-
-template void convolve_filter<float>::process_buffer(float* output, const float* input, size_t size);
-template void convolve_filter<complex<float>>::process_buffer(complex<float>* output,
- const complex<float>* input, size_t size);
-
-template void convolve_filter<float>::reset();
-template void convolve_filter<complex<float>>::reset();
+} // namespace CMT_ARCH_NAME
-namespace intrinsics
+#ifdef CMT_MULTI_NEEDS_GATE
+namespace internal_generic
{
+// Architecture-independent entry point: forwards to impl::convolve of the namespace `ns`.
+// NOTE(review): `ns` is supplied by CMT_MULTI_GATE (<kfr/multiarch.h>), presumably chosen from the
+// CPU detected at run time - confirm.
+template <typename T>
+univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2, bool correlate)
+{
+ CMT_MULTI_GATE(return ns::impl::convolve(src1, src2, correlate));
+}
-template univector<double> convolve<double>(const univector_ref<const double>&,
- const univector_ref<const double>&);
-template univector<complex<double>> convolve<complex<double>>(const univector_ref<const complex<double>>&,
- const univector_ref<const complex<double>>&);
-template univector<double> correlate<double>(const univector_ref<const double>&,
- const univector_ref<const double>&);
-template univector<complex<double>> correlate<complex<double>>(const univector_ref<const complex<double>>&,
- const univector_ref<const complex<double>>&);
-
-template univector<double> autocorrelate<double>(const univector_ref<const double>&);
-template univector<complex<double>> autocorrelate<complex<double>>(
- const univector_ref<const complex<double>>&);
-
-} // namespace intrinsics
-
-template convolve_filter<double>::convolve_filter(size_t, size_t);
-template convolve_filter<complex<double>>::convolve_filter(size_t, size_t);
-
-template convolve_filter<double>::convolve_filter(const univector_ref<const double>&, size_t);
-template convolve_filter<complex<double>>::convolve_filter(const univector_ref<const complex<double>>&,
- size_t);
-
-template void convolve_filter<double>::set_data(const univector_ref<const double>&);
-template void convolve_filter<complex<double>>::set_data(const univector_ref<const complex<double>>&);
-
-template void convolve_filter<double>::process_buffer(double* output, const double* input, size_t size);
-template void convolve_filter<complex<double>>::process_buffer(complex<double>* output,
- const complex<double>* input, size_t size);
+template univector<f32> convolve<f32>(const univector_ref<const f32>&, const univector_ref<const f32>&, bool);
+template univector<f64> convolve<f64>(const univector_ref<const f64>&, const univector_ref<const f64>&, bool);
+template univector<c32> convolve<c32>(const univector_ref<const c32>&, const univector_ref<const c32>&, bool);
+template univector<c64> convolve<c64>(const univector_ref<const c64>&, const univector_ref<const c64>&, bool);
-template void convolve_filter<double>::reset();
-template void convolve_filter<complex<double>>::reset();
+} // namespace internal_generic
+// Architecture-independent process_buffer: reinterprets `this` as the impl::convolve_filter<T> of
+// the namespace `ns` and calls its process_buffer_impl. The impl subclass declared in the
+// CMT_MULTI_PROTO block above adds only that member function and no data members.
template <typename T>
-filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size)
+void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size)
{
- return new convolve_filter<T>(taps, block_size);
+ CMT_MULTI_GATE(
+ reinterpret_cast<ns::impl::convolve_filter<T>*>(this)->process_buffer_impl(output, input, size));
}
-template filter<float>* make_convolve_filter(const univector_ref<const float>&, size_t);
-template filter<complex<float>>* make_convolve_filter(const univector_ref<const complex<float>>&, size_t);
-template filter<double>* make_convolve_filter(const univector_ref<const double>&, size_t);
-template filter<complex<double>>* make_convolve_filter(const univector_ref<const complex<double>>&, size_t);
+template class convolve_filter<float>;
+template class convolve_filter<double>;
+template class convolve_filter<complex<float>>;
+template class convolve_filter<complex<double>>;
+#endif
-} // namespace CMT_ARCH_NAME
} // namespace kfr
diff --git a/src/dft/dft.cpp b/src/dft/dft.cpp
@@ -0,0 +1,60 @@
+/** @addtogroup dft
+ * @{
+ */
+/*
+ Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com)
+ This file is part of KFR
+
+ KFR is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ KFR is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with KFR.
+
+ If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ Buying a commercial license is mandatory as soon as you develop commercial activities without
+ disclosing the source code of your own applications.
+ See https://www.kfrlib.com for details.
+ */
+
+#include <kfr/dft/fft.hpp>
+#include <kfr/multiarch.h>
+
+namespace kfr
+{
+
+// Prototypes of the per-architecture plan initializers that the gate functions below call as
+// ns::impl::dft_initialize / ns::impl::dft_real_initialize.
+// NOTE(review): CMT_MULTI_PROTO comes from <kfr/multiarch.h>; presumably it repeats these
+// declarations for each enabled architecture namespace - confirm.
+CMT_MULTI_PROTO(namespace impl {
+ template <typename T>
+ void dft_initialize(dft_plan<T> & plan);
+ template <typename T>
+ void dft_real_initialize(dft_plan_real<T> & plan);
+})
+
+#ifdef CMT_MULTI_NEEDS_GATE
+
+// Architecture-independent initializer for complex DFT plans: forwards to impl::dft_initialize of
+// the namespace `ns` supplied by CMT_MULTI_GATE.
+template <typename T>
+void dft_initialize(dft_plan<T>& plan)
+{
+ CMT_MULTI_GATE(ns::impl::dft_initialize(plan));
+}
+// Same forwarding for real-input DFT plans.
+template <typename T>
+void dft_real_initialize(dft_plan_real<T>& plan)
+{
+ CMT_MULTI_GATE(ns::impl::dft_real_initialize(plan));
+}
+
+// Explicit instantiations for the two supported sample types.
+template void dft_initialize<float>(dft_plan<float>&);
+template void dft_initialize<double>(dft_plan<double>&);
+template void dft_real_initialize<float>(dft_plan_real<float>&);
+template void dft_real_initialize<double>(dft_plan_real<double>&);
+
+#endif
+
+} // namespace kfr
diff --git a/src/dft/fft-impl.hpp b/src/dft/fft-impl.hpp
@@ -1906,8 +1906,10 @@ void from_fmt(size_t real_size, complex<T>* rtwiddle, complex<T>* out, const com
cwrite<1>(out, dc);
}
+#ifndef KFR_DFT_NO_NPo2
template <typename T>
void init_dft(dft_plan<T>* self, size_t size, dft_order);
+#endif
template <typename T>
KFR_INTRINSIC void initialize_stages(dft_plan<T>* self)
@@ -1926,6 +1928,8 @@ KFR_INTRINSIC void initialize_stages(dft_plan<T>* self)
}
}
+namespace impl
+{
template <typename T>
void dft_initialize(dft_plan<T>& plan)
{
@@ -1935,6 +1939,7 @@ void dft_initialize(dft_plan<T>& plan)
initialize_data(&plan);
initialize_order(&plan);
}
+} // namespace impl
template <typename T>
struct dft_stage_real_repack : dft_stage<T>
@@ -1977,6 +1982,8 @@ public:
}
};
+namespace impl
+{
template <typename T>
void dft_real_initialize(dft_plan_real<T>& plan)
{
@@ -1989,6 +1996,7 @@ void dft_real_initialize(dft_plan_real<T>& plan)
initialize_data(&plan);
initialize_order(&plan);
}
+} // namespace impl
} // namespace CMT_ARCH_NAME
diff --git a/src/dft/fft-templates.hpp b/src/dft/fft-templates.hpp
@@ -31,8 +31,11 @@ namespace kfr
{
inline namespace CMT_ARCH_NAME
{
+// The initializers are instantiated inside `impl`, matching the namespace they are defined in
+// within fft-impl.hpp. NOTE(review): FLOAT is not defined in this file; presumably the including
+// translation unit defines it - confirm.
+namespace impl
+{
template void dft_initialize<FLOAT>(dft_plan<FLOAT>& plan);
template void dft_real_initialize<FLOAT>(dft_plan_real<FLOAT>& plan);
+} // namespace impl
} // namespace CMT_ARCH_NAME
} // namespace kfr
diff --git a/src/dsp/CMakeLists.txt b/src/dsp/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.12)
+
+# Build the sources listed in KFR_DSP_SRC as the kfr_dsp library, with the MULTIARCH flag set.
+# NOTE(review): add_kfr_library presumably comes from cmake/add_kfr_library.cmake and defines
+# kfr_dsp_LIBS, which the install rule below consumes - confirm.
+add_kfr_library(NAME kfr_dsp MULTIARCH SOURCES ${KFR_DSP_SRC})
+
+# When KFR_INSTALL_LIBRARIES is set, install every target listed in kfr_dsp_LIBS:
+# archives and libraries go to lib, runtime files to bin.
+if (KFR_INSTALL_LIBRARIES)
+ install(
+ TARGETS ${kfr_dsp_LIBS}
+ ARCHIVE DESTINATION lib
+ LIBRARY DESTINATION lib
+ RUNTIME DESTINATION bin)
+endif ()
diff --git a/src/dsp/biquad.cpp b/src/dsp/biquad.cpp
@@ -0,0 +1,66 @@
+/** @addtogroup dft
+ * @{
+ */
+/*
+ Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com)
+ This file is part of KFR
+
+ KFR is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ KFR is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with KFR.
+
+ If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ Buying a commercial license is mandatory as soon as you develop commercial activities without
+ disclosing the source code of your own applications.
+ See https://www.kfrlib.com for details.
+ */
+#include <kfr/multiarch.h>
+#include <kfr/dsp/biquad.hpp>
+
+namespace kfr
+{
+
+// Prototype of the per-architecture factory that the biquad_filter constructor below calls as
+// ns::impl::create_biquad_filter.
+// NOTE(review): CMT_MULTI_PROTO comes from <kfr/multiarch.h>; presumably it repeats this
+// declaration for each enabled architecture namespace - confirm.
+CMT_MULTI_PROTO(namespace impl {
+ template <typename T>
+ expression_handle<T, 1> create_biquad_filter(const biquad_params<T>* bq, size_t count);
+} // namespace impl
+)
+
+inline namespace CMT_ARCH_NAME
+{
+namespace impl
+{
+// Builds the biquad expression for `count` sections read from `bq`, applied to placeholder<T>(),
+// and returns it as an expression_handle<T, 1>. KFR_LOGIC_CHECK rejects count > 64 with the
+// message "Too many biquad filters: <count>"; 64 is also the template argument given to biquad<>.
+template <typename T>
+expression_handle<T, 1> create_biquad_filter(const biquad_params<T>* bq, size_t count)
+{
+ KFR_LOGIC_CHECK(count <= 64, "Too many biquad filters: ", count);
+ return biquad<64>(bq, count, placeholder<T>());
+}
+template expression_handle<float, 1> create_biquad_filter<float>(const biquad_params<float>*, size_t);
+template expression_handle<double, 1> create_biquad_filter<double>(const biquad_params<double>*, size_t);
+} // namespace impl
+} // namespace CMT_ARCH_NAME
+
+#ifdef CMT_MULTI_NEEDS_GATE
+
+// Architecture-independent constructor: stores in filter_expr the expression produced by
+// impl::create_biquad_filter of the namespace `ns` supplied by CMT_MULTI_GATE.
+template <typename T>
+biquad_filter<T>::biquad_filter(const biquad_params<T>* bq, size_t count)
+{
+ CMT_MULTI_GATE(this->filter_expr = ns::impl::create_biquad_filter<T>(bq, count));
+}
+
+// Explicit instantiations for the two supported sample types.
+template biquad_filter<float>::biquad_filter(const biquad_params<float>*, size_t);
+template biquad_filter<double>::biquad_filter(const biquad_params<double>*, size_t);
+
+#endif
+
+} // namespace kfr
diff --git a/src/dsp/fir.cpp b/src/dsp/fir.cpp
@@ -0,0 +1,92 @@
+/** @addtogroup dft
+ * @{
+ */
+/*
+ Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com)
+ This file is part of KFR
+
+ KFR is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ KFR is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with KFR.
+
+ If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ Buying a commercial license is mandatory as soon as you develop commercial activities without
+ disclosing the source code of your own applications.
+ See https://www.kfrlib.com for details.
+ */
+#include <kfr/dsp/fir.hpp>
+#include <kfr/multiarch.h>
+
+namespace kfr
+{
+
+// Prototype of the per-architecture fir_filter subclass. It inherits the constructors of
+// kfr::fir_filter<T, U> and adds only the two *_impl member functions defined further down.
+// NOTE(review): CMT_MULTI_PROTO comes from <kfr/multiarch.h>; presumably it repeats this
+// declaration for each enabled architecture namespace - confirm.
+CMT_MULTI_PROTO(namespace impl {
+ template <typename T, typename U>
+ class fir_filter : public kfr::fir_filter<T, U>
+ {
+ public:
+ using kfr::fir_filter<T, U>::fir_filter;
+
+ void process_buffer_impl(U* dest, const U* src, size_t size);
+ void process_expression_impl(U* dest, const expression_handle<U, 1>& src, size_t size);
+ };
+} // namespace impl
+)
+
+inline namespace CMT_ARCH_NAME
+{
+namespace impl
+{
+
+// Filters `size` samples from the raw buffer `src` into `dest`, evaluating fir() on the filter's
+// own `state`.
+template <typename T, typename U>
+void fir_filter<T, U>::process_buffer_impl(U* dest, const U* src, size_t size)
+{
+ make_univector(dest, size) = fir(this->state, make_univector(src, size));
+}
+// Same as process_buffer_impl, but the input is the expression handle `src` instead of a buffer.
+template <typename T, typename U>
+void fir_filter<T, U>::process_expression_impl(U* dest, const expression_handle<U, 1>& src, size_t size)
+{
+ make_univector(dest, size) = fir(this->state, src);
+}
+
+// Explicit instantiations for the <T, U> pairs listed here.
+template class fir_filter<float, float>;
+template class fir_filter<double, double>;
+template class fir_filter<float, double>;
+template class fir_filter<double, float>;
+template class fir_filter<float, complex<float>>;
+template class fir_filter<double, complex<double>>;
+
+} // namespace impl
+} // namespace CMT_ARCH_NAME
+
+#ifdef CMT_MULTI_NEEDS_GATE
+
+// Architecture-independent entry points. They forward to the impl::fir_filter<T, U> methods of the
+// namespace `ns` supplied by CMT_MULTI_GATE, the same way convolve_filter<T>::process_buffer does in
+// src/dft/convolution-impl.cpp; computing fir() directly here would bypass the per-architecture
+// process_buffer_impl / process_expression_impl defined above.
+// `this` is reinterpreted as the impl subclass, which adds member functions only. The cast is
+// wrapped in its own parentheses so the comma in <T, U> is not seen as a macro-argument separator.
+template <typename T, typename U>
+void fir_filter<T, U>::process_buffer(U* dest, const U* src, size_t size)
+{
+ CMT_MULTI_GATE(
+ (reinterpret_cast<ns::impl::fir_filter<T, U>*>(this))->process_buffer_impl(dest, src, size));
+}
+template <typename T, typename U>
+void fir_filter<T, U>::process_expression(U* dest, const expression_handle<U, 1>& src, size_t size)
+{
+ CMT_MULTI_GATE(
+ (reinterpret_cast<ns::impl::fir_filter<T, U>*>(this))->process_expression_impl(dest, src, size));
+}
+// Explicit instantiations; the same <T, U> pairs are instantiated for impl::fir_filter above.
+template class fir_filter<float, float>;
+template class fir_filter<double, double>;
+template class fir_filter<float, double>;
+template class fir_filter<double, float>;
+template class fir_filter<float, complex<float>>;
+template class fir_filter<double, complex<double>>;
+
+#endif
+
+} // namespace kfr
diff --git a/src/dsp/sample_rate_conversion.cpp b/src/dsp/sample_rate_conversion.cpp
@@ -0,0 +1,179 @@
+/** @addtogroup dft
+ * @{
+ */
+/*
+ Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com)
+ This file is part of KFR
+
+ KFR is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ KFR is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with KFR.
+
+ If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ Buying a commercial license is mandatory as soon as you develop commercial activities without
+ disclosing the source code of your own applications.
+ See https://www.kfrlib.com for details.
+ */
+#include <kfr/dsp/sample_rate_conversion.hpp>
+#include <kfr/multiarch.h>
+
+namespace kfr
+{
+// Declaration of the implementation struct used by the definitions below. It is passed to
+// CMT_MULTI_PROTO (kfr/multiarch.h); presumably the macro repeats it for every enabled
+// architecture namespace -- confirm against the macro definition.
+CMT_MULTI_PROTO(namespace impl {
+ // Extends the public kfr::samplerate_converter<T> with the methods that the gate functions
+ // at the end of this file forward to.
+ template <typename T>
+ struct samplerate_converter : public kfr::samplerate_converter<T>
+ {
+ public:
+ // Re-export the integer and floating-point helper types of the public class.
+ using itype = typename kfr::samplerate_converter<T>::itype;
+ using ftype = typename kfr::samplerate_converter<T>::ftype;
+ // Computes the filter taps and resets the stream positions (called from the constructor gate).
+ void init(sample_rate_conversion_quality quality, itype interpolation_factor, itype decimation_factor,
+ subtype<T> scale, subtype<T> cutoff);
+ // Produces output.size() samples from `input`; returns the number of input samples consumed.
+ size_t process_impl(univector_ref<T> output, univector_ref<const T> input);
+ };
+} // namespace impl
+)
+
+inline namespace CMT_ARCH_NAME
+{
+namespace impl
+{
+
+// Prepares the converter for a given quality preset and resampling ratio: resets the stream
+// positions, reduces the ratio to lowest terms, sizes the filter and delay buffers, fills the
+// windowed-sinc taps and finally normalises them.
+//
+// quality               - preset passed to window_param() and filter_order()
+// interpolation_factor,
+// decimation_factor     - resampling ratio; both are divided by their gcd before use
+// scale                 - extra gain folded into the tap normalisation
+// cutoff                - requested cutoff; narrowed by the transition width and divided by
+//                         the larger of the two (reduced) factors
+template <typename T>
+void samplerate_converter<T>::init(sample_rate_conversion_quality quality, itype interpolation_factor,
+                                   itype decimation_factor, subtype<T> scale, subtype<T> cutoff)
+{
+    // Quality-dependent parameters and a clean stream state.
+    this->kaiser_beta     = this->window_param(quality);
+    this->depth           = static_cast<itype>(this->filter_order(quality));
+    this->input_position  = 0;
+    this->output_position = 0;
+
+    // Reduce the ratio so the filter is no longer than necessary.
+    const i64 common_divisor = gcd(interpolation_factor, decimation_factor);
+    interpolation_factor /= common_divisor;
+    decimation_factor /= common_divisor;
+    this->interpolation_factor = interpolation_factor;
+    this->decimation_factor    = decimation_factor;
+
+    // `depth` taps per phase, one phase per interpolation step.
+    this->taps  = this->depth * interpolation_factor;
+    this->order = size_t(this->depth * interpolation_factor - 1);
+
+    this->filter = univector<T>(size_t(this->taps), T());
+    this->delay  = univector<T>(size_t(this->depth), T());
+
+    cutoff = cutoff - this->transition_width() / c_pi<ftype, 4>;
+    cutoff = cutoff / std::max(decimation_factor, interpolation_factor);
+
+    // `slot` is the storage position; `proto` is the index into the prototype filter. `proto`
+    // advances by interpolation_factor and wraps around (plus one) once it passes the end, so
+    // the taps are stored in a reordered sequence rather than in prototype order.
+    const itype center = this->taps / 2;
+    for (itype slot = 0, proto = 0; slot < this->taps; slot++)
+    {
+        const auto lowpass = sinc((proto - center) * cutoff * c_pi<ftype, 2>);
+        const auto shape   = this->window(ftype(proto) / ftype(this->taps - 1));
+        this->filter[size_t(slot)] = lowpass * shape;
+
+        proto += size_t(interpolation_factor);
+        if (proto >= this->taps)
+            proto = proto - this->taps + 1;
+    }
+
+    // Scale the taps by interpolation_factor * scale divided by their sum.
+    const T gain = reciprocal(sum(this->filter)) * static_cast<ftype>(interpolation_factor * scale);
+    this->filter = this->filter * gain;
+}
+
+// Computes output.size() resampled samples from `input`, updates the delay line and the
+// running input/output positions, and returns the number of input samples consumed (the
+// value of input_size_for_output(output.size())).
+template <typename T>
+size_t samplerate_converter<T>::process_impl(univector_ref<T> output, univector_ref<const T> input)
+{
+ const itype required_input_size = this->input_size_for_output(output.size());
+
+ const itype input_size = input.size();
+ for (size_t i = 0; i < output.size(); i++)
+ {
+ // Map the absolute output index to a position on the intermediate (upsampled) grid,
+ // then find the first input sample and the tap offset the filter window starts at.
+ const itype intermediate_index =
+ this->output_position_to_intermediate(static_cast<itype>(i) + this->output_position);
+ const itype intermediate_start = intermediate_index - this->taps + 1;
+ const std::lldiv_t input_pos =
+ floor_div(intermediate_start + this->interpolation_factor - 1, this->interpolation_factor);
+ const itype input_start = input_pos.quot; // first input sample
+ const itype tap_start = this->interpolation_factor - 1 - input_pos.rem;
+ // Taps for this output sample start at tap_start * depth inside the stored filter.
+ const univector_ref<T> tap_ptr = this->filter.slice(static_cast<size_t>(tap_start * this->depth));
+
+ if (input_start >= this->input_position + input_size)
+ {
+ // The window starts past the end of the supplied input: emit zero.
+ output[i] = T(0);
+ }
+ else if (input_start >= this->input_position)
+ {
+ // The window starts inside the supplied input: dot product of `depth` input
+ // samples (padded() covers a slice that runs past the end) with `depth` taps.
+ output[i] = dotproduct(
+ truncate(padded(input.slice(input_start - this->input_position, this->depth)), this->depth),
+ tap_ptr.truncate(this->depth));
+ }
+ else
+ {
+ // The window starts before the supplied input: the first prev_count samples come
+ // from the tail of the delay line, the remaining depth - prev_count from `input`.
+ const itype prev_count = this->input_position - input_start;
+ output[i] = dotproduct(this->delay.slice(size_t(this->depth - prev_count)),
+ tap_ptr.truncate(prev_count)) +
+ dotproduct(truncate(padded(input.truncate(size_t(this->depth - prev_count))),
+ size_t(this->depth - prev_count)),
+ tap_ptr.slice(size_t(prev_count), size_t(this->depth - prev_count)));
+ }
+ }
+
+ if (required_input_size >= this->depth)
+ {
+ // Enough input was consumed to refill the whole delay line from `input`, starting
+ // `depth` samples before the end of the consumed range.
+ this->delay.slice(0, this->delay.size()) =
+ padded(input.slice(size_t(required_input_size - this->depth)));
+ }
+ else
+ {
+ // Fewer than `depth` samples consumed: shift the delay line left by that amount and
+ // write the input after the retained samples.
+ this->delay.truncate(size_t(this->depth - required_input_size)) =
+ this->delay.slice(size_t(required_input_size));
+ this->delay.slice(size_t(this->depth - required_input_size)) = padded(input);
+ }
+
+ // Advance the absolute stream positions for the next call.
+ this->input_position += required_input_size;
+ this->output_position += output.size();
+
+ return required_input_size;
+}
+
+// Explicit instantiations: the out-of-line members of impl::samplerate_converter defined
+// above are emitted from this file for these four sample types only.
+template struct samplerate_converter<float>;
+template struct samplerate_converter<double>;
+template struct samplerate_converter<complex<float>>;
+template struct samplerate_converter<complex<double>>;
+
+} // namespace impl
+} // namespace CMT_ARCH_NAME
+
+// Architecture-neutral members of kfr::samplerate_converter, compiled only when
+// CMT_MULTI_NEEDS_GATE is defined. Each one forwards to impl::samplerate_converter through
+// CMT_MULTI_GATE.
+// NOTE(review): `ns` is not declared in this file; presumably CMT_MULTI_GATE provides it and
+// selects the architecture at run time -- confirm in kfr/multiarch.h.
+#ifdef CMT_MULTI_NEEDS_GATE
+
+// Constructor: performs all initialisation by forwarding its arguments to impl::init().
+template <typename T>
+samplerate_converter<T>::samplerate_converter(sample_rate_conversion_quality quality,
+ itype interpolation_factor, itype decimation_factor,
+ ftype scale, ftype cutoff)
+{
+ // `this` is reinterpreted as the implementation type, which derives from this class.
+ CMT_MULTI_GATE(reinterpret_cast<ns::impl::samplerate_converter<T>*>(this)->init(
+ quality, interpolation_factor, decimation_factor, scale, cutoff));
+}
+
+// Forwards to impl::process_impl() and returns its result (the consumed input sample count).
+template <typename T>
+size_t samplerate_converter<T>::process_impl(univector_ref<T> output, univector_ref<const T> input)
+{
+ // The `return` is part of the macro argument, so it is issued from inside the gate.
+ CMT_MULTI_GATE(
+ return reinterpret_cast<ns::impl::samplerate_converter<T>*>(this)->process_impl(output, input));
+}
+
+// Emit the gate members for the same sample types the implementation is instantiated for.
+template struct samplerate_converter<float>;
+template struct samplerate_converter<double>;
+template struct samplerate_converter<complex<float>>;
+template struct samplerate_converter<complex<double>>;
+
+#endif
+
+} // namespace kfr
diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.12)
+
+# Define the kfr_io library from the generated KFR_IO_SRC source list.
+# add_kfr_library is a project helper (presumably the one provided by
+# cmake/add_kfr_library.cmake, which the top-level CMakeLists.txt includes).
+add_kfr_library(
+    NAME kfr_io
+    SOURCES ${KFR_IO_SRC})
+
+# Install rules are opt-in: static and shared libraries go to lib,
+# runtime artifacts go to bin.
+if (KFR_INSTALL_LIBRARIES)
+    install(TARGETS kfr_io
+            ARCHIVE DESTINATION lib
+            LIBRARY DESTINATION lib
+            RUNTIME DESTINATION bin)
+endif ()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -137,7 +137,7 @@ endif ()
add_executable(all_tests all_tests.cpp ${ALL_TESTS_CPP})
target_compile_definitions(all_tests PRIVATE KFR_NO_MAIN)
-target_link_libraries(all_tests kfr use_arch)
+target_link_libraries(all_tests kfr use_arch kfr_dsp)
if (KFR_ENABLE_DFT)
target_link_libraries(all_tests kfr_dft)
target_link_libraries(dft_test kfr_dft)
@@ -162,7 +162,7 @@ function (add_x86_test ARCH)
if (KFR_ENABLE_DFT)
target_sources(all_tests_${NAME} PRIVATE ${KFR_DFT_SRC})
endif ()
- target_link_libraries(all_tests_${NAME} kfr)
+ target_link_libraries(all_tests_${NAME} kfr kfr_dsp)
target_set_arch(all_tests_${NAME} PRIVATE ${ARCH})
target_compile_definitions(all_tests_${NAME} PRIVATE KFR_NO_MAIN)
target_compile_definitions(all_tests_${NAME} PUBLIC KFR_ENABLE_FLAC=1)
diff --git a/tests/unit/dsp/biquad.cpp b/tests/unit/dsp/biquad.cpp
@@ -112,6 +112,14 @@ TEST(biquad_lowpass2)
CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0);
});
}
+
+// Smoke test: builds a biquad_filter<float> from an array of 16 default-constructed
+// biquad_params sections and applies it to a 256-sample buffer. It contains no CHECKs; it
+// only verifies that the filter can be constructed and run.
+TEST(biquad_filter)
+{
+    biquad_params<float> params[16];
+    auto f = biquad_filter<float>(params);
+    // Value-initialise the buffer: apply() reads it, and reading indeterminate floats is
+    // undefined behaviour (and is reported by memory sanitizers).
+    float buf[256] = {};
+    f.apply(buf);
+}
} // namespace CMT_ARCH_NAME
} // namespace kfr
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
@@ -21,7 +21,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin)
add_executable(sample_rate_converter sample_rate_converter.cpp)
-target_link_libraries(sample_rate_converter kfr kfr_io use_arch)
+target_link_libraries(sample_rate_converter kfr kfr_dsp kfr_io use_arch)
add_executable(ebu_test ebu_test.cpp)
target_link_libraries(ebu_test kfr kfr_io use_arch)
diff --git a/update-sources.py b/update-sources.py
@@ -39,7 +39,8 @@ list_sources("KFR_IO_SRC", "include/kfr/io", ['*.hpp', '*.h'])
list_sources("KFR_RUNTIME_SRC", "include/kfr/runtime", ['*.hpp', '*.h'])
list_sources("KFR_GRAPHICS_SRC", "include/kfr/graphics", ['*.hpp', '*.h'])
list_sources("KFR_SRC", "include", ['*.hpp', '*.h'])
-list_sources("KFR_DFT_SRC", "src/dft", ['*.cpp'], ["dft-src.cpp"])
+list_sources("KFR_DFT_SRC", "src/dft", ['*.cpp'])
+list_sources("KFR_DSP_SRC", "src/dsp", ['*.cpp'])
list_sources("KFR_IO_SRC", "src/io", ['*.cpp'])
list_sources("KFR_UNITTEST_SRC", "tests/unit", ['*.cpp'])