kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 8ad8c1071c9bf81cad8da4f118af1d9978932e50
parent 3c1e2102dc51cdb6f269b74ed2d635e252d6faf5
Author: [email protected] <[email protected]>
Date:   Wed, 17 Jan 2024 07:13:02 +0000

Multiarch refactoring

Diffstat:
M CMakeLists.txt | 161 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
A cmake/add_kfr_library.cmake | 37 +++++++++++++++++++++++++++++++++++++
M cmake/detect_cpu.cpp | 7 +++++--
A cmake/link_as_whole.cmake | 13 +++++++++++++
M docs/docs/installation.md | 4 ++--
M examples/CMakeLists.txt | 8 ++++----
M include/kfr/base/filter.hpp | 1 +
M include/kfr/capi.h | 6 ++++++
M include/kfr/cident.h | 98 ++++++++-----------------------------------------------------------------------
M include/kfr/dft/convolution.hpp | 50 +++++++++++++++++++-------------------------------
M include/kfr/dft/fft.hpp | 81 +++++++++++++++++++++----------------------------------------------------------
M include/kfr/dsp/biquad.hpp | 58 ++++++++++++++++++++--------------------------------------
M include/kfr/dsp/fir.hpp | 30 +++++++-----------------------
M include/kfr/dsp/sample_rate_conversion.hpp | 98 +++++++++----------------------------------------------------------------------
M include/kfr/kfr.h | 9 ++++++++-
A include/kfr/multiarch.h | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M sources.cmake | 11 +++++++++++
M src/capi/CMakeLists.txt | 135 +++++++++++++++++--------------------------------------------------------------
M src/capi/capi.cpp | 495 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
D src/capi/dsp.cpp | 28 ----------------------------
M src/dft/CMakeLists.txt | 64 +++++++---------------------------------------------------------
M src/dft/convolution-impl.cpp | 280 ++++++++++++++++++++++++++++++++++---------------------------------------------
A src/dft/dft.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/dft/fft-impl.hpp | 8 ++++++++
M src/dft/fft-templates.hpp | 3 +++
A src/dsp/CMakeLists.txt | 11 +++++++++++
A src/dsp/biquad.cpp | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/dsp/fir.cpp | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/dsp/sample_rate_conversion.cpp | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/io/CMakeLists.txt | 11 +++++++++++
M tests/CMakeLists.txt | 4 ++--
M tests/unit/dsp/biquad.cpp | 8 ++++++++
M tools/CMakeLists.txt | 2 +-
M update-sources.py | 3 ++-
34 files changed, 1355 insertions(+), 962 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -22,6 +22,16 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) + if (CPU_ARCH) if (NOT KFR_ARCH) message( @@ -36,12 +46,6 @@ if (CPU_ARCH) endif () endif () -if (WIN32 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set(CMAKE_INSTALL_PREFIX - "" - CACHE STRING "Reset install prefix on Win32" FORCE) -endif () - set(X86 FALSE) if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") set(X86 TRUE) @@ -57,23 +61,23 @@ else () set(CLANG 0) endif () +include(cmake/target_set_arch.cmake) +include(cmake/link_as_whole.cmake) +include(cmake/add_kfr_library.cmake) + # Include autogenerated list of source files include(sources.cmake) include(CMakeDependentOption) -add_subdirectory(src/io) +option(KFR_ENABLE_MULTIARCH + "Multiple architectures will be built with runtime dispatch" ON) option(ENABLE_TESTS "Enable KFR tests" OFF) -cmake_dependent_option(ENABLE_EXAMPLES "Enable KFR examples" ON "ENABLE_TESTS" OFF) +cmake_dependent_option(ENABLE_EXAMPLES "Enable KFR examples" ON "ENABLE_TESTS" + OFF) if (CLANG) option(KFR_ENABLE_DFT "Enable DFT and related algorithms." ON) option(KFR_ENABLE_DFT_NP "Enable Non-power of 2 DFT" ON) - if (X86) - option( - KFR_ENABLE_DFT_MULTIARCH - "Build DFT static libraries for various architectures. 
Requires Clang" - OFF) - endif () else () option(KFR_ENABLE_DFT "Enable DFT and related algorithms." OFF) option(KFR_ENABLE_DFT_NP "Enable Non-power of 2 DFT" OFF) @@ -90,29 +94,29 @@ option(KFR_INSTALL_LIBRARIES "Include libraries in installation" ON) mark_as_advanced(KFR_ENABLE_ASMTEST) mark_as_advanced(KFR_REGENERATE_TESTS) mark_as_advanced(KFR_DISABLE_CLANG_EXTENSIONS) +mark_as_advanced(KFR_STD_COMPLEX) if (KFR_ENABLE_CAPI_BUILD AND NOT KFR_ENABLE_DFT) - message(FATAL_ERROR "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT to be enabled") + message( + FATAL_ERROR + "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT to be enabled") endif () if (KFR_ENABLE_CAPI_BUILD AND NOT KFR_ENABLE_DFT_NP) - message(FATAL_ERROR "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT_NP to be enabled") -endif () -if (KFR_ENABLE_CAPI_BUILD AND KFR_ENABLE_DFT_MULTIARCH) - message(FATAL_ERROR "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT_MULTIARCH to be disabled") + message( + FATAL_ERROR + "KFR_ENABLE_CAPI_BUILD requires KFR_ENABLE_DFT_NP to be enabled") endif () -include(cmake/target_set_arch.cmake) - -function (link_as_whole TARGET TYPE LIBRARY) - if (APPLE) - target_link_options(${TARGET} ${TYPE} -Wl,-force_load $<TARGET_FILE:${LIBRARY}>) - elseif (WIN32) - target_link_options(${TARGET} ${TYPE} /WHOLEARCHIVE:$<TARGET_FILE:${LIBRARY}>) - else () - target_link_options(${TARGET} ${TYPE} -Wl,--push-state,--whole-archive $<TARGET_FILE:${LIBRARY}> -Wl,--pop-state) - endif () -endfunction() +function (add_arch_library NAME ARCH SRCS DEFS) + add_library(${NAME}_${ARCH} ${SRCS}) + target_link_libraries(${NAME}_${ARCH} kfr) + target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH}) + target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS}) + target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH}) +endfunction () +add_subdirectory(src/dsp) +add_subdirectory(src/io) if (KFR_ENABLE_DFT) add_subdirectory(src/dft) endif () @@ -121,16 +125,40 @@ if (KFR_ENABLE_CAPI_BUILD) endif () if (NOT KFR_ARCH) - 
set(KFR_ARCH detect) + set(KFR_ARCH target) +endif () + +if (KFR_ARCH STREQUAL "detect") + set(KFR_ARCH host) +endif () + +set(DETECT_NAMES host target) +if (X86) + set(ALLOWED_ARCHS + generic + sse + sse2 + sse3 + ssse3 + sse41 + sse42 + avx + avx2 + avx512) +else () + set(ALLOWED_ARCHS generic neon neon64) endif () -if (KFR_ARCH STREQUAL "detect" AND X86) +if (KFR_ARCH IN_LIST DETECT_NAMES) + message(STATUS "Detecting ${KFR_ARCH} architecture") try_run( RUN_RESULT COMPILE_RESULT "${CMAKE_CURRENT_BINARY_DIR}/tmpdir" ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect_cpu.cpp CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CMAKE_CURRENT_SOURCE_DIR}/include" - -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_STANDARD_REQUIRED=ON + -DCMAKE_CXX_STANDARD=17 + -DCMAKE_CXX_STANDARD_REQUIRED=ON + "-DCOMPILE_DEFINITIONS=-DCPU_${KFR_ARCH}=1" -DCMAKE_CXX_EXTENSIONS=ON COMPILE_OUTPUT_VARIABLE COMPILE_OUT RUN_OUTPUT_VARIABLE RUN_OUT) @@ -146,7 +174,7 @@ if (KFR_ARCH STREQUAL "detect" AND X86) message(STATUS COMPILE_RESULT = ${COMPILE_RESULT}) message(STATUS RUN_RESULT = ${RUN_RESULT}) message(STATUS COMPILE_OUT = ${COMPILE_OUT}) - message(STATUS RUN_OUT = ${RUN_OUT}) + message(FATAL_ERROR RUN_OUT = ${RUN_OUT}) endif () else () message( @@ -155,6 +183,24 @@ else () ) endif () +if (NOT KFR_ARCH IN_LIST ALLOWED_ARCHS) + message(FATAL_ERROR "Incorrect architecture set by KFR_ARCH: ${KFR_ARCH}") +endif () + +if (NOT KFR_ARCHS) + if (X86) + set(KFR_ARCHS sse2 sse41 avx avx2 avx512) + else () + set(KFR_ARCHS ${KFR_ARCH}) + endif () +endif () + +string (REPLACE ";" ", " KFR_ARCHS_COMMA "${KFR_ARCHS}") + +if (KFR_ENABLE_MULTIARCH) + add_compile_definitions(KFR_ENABLED_ARCHS="${KFR_ARCHS_COMMA}") +endif () + add_library(use_arch INTERFACE) target_set_arch(use_arch INTERFACE ${KFR_ARCH}) @@ -184,12 +230,7 @@ target_compile_options(kfr INTERFACE "$<$<CONFIG:DEBUG>:-DKFR_DEBUG>") if (APPLE) target_compile_options(kfr INTERFACE -faligned-allocation) endif () -if (NOT IOS) - if (NOT MSVC OR CLANG) - # 
target_compile_options(kfr - # INTERFACE "${CLANG_ARG_PREFIX}-mstackrealign") - endif () -endif () + if (MSVC) target_compile_options(kfr INTERFACE -bigobj -EHsc) else () @@ -205,7 +246,8 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") target_compile_options(kfr INTERFACE -Wno-ignored-qualifiers -Wno-psabi) endif () if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - target_compile_options(kfr INTERFACE -Wno-c++1z-extensions -Wno-psabi -Wno-unknown-warning-option) + target_compile_options(kfr INTERFACE -Wno-c++1z-extensions -Wno-psabi + -Wno-unknown-warning-option) endif () if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") target_compile_options(kfr INTERFACE /wd4141) @@ -218,19 +260,10 @@ if (KFR_EXTENDED_TESTS) target_compile_definitions(kfr INTERFACE -DKFR_EXTENDED_TESTS) endif () -if (X86) - add_executable(detect_cpu ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect_cpu.cpp) - target_link_libraries(detect_cpu PRIVATE kfr) - target_set_arch(detect_cpu PRIVATE generic) -endif () - -function (add_arch_library NAME ARCH SRCS DEFS) - add_library(${NAME}_${ARCH} ${SRCS}) - target_link_libraries(${NAME}_${ARCH} kfr) - target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH}) - target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS}) - target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH}) -endfunction () +# if (X86) add_executable(detect_cpu +# ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect_cpu.cpp) +# target_link_libraries(detect_cpu PRIVATE kfr) target_set_arch(detect_cpu +# PRIVATE generic) endif () if (ENABLE_EXAMPLES) add_subdirectory(examples) @@ -240,26 +273,6 @@ if (ENABLE_TESTS) add_subdirectory(tests) endif () -add_library(kfr_io ${KFR_IO_SRC}) -target_link_libraries(kfr_io kfr) -target_link_libraries(kfr_io use_arch) - -if (KFR_INSTALL_LIBRARIES) - install( - TARGETS kfr kfr_io - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin) - - if (KFR_ENABLE_DFT AND KFR_ENABLE_CAPI_BUILD) - install( - TARGETS kfr_capi - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - 
RUNTIME DESTINATION bin) - endif () -endif () - set(kfr_defines) function (append_defines_from target) diff --git a/cmake/add_kfr_library.cmake b/cmake/add_kfr_library.cmake @@ -0,0 +1,37 @@ +function (add_kfr_library) + + cmake_parse_arguments(LIB "MULTIARCH" "NAME" "SOURCES;LIBRARIES;DEFINITIONS;OPTIONS" + ${ARGN}) + + set(${LIB_NAME}_LIBS PARENT_SCOPE) + if (KFR_ENABLE_MULTIARCH AND LIB_MULTIARCH) + add_library(${LIB_NAME} INTERFACE) + foreach (ARCH IN LISTS KFR_ARCHS) + add_library(${LIB_NAME}_${ARCH} STATIC ${LIB_SOURCES}) + target_compile_definitions(${LIB_NAME}_${ARCH} PRIVATE CMT_MULTI=1) + foreach (ENABLED_ARCH IN LISTS KFR_ARCHS) + string(TOUPPER ${ENABLED_ARCH} ENABLED_ARCH_UPPER) + target_compile_definitions(${LIB_NAME}_${ARCH} PRIVATE CMT_MULTI_ENABLED_${ENABLED_ARCH_UPPER}=1) + endforeach() + list(APPEND ${LIB_NAME}_LIBS ${LIB_NAME}_${ARCH}) + target_link_libraries(${LIB_NAME} INTERFACE ${LIB_NAME}_${ARCH}) + target_set_arch(${LIB_NAME}_${ARCH} PRIVATE ${ARCH}) + endforeach () + list(GET KFR_ARCHS 0 BASE_ARCH) + target_compile_definitions(${LIB_NAME}_${BASE_ARCH} PRIVATE CMT_BASE_ARCH=1) + + link_as_whole(${LIB_NAME} INTERFACE ${LIB_NAME}_${BASE_ARCH}) + else () + add_library(${LIB_NAME} STATIC ${LIB_SOURCES}) + list(APPEND ${LIB_NAME}_LIBS ${LIB_NAME}) + target_set_arch(${LIB_NAME} PRIVATE ${KFR_ARCH}) + endif () + + foreach (LIB IN LISTS ${LIB_NAME}_LIBS) + target_link_libraries(${LIB} PUBLIC kfr) + target_link_libraries(${LIB} PRIVATE ${LIB_LIBRARIES}) + target_compile_definitions(${LIB} PRIVATE ${LIB_DEFINITIONS}) + target_compile_options(${LIB} PRIVATE ${LIB_OPTIONS}) + endforeach () + +endfunction () diff --git a/cmake/detect_cpu.cpp b/cmake/detect_cpu.cpp @@ -4,6 +4,10 @@ using namespace kfr; int main() { +#ifdef CPU_target + cpu_t cpu = cpu_t::native; +#else cpu_t cpu = kfr::internal_generic::detect_cpu(); +#endif printf("%s", cpu_name(cpu)); -} -\ No newline at end of file +} diff --git a/cmake/link_as_whole.cmake b/cmake/link_as_whole.cmake 
@@ -0,0 +1,13 @@ + +function (link_as_whole TARGET TYPE LIBRARY) + if (APPLE) + target_link_options(${TARGET} ${TYPE} -Wl,-force_load + $<TARGET_FILE:${LIBRARY}>) + elseif (WIN32) + target_link_options(${TARGET} ${TYPE} + /WHOLEARCHIVE:$<TARGET_FILE:${LIBRARY}>) + else () + target_link_options(${TARGET} ${TYPE} -Wl,--push-state,--whole-archive + $<TARGET_FILE:${LIBRARY}> -Wl,--pop-state) + endif () +endfunction () diff --git a/docs/docs/installation.md b/docs/docs/installation.md @@ -286,7 +286,7 @@ fft_specialization<double, 7>(avx2): 0, 128, 3072, 0, 1, 0, 0, 0, 1, 0, 0 #### 2. Multiple architectures (best performance) -Setting `KFR_ENABLE_DFT_MULTIARCH` to `ON` enables multiple architectures. +Setting `KFR_ENABLE_MULTIARCH` to `ON` enables multiple architectures. In this case instead of a single `libkfr_dft.a` multiple arch-specific libraries will be installed. ``` cmake -GNinja -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DKFR_ENABLE_DFT_MULTIARCH=ON .. @@ -341,7 +341,7 @@ cmake -GNinja -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCM ninja ninja install # This installs kfr_dft_sse2.lib kfr_dft_sse41.lib kfr_dft_avx.lib kfr_dft_avx2.lib kfr_dft_avx512.lib to CMAKE_BINARY_DIR/install ``` -`KFR_ENABLE_DFT_MULTIARCH=ON` is the key option here. +`KFR_ENABLE_MULTIARCH=ON` is the key option here. 
``` /WHOLEARCHIVE:"PATH-TO-INSTALLED-KFR/lib/kfr_dft_sse2.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_sse41.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_avx.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_avx2.lib" "PATH-TO-INSTALLED-KFR/lib/kfr_dft_avx512.lib" ``` diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt @@ -24,16 +24,16 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin) file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/svg) add_executable(biquads biquads.cpp) -target_link_libraries(biquads kfr use_arch) +target_link_libraries(biquads kfr kfr_dsp use_arch) add_executable(iir iir.cpp) -target_link_libraries(iir kfr use_arch) +target_link_libraries(iir kfr kfr_dsp use_arch) add_executable(window window.cpp) target_link_libraries(window kfr use_arch) add_executable(fir fir.cpp) -target_link_libraries(fir kfr use_arch) +target_link_libraries(fir kfr kfr_dsp use_arch) if (KFR_ENABLE_DFT) target_link_libraries(fir kfr_dft use_arch) @@ -41,7 +41,7 @@ if (KFR_ENABLE_DFT) endif () add_executable(sample_rate_conversion sample_rate_conversion.cpp) -target_link_libraries(sample_rate_conversion kfr kfr_io use_arch) +target_link_libraries(sample_rate_conversion kfr kfr_io kfr_dsp use_arch) if (KFR_ENABLE_DFT) add_executable(dft dft.cpp) diff --git a/include/kfr/base/filter.hpp b/include/kfr/base/filter.hpp @@ -113,6 +113,7 @@ public: explicit expression_filter(expression_handle<T, 1> filter_expr) : filter_expr(std::move(filter_expr)) {} protected: + expression_filter() = default; void process_buffer(T* dest, const T* src, size_t size) override { substitute(filter_expr, to_handle(make_univector(src, size))); diff --git a/include/kfr/capi.h b/include/kfr/capi.h @@ -55,9 +55,13 @@ #else #define KFR_API_SPEC KFR_CDECL __declspec(dllimport) #endif +#else // !WIN32 +#ifdef KFR_BUILDING_DLL +#define KFR_API_SPEC KFR_CDECL __attribute__((visibility("default"))) #else #define KFR_API_SPEC KFR_CDECL #endif +#endif #ifdef __cplusplus extern "C" @@ -84,6 +88,8 @@ extern 
"C" KFR_API_SPEC const char* kfr_enabled_archs(); KFR_API_SPEC int kfr_current_arch(); + KFR_API_SPEC const char* kfr_last_error(); + typedef float kfr_f32; typedef double kfr_f64; #if defined __STDC_IEC_559_COMPLEX__ && !defined KFR_NO_C_COMPLEX_TYPES diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -706,54 +706,6 @@ extern char* gets(char* __s); #define CMT_NARGS2(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, ...) _10 #define CMT_NARGS(...) CMT_NARGS2(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#ifdef CMT_MULTI_ENABLED_AVX512 -#define CMT_IF_ENABLED_AVX512(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_AVX512(...) -#endif - -#ifdef CMT_MULTI_ENABLED_AVX2 -#define CMT_IF_ENABLED_AVX2(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_AVX2(...) -#endif - -#ifdef CMT_MULTI_ENABLED_AVX -#define CMT_IF_ENABLED_AVX(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_AVX(...) -#endif - -#ifdef CMT_MULTI_ENABLED_SSE42 -#define CMT_IF_ENABLED_SSE42(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_SSE42(...) -#endif - -#ifdef CMT_MULTI_ENABLED_SSE41 -#define CMT_IF_ENABLED_SSE41(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_SSE41(...) -#endif - -#ifdef CMT_MULTI_ENABLED_SSSE3 -#define CMT_IF_ENABLED_SSSE3(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_SSSE3(...) -#endif - -#ifdef CMT_MULTI_ENABLED_SSE3 -#define CMT_IF_ENABLED_SSE3(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_SSE3(...) -#endif - -#ifdef CMT_MULTI_ENABLED_SSE2 -#define CMT_IF_ENABLED_SSE2(...) __VA_ARGS__ -#else -#define CMT_IF_ENABLED_SSE2(...) -#endif - #define CMT_IF_IS_AVX512(...) #define CMT_IF_IS_AVX2(...) #define CMT_IF_IS_AVX(...) @@ -789,46 +741,16 @@ extern char* gets(char* __s); #define CMT_IF_IS_SSE2(...) __VA_ARGS__ #endif -#ifdef CMT_MULTI - -#define CMT_MULTI_PROTO_GATE(...) 
\ - if (cpu == cpu_t::runtime) \ - cpu = get_cpu(); \ - switch (cpu) \ - { \ - case cpu_t::avx512: \ - CMT_IF_ENABLED_AVX512(return avx512::__VA_ARGS__;) \ - case cpu_t::avx2: \ - CMT_IF_ENABLED_AVX2(return avx2::__VA_ARGS__;) \ - case cpu_t::avx: \ - CMT_IF_ENABLED_AVX(return avx::__VA_ARGS__;) \ - case cpu_t::sse41: \ - CMT_IF_ENABLED_SSE41(return sse41::__VA_ARGS__;) \ - case cpu_t::ssse3: \ - CMT_IF_ENABLED_SSSE3(return ssse3::__VA_ARGS__;) \ - case cpu_t::sse3: \ - CMT_IF_ENABLED_SSE3(return sse3::__VA_ARGS__;) \ - case cpu_t::sse2: \ - CMT_IF_ENABLED_SSE2(return sse2::__VA_ARGS__;) \ - default: \ - return {}; \ - } -#define CMT_MULTI_PROTO(...) \ - inline namespace CMT_ARCH_NAME \ +#ifdef CMT_COMPILER_GNU +#define CMT_UNREACHABLE \ + do \ { \ - __VA_ARGS__ \ - } \ - CMT_IF_ENABLED_SSE2(CMT_IF_IS_SSE2(inline) namespace sse2{ __VA_ARGS__ }) \ - CMT_IF_ENABLED_SSE3(CMT_IF_IS_SSE3(inline) namespace sse3{ __VA_ARGS__ }) \ - CMT_IF_ENABLED_SSSE3(CMT_IF_IS_SSSE3(inline) namespace ssse3{ __VA_ARGS__ }) \ - CMT_IF_ENABLED_SSE41(CMT_IF_IS_SSE41(inline) namespace sse41{ __VA_ARGS__ }) \ - CMT_IF_ENABLED_AVX(CMT_IF_IS_AVX(inline) namespace avx{ __VA_ARGS__ }) \ - CMT_IF_ENABLED_AVX2(CMT_IF_IS_AVX2(inline) namespace avx2{ __VA_ARGS__ }) \ - CMT_IF_ENABLED_AVX512(CMT_IF_IS_AVX512(inline) namespace avx512{ __VA_ARGS__ }) -#else -#define CMT_MULTI_PROTO(...) 
\ - inline namespace CMT_ARCH_NAME \ + __builtin_unreachable(); \ + } while (0) +#elif defined(_MSC_VER) +#define CMT_UNREACHABLE \ + do \ { \ - __VA_ARGS__ \ - } + __assume(false); \ + } while (0) #endif diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp @@ -42,41 +42,40 @@ CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow") namespace kfr { -inline namespace CMT_ARCH_NAME -{ -namespace intrinsics +namespace internal_generic { template <typename T> -univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2); -template <typename T> -univector<T> correlate(const univector_ref<const T>& src1, const univector_ref<const T>& src2); -template <typename T> -univector<T> autocorrelate(const univector_ref<const T>& src1); -} // namespace intrinsics +univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2, + bool correlate = false); +} /// @brief Convolution -template <typename T, univector_tag Tag1, univector_tag Tag2> -univector<T> convolve(const univector<T, Tag1>& src1, const univector<T, Tag2>& src2) +template <typename T1, typename T2, univector_tag Tag1, univector_tag Tag2, + CMT_ENABLE_IF(std::is_same_v<std::remove_const_t<T1>, std::remove_const_t<T2>>)> +univector<std::remove_const_t<T1>> convolve(const univector<T1, Tag1>& src1, const univector<T2, Tag2>& src2) { - return intrinsics::convolve(src1.slice(), src2.slice()); + return internal_generic::convolve(src1.slice(), src2.slice()); } /// @brief Correlation -template <typename T, univector_tag Tag1, univector_tag Tag2> -univector<T> correlate(const univector<T, Tag1>& src1, const univector<T, Tag2>& src2) +template <typename T1, typename T2, univector_tag Tag1, univector_tag Tag2, + CMT_ENABLE_IF(std::is_same_v<std::remove_const_t<T1>, std::remove_const_t<T2>>)> +univector<std::remove_const_t<T1>> correlate(const univector<T1, Tag1>& src1, const univector<T2, Tag2>& src2) { - return intrinsics::correlate(src1.slice(), 
src2.slice()); + return internal_generic::convolve(src1.slice(), src2.slice(), true); } /// @brief Auto-correlation template <typename T, univector_tag Tag1> -univector<T> autocorrelate(const univector<T, Tag1>& src) +univector<std::remove_const_t<T>> autocorrelate(const univector<T, Tag1>& src) { - return intrinsics::autocorrelate(src.slice()); + univector<std::remove_const_t<T>> result = internal_generic::convolve(src.slice(), src.slice(), true); + result = result.slice(result.size() / 2); + return result; } -namespace internal +namespace internal_generic { /// @brief Utility class to abstract real/complex differences template <typename T> @@ -94,7 +93,7 @@ struct dft_conv_plan<complex<T>> : public dft_plan<T> size_t csize() const { return this->size; } }; -} // namespace internal +} // namespace internal_generic /// @brief Convolution using Filter API template <typename T> @@ -118,7 +117,7 @@ protected: using ST = subtype<T>; constexpr static bool real_fft = !std::is_same_v<T, complex<ST>>; - using plan_t = internal::dft_conv_plan<T>; + using plan_t = internal_generic::dft_conv_plan<T>; // Length of filter data. size_t data_size; @@ -147,17 +146,6 @@ protected: // Overlap saved from previous block to add into current block. 
univector<T> overlap; }; -} // namespace CMT_ARCH_NAME - -CMT_MULTI_PROTO(template <typename T> - filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size);) -#ifdef CMT_MULTI -template <typename T> -KFR_FUNCTION filter<T>* make_convolve_filter(cpu_t cpu, const univector_ref<const T>& taps, size_t block_size) -{ - CMT_MULTI_PROTO_GATE(make_convolve_filter<T>(taps, block_size)) -} -#endif } // namespace kfr CMT_PRAGMA_GNU(GCC diagnostic pop) diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -103,7 +103,7 @@ enum class dft_type enum class dft_order { normal, - internal, // possibly bit/digit-reversed, implementation-defined, faster to compute + internal, // possibly bit/digit-reversed, implementation-defined, may be faster to compute }; enum class dft_pack_format @@ -124,8 +124,10 @@ struct dft_stage; template <typename T> using dft_stage_ptr = std::unique_ptr<dft_stage<T>>; -CMT_MULTI_PROTO(template <typename T> void dft_initialize(dft_plan<T>& plan);) -CMT_MULTI_PROTO(template <typename T> void dft_real_initialize(dft_plan_real<T>& plan);) +template <typename T> +void dft_initialize(dft_plan<T>& plan); +template <typename T> +void dft_real_initialize(dft_plan_real<T>& plan); /// @brief 1D DFT/FFT template <typename T> @@ -146,38 +148,17 @@ struct dft_plan bool is_initialized() const { return size != 0; } - explicit dft_plan(cpu_t cpu, size_t size, dft_order order = dft_order::normal) - : size(size), temp_size(0), data_size(0), arblen(false) + [[deprecated("cpu parameter is deprecated. 
Runtime dispatch is used if built with " + "KFR_ENABLE_MULTIARCH")]] explicit dft_plan(cpu_t cpu, size_t size, + dft_order order = dft_order::normal) + : dft_plan(size, order) { -#ifdef KFR_DFT_MULTI - if (cpu == cpu_t::runtime) - cpu = get_cpu(); - switch (cpu) - { - case cpu_t::avx512: - CMT_IF_ENABLED_AVX512(avx512::dft_initialize(*this); break;) - case cpu_t::avx2: - CMT_IF_ENABLED_AVX2(avx2::dft_initialize(*this); break;) - case cpu_t::avx: - CMT_IF_ENABLED_AVX(avx::dft_initialize(*this); break;) - case cpu_t::sse42: - case cpu_t::sse41: - CMT_IF_ENABLED_SSE41(sse41::dft_initialize(*this); break;) - case cpu_t::ssse3: - CMT_IF_ENABLED_SSSE3(ssse3::dft_initialize(*this); break;) - case cpu_t::sse3: - CMT_IF_ENABLED_SSE3(sse3::dft_initialize(*this); break;) - default: - CMT_IF_ENABLED_SSE2(sse2::dft_initialize(*this); break;); - } -#else (void)cpu; - dft_initialize(*this); -#endif } explicit dft_plan(size_t size, dft_order order = dft_order::normal) - : dft_plan(cpu_t::runtime, size, order) + : size(size), temp_size(0), data_size(0), arblen(false) { + dft_initialize(*this); } void dump() const @@ -411,40 +392,19 @@ struct dft_plan_real : dft_plan<T> bool is_initialized() const { return size != 0; } - explicit dft_plan_real(cpu_t cpu, size_t size, dft_pack_format fmt = dft_pack_format::CCs) - : dft_plan<T>(typename dft_plan<T>::noinit{}, size / 2), size(size), fmt(fmt) + [[deprecated("cpu parameter is deprecated. 
Runtime dispatch is used if built with " + "KFR_ENABLE_MULTIARCH")]] explicit dft_plan_real(cpu_t cpu, size_t size, + dft_pack_format fmt = dft_pack_format::CCs) + : dft_plan_real(size, fmt) { - KFR_LOGIC_CHECK(is_even(size), "dft_plan_real requires size to be even"); -#ifdef KFR_DFT_MULTI - if (cpu == cpu_t::runtime) - cpu = get_cpu(); - switch (cpu) - { - case cpu_t::avx512: - CMT_IF_ENABLED_AVX512(avx512::dft_real_initialize(*this); break;) - case cpu_t::avx2: - CMT_IF_ENABLED_AVX2(avx2::dft_real_initialize(*this); break;) - case cpu_t::avx: - CMT_IF_ENABLED_AVX(avx::dft_real_initialize(*this); break;) - case cpu_t::sse42: - case cpu_t::sse41: - CMT_IF_ENABLED_SSE41(sse41::dft_real_initialize(*this); break;) - case cpu_t::ssse3: - CMT_IF_ENABLED_SSSE3(ssse3::dft_real_initialize(*this); break;) - case cpu_t::sse3: - CMT_IF_ENABLED_SSE3(sse3::dft_real_initialize(*this); break;) - default: - CMT_IF_ENABLED_SSE2(sse2::dft_real_initialize(*this); break;); - } -#else (void)cpu; - dft_real_initialize(*this); -#endif } explicit dft_plan_real(size_t size, dft_pack_format fmt = dft_pack_format::CCs) - : dft_plan_real(cpu_t::runtime, size, fmt) + : dft_plan<T>(typename dft_plan<T>::noinit{}, size / 2), size(size), fmt(fmt) { + KFR_LOGIC_CHECK(is_even(size), "dft_plan_real requires size to be even"); + dft_real_initialize(*this); } void execute(complex<T>*, const complex<T>*, u8*, bool = false) const = delete; @@ -501,9 +461,10 @@ struct dct_plan : dft_plan<T> { dct_plan(size_t size) : dft_plan<T>(size) { this->temp_size += sizeof(complex<T>) * size * 2; } - dct_plan(cpu_t cpu, size_t size) : dft_plan<T>(cpu, size) + [[deprecated("cpu parameter is deprecated. 
Runtime dispatch is used if built with " + "KFR_ENABLE_MULTIARCH")]] dct_plan(cpu_t cpu, size_t size) + : dct_plan(size) { - this->temp_size += sizeof(complex<T>) * size * 2; } KFR_MEM_INTRINSIC void execute(T* out, const T* in, u8* temp, bool inverse = false) const diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp @@ -89,9 +89,6 @@ struct biquad_params biquad_params<T> normalized_all() const { return normalized_a0().normalized_b0(); } }; -inline namespace CMT_ARCH_NAME -{ - template <typename T, size_t filters> struct biquad_state { @@ -139,6 +136,9 @@ struct biquad_block } }; +inline namespace CMT_ARCH_NAME +{ + template <size_t filters, typename T, typename E1> struct expression_biquads_l : public expression_with_traits<E1> { @@ -170,13 +170,14 @@ struct expression_biquads : expression_with_traits<E1> }; template <size_t filters, typename T> -KFR_INTRINSIC vec<T, filters> biquad_process(const biquad_block<T, filters>& bq, - biquad_state<T, filters>& state, const vec<T, filters>& in) +KFR_INTRINSIC T biquad_process(vec<T, filters>& out, const biquad_block<T, filters>& bq, + biquad_state<T, filters>& state, T in0, const vec<T, filters>& delayline) { - const vec<T, filters> out = bq.b0 * in + state.s1; - state.s1 = state.s2 + bq.b1 * in - bq.a1 * out; - state.s2 = bq.b2 * in - bq.a2 * out; - return out; + vec<T, filters> in = insertleft(in0, delayline); + out = bq.b0 * in + state.s1; + state.s1 = state.s2 + bq.b1 * in - bq.a1 * out; + state.s2 = bq.b2 * in - bq.a2 * out; + return out[filters - 1]; } template <size_t filters, typename T, typename E1, size_t N> @@ -189,8 +190,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads_l<filters, T, E1>& CMT_LOOP_UNROLL for (size_t i = 0; i < N; i++) { - self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out)); - out[i] = self.state.out[filters - 1]; + out[i] = biquad_process(self.state.out, self.bq, self.state, in[i], self.state.out); } return out; @@ -204,7 
+204,7 @@ KFR_INTRINSIC void begin_pass(const expression_biquads<filters, T, E1>& self, sh for (index_t i = 0; i < filters - 1; i++) { const vec<T, 1> in = i < size ? get_elements(self.first(), shape<1>{ i }, axis_params_v<0, 1>) : 0; - self.state.out = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out)); + biquad_process(self.state.out, self.bq, self.state, in[0], self.state.out); } } template <size_t filters, typename T, typename E1> @@ -226,8 +226,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& s CMT_LOOP_UNROLL for (size_t i = 0; i < N; i++) { - self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out)); - out[i] = self.state.out[filters - 1]; + out[i] = biquad_process(self.state.out, self.bq, self.state, in[i], self.state.out); } if (index.front() + N == self.block_end) self.saved_state = self.state; @@ -237,8 +236,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& s CMT_LOOP_UNROLL for (size_t i = 0; i < N; i++) { - self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out)); - out[i] = self.state.out[filters - 1]; + out[i] = biquad_process(self.state.out, self.bq, self.state, T(0), self.state.out); } } else @@ -248,14 +246,12 @@ KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& s { const vec<T, 1> in = get_elements(self.first(), index.add_at(i, cval<index_t, 0>), axis_params_v<0, 1>); - self.state.out = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out)); - out[i] = self.state.out[filters - 1]; + out[i] = biquad_process(self.state.out, self.bq, self.state, in[0], self.state.out); } self.saved_state = self.state; for (; i < N; i++) { - self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out)); - out[i] = self.state.out[filters - 1]; + out[i] = biquad_process(self.state.out, self.bq, self.state, T(0), self.state.out); } } return out; @@ 
-324,14 +320,13 @@ KFR_FUNCTION expression_handle<T, 1> biquad(const std::vector<biquad_params<T>>& return biquad<maxfiltercount>(bq.data(), bq.size(), std::forward<E1>(e1)); } -template <typename T, size_t maxfiltercount = 4> +} // namespace CMT_ARCH_NAME + +template <typename T> class biquad_filter : public expression_filter<T> { public: - biquad_filter(const biquad_params<T>* bq, size_t count) - : expression_filter<T>(biquad<maxfiltercount>(bq, count, placeholder<T>())) - { - } + biquad_filter(const biquad_params<T>* bq, size_t count); template <size_t N> biquad_filter(const biquad_params<T> (&bq)[N]) : biquad_filter(bq, N) @@ -340,17 +335,4 @@ public: biquad_filter(const std::vector<biquad_params<T>>& bq) : biquad_filter(bq.data(), bq.size()) {} }; - -} // namespace CMT_ARCH_NAME - -CMT_MULTI_PROTO(template <typename T, size_t maxfiltercount> - filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count);) - -#ifdef CMT_MULTI -template <typename T, size_t maxfiltercount> -KFR_FUNCTION filter<T>* make_biquad_filter(cpu_t cpu, const biquad_params<T>* bq, size_t count) -{ - CMT_MULTI_PROTO_GATE(make_biquad_filter<T, maxfiltercount>(bq, count)) -} -#endif } // namespace kfr diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -39,8 +39,6 @@ CMT_PRAGMA_MSVC(warning(disable : 4244)) namespace kfr { -inline namespace CMT_ARCH_NAME -{ template <typename T, size_t Size> using fir_taps = univector<T, Size>; @@ -90,6 +88,9 @@ struct moving_sum_state<U, tag_dynamic_vector> mutable size_t head_cursor, tail_cursor; }; +inline namespace CMT_ARCH_NAME +{ + template <size_t tapcount, typename T, typename U, typename E1, bool stateless = false> struct expression_short_fir : expression_with_traits<E1> { @@ -281,6 +282,8 @@ short_fir(short_fir_state<next_poweroftwo(TapCount - 1) + 1, T, U>& state, E1&& std::forward<E1>(e1), state); } +} // namespace CMT_ARCH_NAME + template <typename T, typename U = T> class fir_filter : public filter<U> { @@ -297,34 
+300,15 @@ public: } protected: - void process_buffer(U* dest, const U* src, size_t size) final - { - make_univector(dest, size) = fir(state, make_univector(src, size)); - } - void process_expression(U* dest, const expression_handle<U, 1>& src, size_t size) final - { - make_univector(dest, size) = fir(state, src); - } + void process_buffer(U* dest, const U* src, size_t size) final; + void process_expression(U* dest, const expression_handle<U, 1>& src, size_t size) final; -private: fir_state<T, U> state; }; template <typename T, typename U = T> using filter_fir = fir_filter<T, U>; -} // namespace CMT_ARCH_NAME - -CMT_MULTI_PROTO(template <typename U, typename T> - filter<U>* make_fir_filter(const univector_ref<const T>& taps);) - -#ifdef CMT_MULTI -template <typename U, typename T> -KFR_FUNCTION filter<U>* make_fir_filter(cpu_t cpu, const univector_ref<const T>& taps) -{ - CMT_MULTI_PROTO_GATE(make_fir_filter<U>(taps)) -} -#endif } // namespace kfr CMT_PRAGMA_MSVC(warning(pop)) diff --git a/include/kfr/dsp/sample_rate_conversion.hpp b/include/kfr/dsp/sample_rate_conversion.hpp @@ -46,9 +46,6 @@ enum class sample_rate_conversion_quality : int perfect = 12, }; -inline namespace CMT_ARCH_NAME -{ - using resample_quality = sample_rate_conversion_quality; /// @brief Sample Rate converter @@ -58,7 +55,7 @@ struct samplerate_converter using itype = i64; using ftype = subtype<T>; -private: +protected: KFR_MEM_INTRINSIC ftype window(ftype n) const { return modzerobessel(kaiser_beta * sqrt(1 - sqr(2 * n - 1))) * reciprocal(modzerobessel(kaiser_beta)); @@ -95,40 +92,7 @@ public: } samplerate_converter(sample_rate_conversion_quality quality, itype interpolation_factor, - itype decimation_factor, ftype scale = ftype(1), ftype cutoff = 0.5f) - : kaiser_beta(window_param(quality)), depth(static_cast<itype>(filter_order(quality))), - input_position(0), output_position(0) - { - const i64 gcf = gcd(interpolation_factor, decimation_factor); - interpolation_factor /= gcf; - 
decimation_factor /= gcf; - - taps = depth * interpolation_factor; - order = size_t(depth * interpolation_factor - 1); - - this->interpolation_factor = interpolation_factor; - this->decimation_factor = decimation_factor; - - const itype halftaps = taps / 2; - filter = univector<T>(size_t(taps), T()); - delay = univector<T>(size_t(depth), T()); - - cutoff = cutoff - transition_width() / c_pi<ftype, 4>; - - cutoff = cutoff / std::max(decimation_factor, interpolation_factor); - - for (itype j = 0, jj = 0; j < taps; j++) - { - filter[size_t(j)] = - sinc((jj - halftaps) * cutoff * c_pi<ftype, 2>) * window(ftype(jj) / ftype(taps - 1)); - jj += size_t(interpolation_factor); - if (jj >= taps) - jj = jj - taps + 1; - } - - const T s = reciprocal(sum(filter)) * static_cast<ftype>(interpolation_factor * scale); - filter = filter * s; - } + itype decimation_factor, ftype scale = ftype(1), ftype cutoff = 0.5f); KFR_MEM_INTRINSIC itype input_position_to_intermediate(itype in_pos) const { @@ -186,56 +150,9 @@ public: template <univector_tag Tag> size_t process(univector<T, Tag>& output, univector_ref<const T> input) { - const itype required_input_size = input_size_for_output(output.size()); - - const itype input_size = input.size(); - for (size_t i = 0; i < output.size(); i++) - { - const itype intermediate_index = - output_position_to_intermediate(static_cast<itype>(i) + output_position); - const itype intermediate_start = intermediate_index - taps + 1; - const std::lldiv_t input_pos = - floor_div(intermediate_start + interpolation_factor - 1, interpolation_factor); - const itype input_start = input_pos.quot; // first input sample - const itype tap_start = interpolation_factor - 1 - input_pos.rem; - const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(tap_start * depth)); - - if (input_start >= input_position + input_size) - { - output[i] = T(0); - } - else if (input_start >= input_position) - { - output[i] = - dotproduct(truncate(padded(input.slice(input_start - 
input_position, depth)), depth), - tap_ptr.truncate(depth)); - } - else - { - const itype prev_count = input_position - input_start; - output[i] = - dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr.truncate(prev_count)) + - dotproduct(truncate(padded(input.truncate(size_t(depth - prev_count))), - size_t(depth - prev_count)), - tap_ptr.slice(size_t(prev_count), size_t(depth - prev_count))); - } - } - - if (required_input_size >= depth) - { - delay.slice(0, delay.size()) = padded(input.slice(size_t(required_input_size - depth))); - } - else - { - delay.truncate(size_t(depth - required_input_size)) = delay.slice(size_t(required_input_size)); - delay.slice(size_t(depth - required_input_size)) = padded(input); - } - - input_position += required_input_size; - output_position += output.size(); - - return required_input_size; + return process_impl(output.slice(), input); } + KFR_MEM_INTRINSIC double get_fractional_delay() const { return (taps - 1) * 0.5 / decimation_factor; } KFR_MEM_INTRINSIC size_t get_delay() const { return static_cast<size_t>(get_fractional_delay()); } @@ -247,10 +164,17 @@ public: itype decimation_factor; univector<T> filter; univector<T> delay; + +protected: itype input_position; itype output_position; + + size_t process_impl(univector_ref<T> output, univector_ref<const T> input); }; +inline namespace CMT_ARCH_NAME +{ + namespace internal { diff --git a/include/kfr/kfr.h b/include/kfr/kfr.h @@ -68,9 +68,16 @@ #define KFR_BUILD_DETAILS_2 "" #endif +#ifdef KFR_ENABLED_ARCHS +#define KFR_ENABLED_ARCHS_LIST "[" KFR_ENABLED_ARCHS "] " +#else +#define KFR_ENABLED_ARCHS_LIST "" +#endif + #define KFR_VERSION_FULL \ "KFR " KFR_VERSION_STRING KFR_DEBUG_STR \ - " " CMT_STRINGIFY(CMT_ARCH_NAME) " " CMT_ARCH_BITNESS_NAME " (" CMT_COMPILER_FULL_NAME "/" CMT_OS_NAME \ + " " CMT_STRINGIFY(CMT_ARCH_NAME) " " KFR_ENABLED_ARCHS_LIST CMT_ARCH_BITNESS_NAME \ + " (" CMT_COMPILER_FULL_NAME "/" CMT_OS_NAME \ ")" KFR_BUILD_DETAILS_1 KFR_BUILD_DETAILS_2 #ifdef 
__cplusplus diff --git a/include/kfr/multiarch.h b/include/kfr/multiarch.h @@ -0,0 +1,196 @@ +#include "cident.h" + +#ifdef CMT_ARCH_X86 + +// x86 + +#ifdef CMT_MULTI_ENABLED_AVX512 +#define CMT_IF_ENABLED_AVX512(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_AVX512(...) +#endif + +#ifdef CMT_MULTI_ENABLED_AVX2 +#define CMT_IF_ENABLED_AVX2(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_AVX2(...) +#endif + +#ifdef CMT_MULTI_ENABLED_AVX +#define CMT_IF_ENABLED_AVX(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_AVX(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE42 +#define CMT_IF_ENABLED_SSE42(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE42(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE41 +#define CMT_IF_ENABLED_SSE41(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE41(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSSE3 +#define CMT_IF_ENABLED_SSSE3(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSSE3(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE3 +#define CMT_IF_ENABLED_SSE3(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE3(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE2 +#define CMT_IF_ENABLED_SSE2(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE2(...) +#endif + +#ifdef CMT_MULTI + +#define CMT_MULTI_PROTO_GATE(...) \ + if (cpu == cpu_t::runtime) \ + cpu = get_cpu(); \ + switch (cpu) \ + { \ + case cpu_t::avx512: \ + CMT_IF_ENABLED_AVX512(return avx512::__VA_ARGS__;) \ + case cpu_t::avx2: \ + CMT_IF_ENABLED_AVX2(return avx2::__VA_ARGS__;) \ + case cpu_t::avx: \ + CMT_IF_ENABLED_AVX(return avx::__VA_ARGS__;) \ + case cpu_t::sse42: \ + CMT_IF_ENABLED_SSE42(return sse42::__VA_ARGS__;) \ + case cpu_t::sse41: \ + CMT_IF_ENABLED_SSE41(return sse41::__VA_ARGS__;) \ + case cpu_t::ssse3: \ + CMT_IF_ENABLED_SSSE3(return ssse3::__VA_ARGS__;) \ + case cpu_t::sse3: \ + CMT_IF_ENABLED_SSE3(return sse3::__VA_ARGS__;) \ + case cpu_t::sse2: \ + CMT_IF_ENABLED_SSE2(return sse2::__VA_ARGS__;) \ + default: \ + CMT_UNREACHABLE; \ + } + +#define CMT_MULTI_GATE(...) 
\ + switch (get_cpu()) \ + { \ + case cpu_t::avx512: \ + CMT_IF_ENABLED_AVX512({ \ + namespace ns = kfr::avx512; \ + __VA_ARGS__; \ + break; \ + }) \ + case cpu_t::avx2: \ + CMT_IF_ENABLED_AVX2({ \ + namespace ns = kfr::avx2; \ + __VA_ARGS__; \ + break; \ + }) \ + case cpu_t::avx: \ + CMT_IF_ENABLED_AVX({ \ + namespace ns = kfr::avx; \ + __VA_ARGS__; \ + break; \ + }) \ + case cpu_t::sse42: \ + CMT_IF_ENABLED_SSE42({ \ + namespace ns = kfr::sse42; \ + __VA_ARGS__; \ + break; \ + }) \ + case cpu_t::sse41: \ + CMT_IF_ENABLED_SSE41({ \ + namespace ns = kfr::sse41; \ + __VA_ARGS__; \ + break; \ + }) \ + case cpu_t::ssse3: \ + CMT_IF_ENABLED_SSSE3({ \ + namespace ns = kfr::ssse3; \ + __VA_ARGS__; \ + break; \ + }) \ + case cpu_t::sse3: \ + CMT_IF_ENABLED_SSE3({ \ + namespace ns = kfr::sse3; \ + __VA_ARGS__; \ + break; \ + }) \ + case cpu_t::sse2: \ + CMT_IF_ENABLED_SSE2({ \ + namespace ns = kfr::sse2; \ + __VA_ARGS__; \ + break; \ + }) \ + default: \ + CMT_UNREACHABLE; \ + } + +#define CMT_MULTI_PROTO(...) \ + CMT_IF_ENABLED_SSE2(CMT_IF_IS_SSE2(inline) namespace sse2{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_SSE3(CMT_IF_IS_SSE3(inline) namespace sse3{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_SSSE3(CMT_IF_IS_SSSE3(inline) namespace ssse3{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_SSE42(CMT_IF_IS_SSE42(inline) namespace sse42{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_SSE41(CMT_IF_IS_SSE41(inline) namespace sse41{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_AVX(CMT_IF_IS_AVX(inline) namespace avx{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_AVX2(CMT_IF_IS_AVX2(inline) namespace avx2{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_AVX512(CMT_IF_IS_AVX512(inline) namespace avx512{ __VA_ARGS__ }) +#else +#define CMT_MULTI_GATE(...) \ + do \ + { \ + namespace ns = kfr::CMT_ARCH_NAME; \ + __VA_ARGS__; \ + break; \ + } while (0) + +#define CMT_MULTI_PROTO(...) 
\ + inline namespace CMT_ARCH_NAME \ + { \ + __VA_ARGS__ \ + } +#endif + +#if defined(CMT_BASE_ARCH) || !defined(CMT_MULTI) +#define CMT_MULTI_NEEDS_GATE +#else +#endif + +#else + +// ARM + +#define CMT_MULTI_PROTO_GATE(...) \ + do \ + { \ + return CMT_ARCH_NAME::__VA_ARGS__; \ + } while (0) + +#define CMT_MULTI_GATE(...) \ + do \ + { \ + namespace ns = kfr::CMT_ARCH_NAME; \ + __VA_ARGS__; \ + break; \ + } while (0) + +#define CMT_MULTI_PROTO(...) \ + inline namespace CMT_ARCH_NAME \ + { \ + __VA_ARGS__ \ + } + +#if defined(CMT_BASE_ARCH) || !defined(CMT_MULTI) +#define CMT_MULTI_NEEDS_GATE +#else +#endif + +#endif diff --git a/sources.cmake b/sources.cmake @@ -20,6 +20,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/cident.h ${PROJECT_SOURCE_DIR}/include/kfr/config.h ${PROJECT_SOURCE_DIR}/include/kfr/kfr.h + ${PROJECT_SOURCE_DIR}/include/kfr/multiarch.h ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/endianness.hpp @@ -318,6 +319,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/cident.h ${PROJECT_SOURCE_DIR}/include/kfr/config.h ${PROJECT_SOURCE_DIR}/include/kfr/kfr.h + ${PROJECT_SOURCE_DIR}/include/kfr/multiarch.h ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/endianness.hpp @@ -457,12 +459,21 @@ set( ${PROJECT_SOURCE_DIR}/src/dft/convolution-impl.cpp ${PROJECT_SOURCE_DIR}/src/dft/dft-impl-f32.cpp ${PROJECT_SOURCE_DIR}/src/dft/dft-impl-f64.cpp + ${PROJECT_SOURCE_DIR}/src/dft/dft.cpp ${PROJECT_SOURCE_DIR}/src/dft/fft-impl-f32.cpp ${PROJECT_SOURCE_DIR}/src/dft/fft-impl-f64.cpp ) set( + KFR_DSP_SRC + ${PROJECT_SOURCE_DIR}/src/dsp/biquad.cpp + ${PROJECT_SOURCE_DIR}/src/dsp/fir.cpp + ${PROJECT_SOURCE_DIR}/src/dsp/sample_rate_conversion.cpp +) + + +set( KFR_IO_SRC ${PROJECT_SOURCE_DIR}/src/io/audiofile-impl.cpp ) diff --git a/src/capi/CMakeLists.txt 
b/src/capi/CMakeLists.txt @@ -16,118 +16,41 @@ cmake_minimum_required(VERSION 3.12) -if (WIN32) - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -endif () - -if (X86) - set(CAPI_ARCHS sse2 sse41 avx avx2 avx512) -else () - set(CAPI_ARCHS ${KFR_ARCH}) +if (NOT WIN32) + set(CMAKE_CXX_VISIBILITY_PRESET "hidden") + set(CMAKE_C_VISIBILITY_PRESET "hidden") endif () -set(CMAKE_CXX_VISIBILITY_PRESET "default") -set(CMAKE_C_VISIBILITY_PRESET "default") - if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-fdiagnostics-absolute-paths) endif () -if (MSVC) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") -endif () - -if (APPLE) - add_compile_options(-mmacosx-version-min=10.9) -endif () - -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) - -add_library(kfr_capi_all INTERFACE) -target_link_libraries(kfr_capi_all INTERFACE kfr) -if (NOT WIN32) - add_library(kfr_capi_all_pic INTERFACE) - target_link_libraries(kfr_capi_all_pic INTERFACE kfr) -endif () - -function (add_c_library ARCH) - add_library(kfr_capi_${ARCH} STATIC ${KFR_DFT_SRC} dsp.cpp) - target_link_libraries(kfr_capi_${ARCH} kfr) - target_set_arch(kfr_capi_${ARCH} PRIVATE ${ARCH}) - target_link_libraries(kfr_capi_all INTERFACE kfr_capi_${ARCH}) - dft_compile_options(kfr_capi_${ARCH}) - - if (NOT WIN32) - add_library(kfr_capi_${ARCH}_pic STATIC 
${KFR_DFT_SRC} dsp.cpp) - set_property(TARGET kfr_capi_${ARCH}_pic - PROPERTY POSITION_INDEPENDENT_CODE 1) - target_link_libraries(kfr_capi_${ARCH}_pic kfr) - target_set_arch(kfr_capi_${ARCH}_pic PRIVATE ${ARCH}) - - target_link_libraries(kfr_capi_all_pic INTERFACE kfr_capi_${ARCH}_pic) - dft_compile_options(kfr_capi_${ARCH}_pic) - endif () - -endfunction () - add_library(kfr_capi SHARED capi.cpp) - -foreach (A IN LISTS CAPI_ARCHS) - add_c_library(${A}) -endforeach() - -list(GET CAPI_ARCHS 0 FIRST_ARCH) - -link_as_whole(kfr_capi_all INTERFACE kfr_capi_${FIRST_ARCH}) -if (NOT WIN32) - link_as_whole(kfr_capi_all_pic INTERFACE kfr_capi_${FIRST_ARCH}_pic) -endif () - -target_compile_definitions( - kfr_capi - PRIVATE -DKFR_DFT_MULTI=1 - -DCMT_MULTI=1 - -DCMT_MULTI_ENABLED_SSE2=1 - -DCMT_MULTI_ENABLED_SSE41=1 - -DCMT_MULTI_ENABLED_AVX=1 - -DCMT_MULTI_ENABLED_AVX2=1 - -DCMT_MULTI_ENABLED_AVX512=1 - -DKFR_BUILDING_DLL=1) - -target_set_arch(kfr_capi PRIVATE ${FIRST_ARCH}) - -if (WIN32) - target_link_libraries(kfr_capi PRIVATE kfr kfr_capi_all) -else () - target_link_libraries(kfr_capi PRIVATE kfr kfr_capi_all_pic) - - if (APPLE) - message( - STATUS - "Minimum macOS version is set to ${CMAKE_OSX_DEPLOYMENT_TARGET}" - ) - message(STATUS "Set CMAKE_OSX_DEPLOYMENT_TARGET variable to change") - else () - set_property( - TARGET kfr_capi - APPEND - PROPERTY LINK_LIBRARIES - -nodefaultlibs - -Wl,-Bdynamic - -lm - -lc - -Wl,-Bstatic - -lstdc++ - -lgcc - -s) +target_link_libraries(kfr_capi PRIVATE kfr_dft kfr_dsp) + +target_compile_definitions(kfr_capi PRIVATE KFR_BUILDING_DLL=1) + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + set_property( + TARGET kfr_capi + APPEND + PROPERTY LINK_LIBRARIES + -nodefaultlibs + -Wl,-Bdynamic + -lm + -lc + -Wl,-Bstatic + -lstdc++ + -lgcc + -s) +endif () + +if (KFR_INSTALL_LIBRARIES) + if (KFR_ENABLE_CAPI_BUILD) + install( + TARGETS kfr_capi + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin) endif () endif () diff --git 
a/src/capi/capi.cpp b/src/capi/capi.cpp @@ -28,349 +28,452 @@ #include <kfr/capi.h> #include <kfr/dft.hpp> #include <kfr/dsp.hpp> +#include <kfr/multiarch.h> namespace kfr { +static thread_local std::array<char, 256> error; + +void reset_error() { std::fill(error.begin(), error.end(), 0); } +void set_error(std::string_view s) +{ + size_t n = std::min(s.size(), error.size() - 1); + auto end = std::copy_n(s.begin(), n, error.begin()); + std::fill(end, error.end(), 0); +} + +template <typename Fn, typename R = std::invoke_result_t<Fn>, typename T> +static R try_fn(Fn&& fn, T fallback) +{ + try + { + auto result = fn(); + reset_error(); + return result; + } + catch (std::exception& e) + { + set_error(e.what()); + return fallback; + } + catch (...) + { + set_error("(unknown exception)"); + return fallback; + } +} + +template <typename Fn> +static void try_fn(Fn&& fn) +{ + try + { + fn(); + reset_error(); + } + catch (std::exception& e) + { + set_error(e.what()); + } + catch (...) + { + set_error("(unknown exception)"); + } +} extern "C" { -#define KFR_ENABLED_ARCHS "sse2,sse3,ssse3,sse4.1,avx,avx2,avx512" - const char* kfr_version_string() + KFR_API_SPEC const char* kfr_version_string() { - return "KFR " KFR_VERSION_STRING KFR_DEBUG_STR " " KFR_ENABLED_ARCHS " " CMT_ARCH_BITNESS_NAME + return "KFR " KFR_VERSION_STRING KFR_DEBUG_STR " " KFR_ENABLED_ARCHS_LIST " " CMT_ARCH_BITNESS_NAME " (" CMT_COMPILER_FULL_NAME "/" CMT_OS_NAME ")" KFR_BUILD_DETAILS_1 KFR_BUILD_DETAILS_2; } - uint32_t kfr_version() { return KFR_VERSION; } - const char* kfr_enabled_archs() { return KFR_ENABLED_ARCHS; } - int kfr_current_arch() { return static_cast<int>(get_cpu()); } + KFR_API_SPEC uint32_t kfr_version() { return KFR_VERSION; } + KFR_API_SPEC const char* kfr_enabled_archs() { return KFR_ENABLED_ARCHS_LIST; } + KFR_API_SPEC int kfr_current_arch() { return static_cast<int>(get_cpu()); } - void* kfr_allocate(size_t size) { return details::aligned_malloc(size, KFR_DEFAULT_ALIGNMENT); } - void* 
kfr_allocate_aligned(size_t size, size_t alignment) + KFR_API_SPEC const char* kfr_last_error() { return error.data(); } + + KFR_API_SPEC void* kfr_allocate(size_t size) + { + return details::aligned_malloc(size, KFR_DEFAULT_ALIGNMENT); + } + KFR_API_SPEC void* kfr_allocate_aligned(size_t size, size_t alignment) { return details::aligned_malloc(size, alignment); } - void kfr_deallocate(void* ptr) { return details::aligned_free(ptr); } - size_t kfr_allocated_size(void* ptr) { return details::aligned_size(ptr); } + KFR_API_SPEC void kfr_deallocate(void* ptr) { return details::aligned_free(ptr); } + KFR_API_SPEC size_t kfr_allocated_size(void* ptr) { return details::aligned_size(ptr); } - void* kfr_add_ref(void* ptr) + KFR_API_SPEC void* kfr_add_ref(void* ptr) { details::aligned_add_ref(ptr); return ptr; } - void kfr_release(void* ptr) { details::aligned_release(ptr); } + KFR_API_SPEC void kfr_release(void* ptr) { details::aligned_release(ptr); } - void* kfr_reallocate(void* ptr, size_t new_size) + KFR_API_SPEC void* kfr_reallocate(void* ptr, size_t new_size) { return details::aligned_reallocate(ptr, new_size, KFR_DEFAULT_ALIGNMENT); } - void* kfr_reallocate_aligned(void* ptr, size_t new_size, size_t alignment) + KFR_API_SPEC void* kfr_reallocate_aligned(void* ptr, size_t new_size, size_t alignment) { return details::aligned_reallocate(ptr, new_size, alignment); } - KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size) + KFR_API_SPEC KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size) { - if (size < 2) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DFT_PLAN_F32*>(new kfr::dft_plan<float>(cpu_t::runtime, size)); + return try_fn([&]() { return reinterpret_cast<KFR_DFT_PLAN_F32*>(new kfr::dft_plan<float>(size)); }, + nullptr); } - KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size) + KFR_API_SPEC KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size) { - if (size < 2) - return nullptr; - if (size > 16777216) - return 
nullptr; - return reinterpret_cast<KFR_DFT_PLAN_F64*>(new kfr::dft_plan<double>(cpu_t::runtime, size)); + return try_fn([&]() { return reinterpret_cast<KFR_DFT_PLAN_F64*>(new kfr::dft_plan<double>(size)); }, + nullptr); } - void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan) { reinterpret_cast<kfr::dft_plan<float>*>(plan)->dump(); } - void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan) { reinterpret_cast<kfr::dft_plan<double>*>(plan)->dump(); } + KFR_API_SPEC void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan) + { + try_fn([&] { reinterpret_cast<kfr::dft_plan<float>*>(plan)->dump(); }); + } + KFR_API_SPEC void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan) + { + try_fn([&] { reinterpret_cast<kfr::dft_plan<double>*>(plan)->dump(); }); + } - size_t kfr_dft_get_size_f32(KFR_DFT_PLAN_F32* plan) + KFR_API_SPEC size_t kfr_dft_get_size_f32(KFR_DFT_PLAN_F32* plan) { - return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; }, 0); } - size_t kfr_dft_get_size_f64(KFR_DFT_PLAN_F64* plan) + KFR_API_SPEC size_t kfr_dft_get_size_f64(KFR_DFT_PLAN_F64* plan) { - return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; }, 0); } - size_t kfr_dft_get_temp_size_f32(KFR_DFT_PLAN_F32* plan) + KFR_API_SPEC size_t kfr_dft_get_temp_size_f32(KFR_DFT_PLAN_F32* plan) { - return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; }, 0); } - size_t kfr_dft_get_temp_size_f64(KFR_DFT_PLAN_F64* plan) + KFR_API_SPEC size_t kfr_dft_get_temp_size_f64(KFR_DFT_PLAN_F64* plan) { - return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; }, 0); } - void kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, uint8_t* temp) + KFR_API_SPEC void 
kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, + uint8_t* temp) { - reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute( - reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in), - temp, kfr::cfalse); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute( + reinterpret_cast<kfr::complex<float>*>(out), + reinterpret_cast<const kfr::complex<float>*>(in), temp, kfr::cfalse); + }); } - void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, uint8_t* temp) + KFR_API_SPEC void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, + uint8_t* temp) { - reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute( - reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in), - temp, kfr::cfalse); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute( + reinterpret_cast<kfr::complex<double>*>(out), + reinterpret_cast<const kfr::complex<double>*>(in), temp, kfr::cfalse); + }); } - void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, uint8_t* temp) + KFR_API_SPEC void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, + uint8_t* temp) { - reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute( - reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in), - temp, kfr::ctrue); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute( + reinterpret_cast<kfr::complex<float>*>(out), + reinterpret_cast<const kfr::complex<float>*>(in), temp, kfr::ctrue); + }); } - void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, uint8_t* temp) + KFR_API_SPEC void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, + uint8_t* temp) { - reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute( - 
reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in), - temp, kfr::ctrue); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute( + reinterpret_cast<kfr::complex<double>*>(out), + reinterpret_cast<const kfr::complex<double>*>(in), temp, kfr::ctrue); + }); } - void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan) + KFR_API_SPEC void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan) { - delete reinterpret_cast<kfr::dft_plan<float>*>(plan); + try_fn([&]() { delete reinterpret_cast<kfr::dft_plan<float>*>(plan); }); } - void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan) + KFR_API_SPEC void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan) { - delete reinterpret_cast<kfr::dft_plan<double>*>(plan); + try_fn([&]() { delete reinterpret_cast<kfr::dft_plan<double>*>(plan); }); } // Real DFT plans - KFR_DFT_REAL_PLAN_F32* kfr_dft_real_create_plan_f32(size_t size, KFR_DFT_PACK_FORMAT pack_format) + KFR_API_SPEC KFR_DFT_REAL_PLAN_F32* kfr_dft_real_create_plan_f32(size_t size, + KFR_DFT_PACK_FORMAT pack_format) { - if (size < 4) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DFT_REAL_PLAN_F32*>( - new kfr::dft_plan_real<float>(cpu_t::runtime, size, static_cast<dft_pack_format>(pack_format))); + return try_fn( + [&]() + { + return reinterpret_cast<KFR_DFT_REAL_PLAN_F32*>( + new kfr::dft_plan_real<float>(size, static_cast<dft_pack_format>(pack_format))); + }, + nullptr); } - KFR_DFT_REAL_PLAN_F64* kfr_dft_real_create_plan_f64(size_t size, KFR_DFT_PACK_FORMAT pack_format) + KFR_API_SPEC KFR_DFT_REAL_PLAN_F64* kfr_dft_real_create_plan_f64(size_t size, + KFR_DFT_PACK_FORMAT pack_format) { - if (size < 4) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DFT_REAL_PLAN_F64*>( - new kfr::dft_plan_real<double>(cpu_t::runtime, size, static_cast<dft_pack_format>(pack_format))); + return try_fn( + [&]() + { + return 
reinterpret_cast<KFR_DFT_REAL_PLAN_F64*>( + new kfr::dft_plan_real<double>(size, static_cast<dft_pack_format>(pack_format))); + }, + nullptr); } - void kfr_dft_real_dump_f32(KFR_DFT_REAL_PLAN_F32* plan) + KFR_API_SPEC void kfr_dft_real_dump_f32(KFR_DFT_REAL_PLAN_F32* plan) { - reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->dump(); + try_fn([&]() { reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->dump(); }); } - void kfr_dft_real_dump_f64(KFR_DFT_REAL_PLAN_F64* plan) + KFR_API_SPEC void kfr_dft_real_dump_f64(KFR_DFT_REAL_PLAN_F64* plan) { - reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->dump(); + try_fn([&]() { reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->dump(); }); } - size_t kfr_dft_real_get_size_f32(KFR_DFT_REAL_PLAN_F32* plan) + KFR_API_SPEC size_t kfr_dft_real_get_size_f32(KFR_DFT_REAL_PLAN_F32* plan) { - return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; }, 0); } - size_t kfr_dft_real_get_size_f64(KFR_DFT_REAL_PLAN_F64* plan) + KFR_API_SPEC size_t kfr_dft_real_get_size_f64(KFR_DFT_REAL_PLAN_F64* plan) { - return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; }, 0); } - size_t kfr_dft_real_get_temp_size_f32(KFR_DFT_REAL_PLAN_F32* plan) + KFR_API_SPEC size_t kfr_dft_real_get_temp_size_f32(KFR_DFT_REAL_PLAN_F32* plan) { - return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; }, 0); } - size_t kfr_dft_real_get_temp_size_f64(KFR_DFT_REAL_PLAN_F64* plan) + KFR_API_SPEC size_t kfr_dft_real_get_temp_size_f64(KFR_DFT_REAL_PLAN_F64* plan) { - return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; }, 0); } - void kfr_dft_real_execute_f32(KFR_DFT_REAL_PLAN_F32* plan, 
kfr_c32* out, const float* in, uint8_t* temp) + KFR_API_SPEC void kfr_dft_real_execute_f32(KFR_DFT_REAL_PLAN_F32* plan, kfr_c32* out, const float* in, + uint8_t* temp) { - reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute( - reinterpret_cast<kfr::complex<float>*>(out), in, temp); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute( + reinterpret_cast<kfr::complex<float>*>(out), in, temp); + }); } - void kfr_dft_real_execute_f64(KFR_DFT_REAL_PLAN_F64* plan, kfr_c64* out, const double* in, uint8_t* temp) + KFR_API_SPEC void kfr_dft_real_execute_f64(KFR_DFT_REAL_PLAN_F64* plan, kfr_c64* out, const double* in, + uint8_t* temp) { - reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute( - reinterpret_cast<kfr::complex<double>*>(out), in, temp); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute( + reinterpret_cast<kfr::complex<double>*>(out), in, temp); + }); } - void kfr_dft_real_execute_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, const kfr_c32* in, - uint8_t* temp) + KFR_API_SPEC void kfr_dft_real_execute_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, + const kfr_c32* in, uint8_t* temp) { - reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute( - out, reinterpret_cast<const kfr::complex<float>*>(in), temp); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute( + out, reinterpret_cast<const kfr::complex<float>*>(in), temp); + }); } - void kfr_dft_real_execute_inverse_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, const kfr_c64* in, - uint8_t* temp) + KFR_API_SPEC void kfr_dft_real_execute_inverse_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, + const kfr_c64* in, uint8_t* temp) { - reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute( - out, reinterpret_cast<const kfr::complex<double>*>(in), temp); + try_fn( + [&]() + { + reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute( + out, reinterpret_cast<const 
kfr::complex<double>*>(in), temp); + }); } - void kfr_dft_real_delete_plan_f32(KFR_DFT_REAL_PLAN_F32* plan) + KFR_API_SPEC void kfr_dft_real_delete_plan_f32(KFR_DFT_REAL_PLAN_F32* plan) { - delete reinterpret_cast<kfr::dft_plan_real<float>*>(plan); + try_fn([&]() { delete reinterpret_cast<kfr::dft_plan_real<float>*>(plan); }); } - void kfr_dft_real_delete_plan_f64(KFR_DFT_REAL_PLAN_F64* plan) + KFR_API_SPEC void kfr_dft_real_delete_plan_f64(KFR_DFT_REAL_PLAN_F64* plan) { - delete reinterpret_cast<kfr::dft_plan_real<double>*>(plan); + try_fn([&]() { delete reinterpret_cast<kfr::dft_plan_real<double>*>(plan); }); } // Discrete Cosine Transform - KFR_DCT_PLAN_F32* kfr_dct_create_plan_f32(size_t size) + KFR_API_SPEC KFR_DCT_PLAN_F32* kfr_dct_create_plan_f32(size_t size) { - if (size < 4) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DCT_PLAN_F32*>(new kfr::dct_plan<float>(cpu_t::runtime, size)); + return try_fn([&]() { return reinterpret_cast<KFR_DCT_PLAN_F32*>(new kfr::dct_plan<float>(size)); }, + nullptr); } - KFR_DCT_PLAN_F64* kfr_dct_create_plan_f64(size_t size) + KFR_API_SPEC KFR_DCT_PLAN_F64* kfr_dct_create_plan_f64(size_t size) { - if (size < 4) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DCT_PLAN_F64*>(new kfr::dct_plan<double>(cpu_t::runtime, size)); + return try_fn([&]() { return reinterpret_cast<KFR_DCT_PLAN_F64*>(new kfr::dct_plan<double>(size)); }, + nullptr); } - void kfr_dct_dump_f32(KFR_DCT_PLAN_F32* plan) { reinterpret_cast<kfr::dct_plan<float>*>(plan)->dump(); } - void kfr_dct_dump_f64(KFR_DCT_PLAN_F64* plan) { reinterpret_cast<kfr::dct_plan<double>*>(plan)->dump(); } + KFR_API_SPEC void kfr_dct_dump_f32(KFR_DCT_PLAN_F32* plan) + { + try_fn([&]() { reinterpret_cast<kfr::dct_plan<float>*>(plan)->dump(); }); + } + KFR_API_SPEC void kfr_dct_dump_f64(KFR_DCT_PLAN_F64* plan) + { + try_fn([&]() { reinterpret_cast<kfr::dct_plan<double>*>(plan)->dump(); }); + } - size_t 
kfr_dct_get_size_f32(KFR_DCT_PLAN_F32* plan) + KFR_API_SPEC size_t kfr_dct_get_size_f32(KFR_DCT_PLAN_F32* plan) { - return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; }, 0); } - size_t kfr_dct_get_size_f64(KFR_DCT_PLAN_F64* plan) + KFR_API_SPEC size_t kfr_dct_get_size_f64(KFR_DCT_PLAN_F64* plan) { - return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; }, 0); } - size_t kfr_dct_get_temp_size_f32(KFR_DCT_PLAN_F32* plan) + KFR_API_SPEC size_t kfr_dct_get_temp_size_f32(KFR_DCT_PLAN_F32* plan) { - return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; }, 0); } - size_t kfr_dct_get_temp_size_f64(KFR_DCT_PLAN_F64* plan) + KFR_API_SPEC size_t kfr_dct_get_temp_size_f64(KFR_DCT_PLAN_F64* plan) { - return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; + return try_fn([&]() { return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; }, 0); } - void kfr_dct_execute_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp) + KFR_API_SPEC void kfr_dct_execute_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp) { - reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::cfalse); + try_fn([&]() { reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::cfalse); }); } - void kfr_dct_execute_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp) + KFR_API_SPEC void kfr_dct_execute_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, + uint8_t* temp) { - reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::cfalse); + try_fn([&]() + { reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::cfalse); }); } - void 
kfr_dct_execute_inverse_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp) + KFR_API_SPEC void kfr_dct_execute_inverse_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, + uint8_t* temp) { - reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::ctrue); + try_fn([&]() { reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, kfr::ctrue); }); } - void kfr_dct_execute_inverse_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp) + KFR_API_SPEC void kfr_dct_execute_inverse_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, + uint8_t* temp) { - reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::ctrue); + try_fn([&]() { reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, kfr::ctrue); }); } - void kfr_dct_delete_plan_f32(KFR_DCT_PLAN_F32* plan) + KFR_API_SPEC void kfr_dct_delete_plan_f32(KFR_DCT_PLAN_F32* plan) { - delete reinterpret_cast<kfr::dct_plan<float>*>(plan); + try_fn([&]() { delete reinterpret_cast<kfr::dct_plan<float>*>(plan); }); } - void kfr_dct_delete_plan_f64(KFR_DCT_PLAN_F64* plan) + KFR_API_SPEC void kfr_dct_delete_plan_f64(KFR_DCT_PLAN_F64* plan) { - delete reinterpret_cast<kfr::dct_plan<double>*>(plan); + try_fn([&]() { delete reinterpret_cast<kfr::dct_plan<double>*>(plan); }); } // Filters - KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size) + KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size) { -#ifndef CMT_MULTI - return reinterpret_cast<KFR_FILTER_F32*>(make_fir_filter<float>(make_univector(taps, size))); -#else - return reinterpret_cast<KFR_FILTER_F32*>( - make_fir_filter<float>(cpu_t::runtime, make_univector(taps, size))); -#endif + return try_fn( + [&]() + { return reinterpret_cast<KFR_FILTER_F32*>(new fir_filter<float>(make_univector(taps, size))); }, + nullptr); } - KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t 
size) + KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t size) { -#ifndef CMT_MULTI - return reinterpret_cast<KFR_FILTER_F64*>(make_fir_filter<double>(make_univector(taps, size))); -#else - return reinterpret_cast<KFR_FILTER_F64*>( - make_fir_filter<double>(cpu_t::runtime, make_univector(taps, size))); -#endif + return try_fn( + [&]() + { return reinterpret_cast<KFR_FILTER_F64*>(new fir_filter<double>(make_univector(taps, size))); }, + nullptr); } - KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size, - size_t block_size) + KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size, + size_t block_size) { -#ifndef CMT_MULTI - return reinterpret_cast<KFR_FILTER_F32*>( - make_convolve_filter<float>(make_univector(taps, size), block_size ? block_size : 1024)); -#else - return reinterpret_cast<KFR_FILTER_F32*>(make_convolve_filter<float>( - cpu_t::runtime, make_univector(taps, size), block_size ? block_size : 1024)); -#endif + return try_fn( + [&]() + { + return reinterpret_cast<KFR_FILTER_F32*>( + new convolve_filter<float>(make_univector(taps, size), block_size ? block_size : 1024)); + }, + nullptr); } - KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size, - size_t block_size) + KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size, + size_t block_size) { -#ifndef CMT_MULTI - return reinterpret_cast<KFR_FILTER_F64*>( - make_convolve_filter<double>(make_univector(taps, size), block_size ? block_size : 1024)); -#else - return reinterpret_cast<KFR_FILTER_F64*>(make_convolve_filter<double>( - cpu_t::runtime, make_univector(taps, size), block_size ? block_size : 1024)); -#endif + return try_fn( + [&]() + { + return reinterpret_cast<KFR_FILTER_F64*>( + new convolve_filter<double>(make_univector(taps, size), block_size ? 
block_size : 1024)); + }, + nullptr); } - KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count) + KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count) { - if (sos_count < 1 || sos_count > 64) - return nullptr; - -#ifndef CMT_MULTI - return reinterpret_cast<KFR_FILTER_F32*>( - make_biquad_filter<float, 64>(reinterpret_cast<const biquad_params<float>*>(sos), sos_count)); -#else - return reinterpret_cast<KFR_FILTER_F32*>(make_biquad_filter<float, 64>( - cpu_t::runtime, reinterpret_cast<const biquad_params<float>*>(sos), sos_count)); -#endif + return try_fn( + [&]() + { + return reinterpret_cast<KFR_FILTER_F32*>( + new biquad_filter<float>(reinterpret_cast<const biquad_params<float>*>(sos), sos_count)); + }, + nullptr); } - KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count) + KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count) { - if (sos_count < 1 || sos_count > 64) - return nullptr; - -#ifndef CMT_MULTI - return reinterpret_cast<KFR_FILTER_F64*>( - make_biquad_filter<double, 64>(reinterpret_cast<const biquad_params<double>*>(sos), sos_count)); -#else - return reinterpret_cast<KFR_FILTER_F64*>(make_biquad_filter<double, 64>( - cpu_t::runtime, reinterpret_cast<const biquad_params<double>*>(sos), sos_count)); -#endif + return try_fn( + [&]() + { + return reinterpret_cast<KFR_FILTER_F64*>(new biquad_filter<double>( + reinterpret_cast<const biquad_params<double>*>(sos), sos_count)); + }, + nullptr); } - void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input, size_t size) + KFR_API_SPEC void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input, + size_t size) { - reinterpret_cast<filter<float>*>(plan)->apply(output, input, size); + try_fn([&]() { reinterpret_cast<filter<float>*>(plan)->apply(output, input, size); }); } - void 
kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input, size_t size) + KFR_API_SPEC void kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input, + size_t size) { - reinterpret_cast<filter<double>*>(plan)->apply(output, input, size); + try_fn([&]() { reinterpret_cast<filter<double>*>(plan)->apply(output, input, size); }); } - void kfr_filter_reset_f32(KFR_FILTER_F32* plan) { reinterpret_cast<filter<float>*>(plan)->reset(); } - void kfr_filter_reset_f64(KFR_FILTER_F64* plan) { reinterpret_cast<filter<double>*>(plan)->reset(); } + KFR_API_SPEC void kfr_filter_reset_f32(KFR_FILTER_F32* plan) + { + try_fn([&]() { reinterpret_cast<filter<float>*>(plan)->reset(); }); + } + KFR_API_SPEC void kfr_filter_reset_f64(KFR_FILTER_F64* plan) + { + try_fn([&]() { reinterpret_cast<filter<double>*>(plan)->reset(); }); + } - void kfr_filter_delete_plan_f32(KFR_FILTER_F32* plan) { delete reinterpret_cast<filter<f32>*>(plan); } - void kfr_filter_delete_plan_f64(KFR_FILTER_F64* plan) { delete reinterpret_cast<filter<f64>*>(plan); } + KFR_API_SPEC void kfr_filter_delete_plan_f32(KFR_FILTER_F32* plan) + { + try_fn([&]() { delete reinterpret_cast<filter<f32>*>(plan); }); + } + KFR_API_SPEC void kfr_filter_delete_plan_f64(KFR_FILTER_F64* plan) + { + try_fn([&]() { delete reinterpret_cast<filter<f64>*>(plan); }); + } } } // namespace kfr diff --git a/src/capi/dsp.cpp b/src/capi/dsp.cpp @@ -1,28 +0,0 @@ -#include <kfr/dsp/biquad.hpp> -#include <kfr/dsp/fir.hpp> - -namespace kfr -{ -inline namespace CMT_ARCH_NAME -{ -template <typename U, typename T> -filter<U>* make_fir_filter(const univector_ref<const T>& taps) -{ - return new fir_filter<T, U>(taps); -} - -template filter<float>* make_fir_filter<float, float>(const univector_ref<const float>&); -template filter<double>* make_fir_filter<double, double>(const univector_ref<const double>&); -template filter<float>* make_fir_filter<float, double>(const univector_ref<const double>&); - 
-template <typename T, size_t maxfiltercount> -KFR_FUNCTION filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count) -{ - return new biquad_filter<T, maxfiltercount>(bq, count); -} - -template filter<float>* make_biquad_filter<float, 64>(const biquad_params<float>* bq, size_t count); -template filter<double>* make_biquad_filter<double, 64>(const biquad_params<double>* bq, size_t count); - -} // namespace CMT_ARCH_NAME -} // namespace kfr diff --git a/src/dft/CMakeLists.txt b/src/dft/CMakeLists.txt @@ -1,47 +1,6 @@ cmake_minimum_required(VERSION 3.12) -set(DFT_LIBS) - -if (KFR_ENABLE_DFT_MULTIARCH) - add_library(kfr_dft INTERFACE) - add_library(kfr_dft_all INTERFACE) - target_link_libraries(kfr_dft INTERFACE kfr kfr_dft_all) - target_compile_definitions( - kfr_dft - INTERFACE -DKFR_DFT_MULTI=1 - -DCMT_MULTI=1 - -DCMT_MULTI_ENABLED_SSE2=1 - -DCMT_MULTI_ENABLED_SSE41=1 - -DCMT_MULTI_ENABLED_AVX=1 - -DCMT_MULTI_ENABLED_AVX2=1 - -DCMT_MULTI_ENABLED_AVX512=1) - - add_arch_library(kfr_dft sse2 "${KFR_DFT_SRC}" "") - add_arch_library(kfr_dft sse41 "${KFR_DFT_SRC}" "") - add_arch_library(kfr_dft avx "${KFR_DFT_SRC}" "") - add_arch_library(kfr_dft avx2 "${KFR_DFT_SRC}" "") - add_arch_library(kfr_dft avx512 "${KFR_DFT_SRC}" "") - list( - APPEND - DFT_LIBS - kfr_dft_sse2 - kfr_dft_sse41 - kfr_dft_avx - kfr_dft_avx2 - kfr_dft_avx512) - - link_as_whole(kfr_dft_all INTERFACE kfr_dft_sse2) - -else () - add_library(kfr_dft ${KFR_DFT_SRC}) - target_link_libraries(kfr_dft kfr use_arch) - if (KFR_ENABLE_DFT_NP) - target_compile_definitions(kfr_dft PUBLIC -DKFR_DFT_NPo2) - else () - target_compile_definitions(kfr_dft PUBLIC -DKFR_DFT_NO_NPo2) - endif () - list(APPEND DFT_LIBS kfr_dft) -endif () +add_kfr_library(NAME kfr_dft MULTIARCH SOURCES ${KFR_DFT_SRC}) function (dft_compile_options LIB) if (MSVC AND CLANG) @@ -52,23 +11,14 @@ function (dft_compile_options LIB) endif () endfunction () -foreach (LIB IN LISTS DFT_LIBS) +foreach (LIB IN LISTS kfr_dft_LIBS) 
dft_compile_options(${LIB}) endforeach () if (KFR_INSTALL_LIBRARIES) - if (KFR_ENABLE_DFT_MULTIARCH) - install( - TARGETS kfr_dft_sse2 kfr_dft_sse41 kfr_dft_avx kfr_dft_avx2 - kfr_dft_avx512 - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin) - else () - install( - TARGETS kfr_dft - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin) - endif () + install( + TARGETS ${kfr_dft_LIBS} + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin) endif () diff --git a/src/dft/convolution-impl.cpp b/src/dft/convolution-impl.cpp @@ -24,65 +24,12 @@ See https://www.kfrlib.com for details. */ #include <kfr/base/simd_expressions.hpp> -#include <kfr/simd/complex.hpp> #include <kfr/dft/convolution.hpp> +#include <kfr/simd/complex.hpp> +#include <kfr/multiarch.h> namespace kfr { -inline namespace CMT_ARCH_NAME -{ - -namespace intrinsics -{ - -template <typename T> -univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2) -{ - using ST = subtype<T>; - const size_t size = next_poweroftwo(src1.size() + src2.size() - 1); - univector<complex<ST>> src1padded = src1; - univector<complex<ST>> src2padded = src2; - src1padded.resize(size); - src2padded.resize(size); - - dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size); - univector<u8> temp(dft->temp_size); - dft->execute(src1padded, src1padded, temp); - dft->execute(src2padded, src2padded, temp); - src1padded = src1padded * src2padded; - dft->execute(src1padded, src1padded, temp, true); - const ST invsize = reciprocal<ST>(static_cast<ST>(size)); - return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize; -} - -template <typename T> -univector<T> correlate(const univector_ref<const T>& src1, const univector_ref<const T>& src2) -{ - using ST = subtype<T>; - const size_t size = next_poweroftwo(src1.size() + src2.size() - 1); - univector<complex<ST>> src1padded = src1; - univector<complex<ST>> 
src2padded = reverse(src2); - src1padded.resize(size); - src2padded.resize(size); - dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size); - univector<u8> temp(dft->temp_size); - dft->execute(src1padded, src1padded, temp); - dft->execute(src2padded, src2padded, temp); - src1padded = src1padded * src2padded; - dft->execute(src1padded, src1padded, temp, true); - const ST invsize = reciprocal<ST>(static_cast<ST>(size)); - return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize; -} - -template <typename T> -univector<T> autocorrelate(const univector_ref<const T>& src1) -{ - univector<T> result = correlate(src1, src1); - result = result.slice(result.size() / 2); - return result; -} - -} // namespace intrinsics template <typename T> convolve_filter<T>::convolve_filter(size_t size_, size_t block_size_) @@ -121,7 +68,68 @@ void convolve_filter<T>::set_data(const univector_ref<const T>& data) } template <typename T> -void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size) +void convolve_filter<T>::reset() +{ + for (auto& segment : segments) + { + process(segment, zeros()); + } + position = 0; + process(saved_input, zeros()); + input_position = 0; + process(overlap, zeros()); +} + +//------------------------------------------------------------------------------------- + +CMT_MULTI_PROTO(namespace impl { + template <typename T> + univector<T> convolve(const univector_ref<const T>&, const univector_ref<const T>&, bool); + + template <typename T> + class convolve_filter : public kfr::convolve_filter<T> + { + public: + void process_buffer_impl(T* output, const T* input, size_t size); + }; +}) + +inline namespace CMT_ARCH_NAME +{ + +namespace impl +{ + +template <typename T> +univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2, bool correlate) +{ + using ST = subtype<T>; + const size_t size = next_poweroftwo(src1.size() + src2.size() - 1); + univector<complex<ST>> src1padded = 
src1; + univector<complex<ST>> src2padded; + if (correlate) + src2padded = reverse(src2); + else + src2padded = src2; + src1padded.resize(size); + src2padded.resize(size); + + dft_plan_ptr<ST> dft = dft_cache::instance().get(ctype_t<ST>(), size); + univector<u8> temp(dft->temp_size); + dft->execute(src1padded, src1padded, temp); + dft->execute(src2padded, src2padded, temp); + src1padded = src1padded * src2padded; + dft->execute(src1padded, src1padded, temp, true); + const ST invsize = reciprocal<ST>(static_cast<ST>(size)); + return truncate(real(src1padded), src1.size() + src2.size() - 1) * invsize; +} +template univector<f32> convolve<f32>(const univector_ref<const f32>&, const univector_ref<const f32>&, bool); +template univector<f64> convolve<f64>(const univector_ref<const f64>&, const univector_ref<const f64>&, bool); +template univector<c32> convolve<c32>(const univector_ref<const c32>&, const univector_ref<const c32>&, bool); +template univector<c64> convolve<c64>(const univector_ref<const c64>&, const univector_ref<const c64>&, bool); + +template <typename T> +void convolve_filter<T>::process_buffer_impl(T* output, const T* input, size_t size) { // Note that the conditionals in the following algorithm are meant to // reduce complexity in the common cases of either processing complete @@ -134,34 +142,35 @@ void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size) while (processed < size) { // Calculate how many samples to process this iteration. - auto const processing = std::min(size - processed, block_size - input_position); + auto const processing = std::min(size - processed, this->block_size - this->input_position); // Prepare input to forward FFT: - if (processing == block_size) + if (processing == this->block_size) { // No need to work with saved_input. 
- builtin_memcpy(scratch1.data(), input + processed, processing * sizeof(T)); + builtin_memcpy(this->scratch1.data(), input + processed, processing * sizeof(T)); } else { // Append this iteration's input to the saved_input current block. - builtin_memcpy(saved_input.data() + input_position, input + processed, processing * sizeof(T)); - builtin_memcpy(scratch1.data(), saved_input.data(), block_size * sizeof(T)); + builtin_memcpy(this->saved_input.data() + this->input_position, input + processed, + processing * sizeof(T)); + builtin_memcpy(this->scratch1.data(), this->saved_input.data(), this->block_size * sizeof(T)); } // Forward FFT saved_input block. - fft.execute(segments[position], scratch1, temp); + this->fft.execute(this->segments[this->position], this->scratch1, this->temp); - if (segments.size() == 1) + if (this->segments.size() == 1) { // Just one segment/block of history. // Y_k = H * X_k - fft_multiply(cscratch, ir_segments[0], segments[0], fft_multiply_pack); + fft_multiply(this->cscratch, this->ir_segments[0], this->segments[0], fft_multiply_pack); } else { // More than one segment/block of history so this is more involved. - if (input_position == 0) + if (this->input_position == 0) { // At the start of an input block, we premultiply the history from // previous input blocks with the extended filter blocks. 
@@ -169,139 +178,88 @@ void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size) // Y_(k-i,i) = H_i * X_(k-i) // premul += Y_(k-i,i) for i=1,...,N - fft_multiply(premul, ir_segments[1], segments[(position + 1) % segments.size()], - fft_multiply_pack); - for (size_t i = 2; i < segments.size(); i++) + fft_multiply(this->premul, this->ir_segments[1], + this->segments[(this->position + 1) % this->segments.size()], fft_multiply_pack); + for (size_t i = 2; i < this->segments.size(); i++) { - const size_t n = (position + i) % segments.size(); - fft_multiply_accumulate(premul, ir_segments[i], segments[n], fft_multiply_pack); + const size_t n = (this->position + i) % this->segments.size(); + fft_multiply_accumulate(this->premul, this->ir_segments[i], this->segments[n], + fft_multiply_pack); } } // Y_(k,0) = H_0 * X_k // Y_k = premul + Y_(k,0) - fft_multiply_accumulate(cscratch, premul, ir_segments[0], segments[position], fft_multiply_pack); + fft_multiply_accumulate(this->cscratch, this->premul, this->ir_segments[0], + this->segments[this->position], fft_multiply_pack); } // y_k = IFFT( Y_k ) - fft.execute(scratch2, cscratch, temp, cinvert_t{}); + this->fft.execute(this->scratch2, this->cscratch, this->temp, cinvert_t{}); // z_k = y_k + overlap process(make_univector(output + processed, processing), - scratch2.slice(input_position, processing) + overlap.slice(input_position, processing)); + this->scratch2.slice(this->input_position, processing) + + this->overlap.slice(this->input_position, processing)); - input_position += processing; + this->input_position += processing; processed += processing; // If a whole block was processed, prepare for next block. - if (input_position == block_size) + if (this->input_position == this->block_size) { // Input block k is complete. Move to (k+1)-th input block. - input_position = 0; + this->input_position = 0; // Zero out the saved_input if it will be used in the next iteration. 
auto const remaining = size - processed; - if (remaining < block_size && remaining > 0) + if (remaining < this->block_size && remaining > 0) { - process(saved_input, zeros()); + process(this->saved_input, zeros()); } - builtin_memcpy(overlap.data(), scratch2.data() + block_size, block_size * sizeof(T)); + builtin_memcpy(this->overlap.data(), this->scratch2.data() + this->block_size, + this->block_size * sizeof(T)); - position = position > 0 ? position - 1 : segments.size() - 1; + this->position = this->position > 0 ? this->position - 1 : this->segments.size() - 1; } } } -template <typename T> -void convolve_filter<T>::reset() -{ - for (auto& segment : segments) - { - process(segment, zeros()); - } - position = 0; - process(saved_input, zeros()); - input_position = 0; - process(overlap, zeros()); -} - -namespace intrinsics -{ - -template univector<float> convolve<float>(const univector_ref<const float>&, - const univector_ref<const float>&); -template univector<complex<float>> convolve<complex<float>>(const univector_ref<const complex<float>>&, - const univector_ref<const complex<float>>&); -template univector<float> correlate<float>(const univector_ref<const float>&, - const univector_ref<const float>&); -template univector<complex<float>> correlate<complex<float>>(const univector_ref<const complex<float>>&, - const univector_ref<const complex<float>>&); - -template univector<float> autocorrelate<float>(const univector_ref<const float>&); -template univector<complex<float>> autocorrelate<complex<float>>(const univector_ref<const complex<float>>&); - -} // namespace intrinsics - -template convolve_filter<float>::convolve_filter(size_t, size_t); -template convolve_filter<complex<float>>::convolve_filter(size_t, size_t); +template class convolve_filter<float>; +template class convolve_filter<double>; +template class convolve_filter<complex<float>>; +template class convolve_filter<complex<double>>; -template convolve_filter<float>::convolve_filter(const 
univector_ref<const float>&, size_t); -template convolve_filter<complex<float>>::convolve_filter(const univector_ref<const complex<float>>&, size_t); +} // namespace impl -template void convolve_filter<float>::set_data(const univector_ref<const float>&); -template void convolve_filter<complex<float>>::set_data(const univector_ref<const complex<float>>&); - -template void convolve_filter<float>::process_buffer(float* output, const float* input, size_t size); -template void convolve_filter<complex<float>>::process_buffer(complex<float>* output, - const complex<float>* input, size_t size); - -template void convolve_filter<float>::reset(); -template void convolve_filter<complex<float>>::reset(); +} // namespace CMT_ARCH_NAME -namespace intrinsics +#ifdef CMT_MULTI_NEEDS_GATE +namespace internal_generic { +template <typename T> +univector<T> convolve(const univector_ref<const T>& src1, const univector_ref<const T>& src2, bool correlate) +{ + CMT_MULTI_GATE(return ns::impl::convolve(src1, src2, correlate)); +} -template univector<double> convolve<double>(const univector_ref<const double>&, - const univector_ref<const double>&); -template univector<complex<double>> convolve<complex<double>>(const univector_ref<const complex<double>>&, - const univector_ref<const complex<double>>&); -template univector<double> correlate<double>(const univector_ref<const double>&, - const univector_ref<const double>&); -template univector<complex<double>> correlate<complex<double>>(const univector_ref<const complex<double>>&, - const univector_ref<const complex<double>>&); - -template univector<double> autocorrelate<double>(const univector_ref<const double>&); -template univector<complex<double>> autocorrelate<complex<double>>( - const univector_ref<const complex<double>>&); - -} // namespace intrinsics - -template convolve_filter<double>::convolve_filter(size_t, size_t); -template convolve_filter<complex<double>>::convolve_filter(size_t, size_t); - -template 
convolve_filter<double>::convolve_filter(const univector_ref<const double>&, size_t); -template convolve_filter<complex<double>>::convolve_filter(const univector_ref<const complex<double>>&, - size_t); - -template void convolve_filter<double>::set_data(const univector_ref<const double>&); -template void convolve_filter<complex<double>>::set_data(const univector_ref<const complex<double>>&); - -template void convolve_filter<double>::process_buffer(double* output, const double* input, size_t size); -template void convolve_filter<complex<double>>::process_buffer(complex<double>* output, - const complex<double>* input, size_t size); +template univector<f32> convolve<f32>(const univector_ref<const f32>&, const univector_ref<const f32>&, bool); +template univector<f64> convolve<f64>(const univector_ref<const f64>&, const univector_ref<const f64>&, bool); +template univector<c32> convolve<c32>(const univector_ref<const c32>&, const univector_ref<const c32>&, bool); +template univector<c64> convolve<c64>(const univector_ref<const c64>&, const univector_ref<const c64>&, bool); -template void convolve_filter<double>::reset(); -template void convolve_filter<complex<double>>::reset(); +} // namespace internal_generic template <typename T> -filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size) +void convolve_filter<T>::process_buffer(T* output, const T* input, size_t size) { - return new convolve_filter<T>(taps, block_size); + CMT_MULTI_GATE( + reinterpret_cast<ns::impl::convolve_filter<T>*>(this)->process_buffer_impl(output, input, size)); } -template filter<float>* make_convolve_filter(const univector_ref<const float>&, size_t); -template filter<complex<float>>* make_convolve_filter(const univector_ref<const complex<float>>&, size_t); -template filter<double>* make_convolve_filter(const univector_ref<const double>&, size_t); -template filter<complex<double>>* make_convolve_filter(const univector_ref<const complex<double>>&, size_t); +template 
class convolve_filter<float>; +template class convolve_filter<double>; +template class convolve_filter<complex<float>>; +template class convolve_filter<complex<double>>; +#endif -} // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/src/dft/dft.cpp b/src/dft/dft.cpp @@ -0,0 +1,60 @@ +/** @addtogroup dft + * @{ + */ +/* + Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. 
+ */ + +#include <kfr/dft/fft.hpp> +#include <kfr/multiarch.h> + +namespace kfr +{ + +CMT_MULTI_PROTO(namespace impl { + template <typename T> + void dft_initialize(dft_plan<T> & plan); + template <typename T> + void dft_real_initialize(dft_plan_real<T> & plan); +}) + +#ifdef CMT_MULTI_NEEDS_GATE + +template <typename T> +void dft_initialize(dft_plan<T>& plan) +{ + CMT_MULTI_GATE(ns::impl::dft_initialize(plan)); +} +template <typename T> +void dft_real_initialize(dft_plan_real<T>& plan) +{ + CMT_MULTI_GATE(ns::impl::dft_real_initialize(plan)); +} + +template void dft_initialize<float>(dft_plan<float>&); +template void dft_initialize<double>(dft_plan<double>&); +template void dft_real_initialize<float>(dft_plan_real<float>&); +template void dft_real_initialize<double>(dft_plan_real<double>&); + +#endif + +} // namespace kfr diff --git a/src/dft/fft-impl.hpp b/src/dft/fft-impl.hpp @@ -1906,8 +1906,10 @@ void from_fmt(size_t real_size, complex<T>* rtwiddle, complex<T>* out, const com cwrite<1>(out, dc); } +#ifndef KFR_DFT_NO_NPo2 template <typename T> void init_dft(dft_plan<T>* self, size_t size, dft_order); +#endif template <typename T> KFR_INTRINSIC void initialize_stages(dft_plan<T>* self) @@ -1926,6 +1928,8 @@ KFR_INTRINSIC void initialize_stages(dft_plan<T>* self) } } +namespace impl +{ template <typename T> void dft_initialize(dft_plan<T>& plan) { @@ -1935,6 +1939,7 @@ void dft_initialize(dft_plan<T>& plan) initialize_data(&plan); initialize_order(&plan); } +} // namespace impl template <typename T> struct dft_stage_real_repack : dft_stage<T> @@ -1977,6 +1982,8 @@ public: } }; +namespace impl +{ template <typename T> void dft_real_initialize(dft_plan_real<T>& plan) { @@ -1989,6 +1996,7 @@ void dft_real_initialize(dft_plan_real<T>& plan) initialize_data(&plan); initialize_order(&plan); } +} // namespace impl } // namespace CMT_ARCH_NAME diff --git a/src/dft/fft-templates.hpp b/src/dft/fft-templates.hpp @@ -31,8 +31,11 @@ namespace kfr { inline namespace 
CMT_ARCH_NAME { +namespace impl +{ template void dft_initialize<FLOAT>(dft_plan<FLOAT>& plan); template void dft_real_initialize<FLOAT>(dft_plan_real<FLOAT>& plan); +} // namespace impl } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/src/dsp/CMakeLists.txt b/src/dsp/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.12) + +add_kfr_library(NAME kfr_dsp MULTIARCH SOURCES ${KFR_DSP_SRC}) + +if (KFR_INSTALL_LIBRARIES) + install( + TARGETS ${kfr_dsp_LIBS} + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin) +endif () diff --git a/src/dsp/biquad.cpp b/src/dsp/biquad.cpp @@ -0,0 +1,66 @@ +/** @addtogroup dft + * @{ + */ +/* + Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. 
+ */ +#include <kfr/multiarch.h> +#include <kfr/dsp/biquad.hpp> + +namespace kfr +{ + +CMT_MULTI_PROTO(namespace impl { + template <typename T> + expression_handle<T, 1> create_biquad_filter(const biquad_params<T>* bq, size_t count); +} // namespace impl +) + +inline namespace CMT_ARCH_NAME +{ +namespace impl +{ +template <typename T> +expression_handle<T, 1> create_biquad_filter(const biquad_params<T>* bq, size_t count) +{ + KFR_LOGIC_CHECK(count <= 64, "Too many biquad filters: ", count); + return biquad<64>(bq, count, placeholder<T>()); +} +template expression_handle<float, 1> create_biquad_filter<float>(const biquad_params<float>*, size_t); +template expression_handle<double, 1> create_biquad_filter<double>(const biquad_params<double>*, size_t); +} // namespace impl +} // namespace CMT_ARCH_NAME + +#ifdef CMT_MULTI_NEEDS_GATE + +template <typename T> +biquad_filter<T>::biquad_filter(const biquad_params<T>* bq, size_t count) +{ + CMT_MULTI_GATE(this->filter_expr = ns::impl::create_biquad_filter<T>(bq, count)); +} + +template biquad_filter<float>::biquad_filter(const biquad_params<float>*, size_t); +template biquad_filter<double>::biquad_filter(const biquad_params<double>*, size_t); + +#endif + +} // namespace kfr diff --git a/src/dsp/fir.cpp b/src/dsp/fir.cpp @@ -0,0 +1,92 @@ +/** @addtogroup dft + * @{ + */ +/* + Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. 
+
+  If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+  Buying a commercial license is mandatory as soon as you develop commercial activities without
+  disclosing the source code of your own applications.
+  See https://www.kfrlib.com for details.
+ */
+#include <kfr/dsp/fir.hpp>
+#include <kfr/multiarch.h>
+
+namespace kfr
+{
+
+// Per-architecture implementation class. It derives from the public kfr::fir_filter, inherits its
+// constructors and adds only the two *_impl member functions (no data members).
+CMT_MULTI_PROTO(namespace impl {
+    template <typename T, typename U>
+    class fir_filter : public kfr::fir_filter<T, U>
+    {
+    public:
+        using kfr::fir_filter<T, U>::fir_filter;
+
+        void process_buffer_impl(U* dest, const U* src, size_t size);
+        void process_expression_impl(U* dest, const expression_handle<U, 1>& src, size_t size);
+    };
+} // namespace impl
+)
+
+inline namespace CMT_ARCH_NAME
+{
+namespace impl
+{
+
+// Evaluates fir() with this->state over `size` samples read from `src` and stores the result in `dest`.
+template <typename T, typename U>
+void fir_filter<T, U>::process_buffer_impl(U* dest, const U* src, size_t size)
+{
+    make_univector(dest, size) = fir(this->state, make_univector(src, size));
+}
+// Same as process_buffer_impl, but the input is the expression `src` instead of a raw buffer.
+template <typename T, typename U>
+void fir_filter<T, U>::process_expression_impl(U* dest, const expression_handle<U, 1>& src, size_t size)
+{
+    make_univector(dest, size) = fir(this->state, src);
+}
+
+// (T, U) combinations built for every architecture namespace.
+template class fir_filter<float, float>;
+template class fir_filter<double, double>;
+template class fir_filter<float, double>;
+template class fir_filter<double, float>;
+template class fir_filter<float, complex<float>>;
+template class fir_filter<double, complex<double>>;
+
+} // namespace impl
+} // namespace CMT_ARCH_NAME
+
+#ifdef CMT_MULTI_NEEDS_GATE
+
+// Public entry points of kfr::fir_filter. Each one forwards through CMT_MULTI_GATE to the matching
+// *_impl member of ns::impl::fir_filter instead of evaluating fir() with the code generated for this
+// translation unit, so the architecture-specific implementations above are the ones that run.
+// NOTE(review): CMT_MULTI_GATE is presumably what provides `ns` and selects the architecture; confirm
+// against <kfr/multiarch.h>. The reinterpret_cast relies on impl::fir_filter adding no data members
+// to kfr::fir_filter, which holds for the declaration above.
+template <typename T, typename U>
+void fir_filter<T, U>::process_buffer(U* dest, const U* src, size_t size)
+{
+    CMT_MULTI_GATE(
+        reinterpret_cast<ns::impl::fir_filter<T, U>*>(this)->process_buffer_impl(dest, src, size));
+}
+template <typename T, typename U>
+void fir_filter<T, U>::process_expression(U* dest, const expression_handle<U, 1>& src, size_t size)
+{
+    CMT_MULTI_GATE(
+        reinterpret_cast<ns::impl::fir_filter<T, U>*>(this)->process_expression_impl(dest, src, size));
+}
+
+// Same (T, U) combinations as the impl instantiations above.
+template class fir_filter<float, float>;
+template class fir_filter<double, double>;
+template class fir_filter<float, double>;
+template class fir_filter<double, float>;
+template class fir_filter<float, complex<float>>;
+template class fir_filter<double, complex<double>>;
+
+#endif
+
+} // namespace kfr
diff --git a/src/dsp/sample_rate_conversion.cpp b/src/dsp/sample_rate_conversion.cpp
@@ -0,0 +1,179 @@
+/** @addtogroup dft
+ *  @{
+ */
+/*
+  Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com)
+  This file is part of KFR
+
+  KFR is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 2 of the License, or
+  (at your option) any later version.
+
+  KFR is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with KFR.
+
+  If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+  Buying a commercial license is mandatory as soon as you develop commercial activities without
+  disclosing the source code of your own applications.
+  See https://www.kfrlib.com for details.
+ */ +#include <kfr/dsp/sample_rate_conversion.hpp> +#include <kfr/multiarch.h> + +namespace kfr +{ +CMT_MULTI_PROTO(namespace impl { + template <typename T> + struct samplerate_converter : public kfr::samplerate_converter<T> + { + public: + using itype = typename kfr::samplerate_converter<T>::itype; + using ftype = typename kfr::samplerate_converter<T>::ftype; + void init(sample_rate_conversion_quality quality, itype interpolation_factor, itype decimation_factor, + subtype<T> scale, subtype<T> cutoff); + size_t process_impl(univector_ref<T> output, univector_ref<const T> input); + }; +} // namespace impl +) + +inline namespace CMT_ARCH_NAME +{ +namespace impl +{ + +template <typename T> +void samplerate_converter<T>::init(sample_rate_conversion_quality quality, itype interpolation_factor, + itype decimation_factor, subtype<T> scale, subtype<T> cutoff) +{ + this->kaiser_beta = this->window_param(quality); + this->depth = static_cast<itype>(this->filter_order(quality)); + this->input_position = 0; + this->output_position = 0; + + const i64 gcf = gcd(interpolation_factor, decimation_factor); + interpolation_factor /= gcf; + decimation_factor /= gcf; + + this->taps = this->depth * interpolation_factor; + this->order = size_t(this->depth * interpolation_factor - 1); + + this->interpolation_factor = interpolation_factor; + this->decimation_factor = decimation_factor; + + const itype halftaps = this->taps / 2; + this->filter = univector<T>(size_t(this->taps), T()); + this->delay = univector<T>(size_t(this->depth), T()); + + cutoff = cutoff - this->transition_width() / c_pi<ftype, 4>; + + cutoff = cutoff / std::max(decimation_factor, interpolation_factor); + + for (itype j = 0, jj = 0; j < this->taps; j++) + { + this->filter[size_t(j)] = + sinc((jj - halftaps) * cutoff * c_pi<ftype, 2>) * this->window(ftype(jj) / ftype(this->taps - 1)); + jj += size_t(interpolation_factor); + if (jj >= this->taps) + jj = jj - this->taps + 1; + } + + const T s = 
reciprocal(sum(this->filter)) * static_cast<ftype>(interpolation_factor * scale); + this->filter = this->filter * s; +} + +template <typename T> +size_t samplerate_converter<T>::process_impl(univector_ref<T> output, univector_ref<const T> input) +{ + const itype required_input_size = this->input_size_for_output(output.size()); + + const itype input_size = input.size(); + for (size_t i = 0; i < output.size(); i++) + { + const itype intermediate_index = + this->output_position_to_intermediate(static_cast<itype>(i) + this->output_position); + const itype intermediate_start = intermediate_index - this->taps + 1; + const std::lldiv_t input_pos = + floor_div(intermediate_start + this->interpolation_factor - 1, this->interpolation_factor); + const itype input_start = input_pos.quot; // first input sample + const itype tap_start = this->interpolation_factor - 1 - input_pos.rem; + const univector_ref<T> tap_ptr = this->filter.slice(static_cast<size_t>(tap_start * this->depth)); + + if (input_start >= this->input_position + input_size) + { + output[i] = T(0); + } + else if (input_start >= this->input_position) + { + output[i] = dotproduct( + truncate(padded(input.slice(input_start - this->input_position, this->depth)), this->depth), + tap_ptr.truncate(this->depth)); + } + else + { + const itype prev_count = this->input_position - input_start; + output[i] = dotproduct(this->delay.slice(size_t(this->depth - prev_count)), + tap_ptr.truncate(prev_count)) + + dotproduct(truncate(padded(input.truncate(size_t(this->depth - prev_count))), + size_t(this->depth - prev_count)), + tap_ptr.slice(size_t(prev_count), size_t(this->depth - prev_count))); + } + } + + if (required_input_size >= this->depth) + { + this->delay.slice(0, this->delay.size()) = + padded(input.slice(size_t(required_input_size - this->depth))); + } + else + { + this->delay.truncate(size_t(this->depth - required_input_size)) = + this->delay.slice(size_t(required_input_size)); + this->delay.slice(size_t(this->depth - 
required_input_size)) = padded(input); + } + + this->input_position += required_input_size; + this->output_position += output.size(); + + return required_input_size; +} + +template struct samplerate_converter<float>; +template struct samplerate_converter<double>; +template struct samplerate_converter<complex<float>>; +template struct samplerate_converter<complex<double>>; + +} // namespace impl +} // namespace CMT_ARCH_NAME + +#ifdef CMT_MULTI_NEEDS_GATE + +template <typename T> +samplerate_converter<T>::samplerate_converter(sample_rate_conversion_quality quality, + itype interpolation_factor, itype decimation_factor, + ftype scale, ftype cutoff) +{ + CMT_MULTI_GATE(reinterpret_cast<ns::impl::samplerate_converter<T>*>(this)->init( + quality, interpolation_factor, decimation_factor, scale, cutoff)); +} + +template <typename T> +size_t samplerate_converter<T>::process_impl(univector_ref<T> output, univector_ref<const T> input) +{ + CMT_MULTI_GATE( + return reinterpret_cast<ns::impl::samplerate_converter<T>*>(this)->process_impl(output, input)); +} + +template struct samplerate_converter<float>; +template struct samplerate_converter<double>; +template struct samplerate_converter<complex<float>>; +template struct samplerate_converter<complex<double>>; + +#endif + +} // namespace kfr diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.12) + +add_kfr_library(NAME kfr_io SOURCES ${KFR_IO_SRC}) + +if (KFR_INSTALL_LIBRARIES) + install( + TARGETS kfr_io + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin) +endif () diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -137,7 +137,7 @@ endif () add_executable(all_tests all_tests.cpp ${ALL_TESTS_CPP}) target_compile_definitions(all_tests PRIVATE KFR_NO_MAIN) -target_link_libraries(all_tests kfr use_arch) +target_link_libraries(all_tests kfr use_arch kfr_dsp) if (KFR_ENABLE_DFT) target_link_libraries(all_tests kfr_dft) 
target_link_libraries(dft_test kfr_dft) @@ -162,7 +162,7 @@ function (add_x86_test ARCH) if (KFR_ENABLE_DFT) target_sources(all_tests_${NAME} PRIVATE ${KFR_DFT_SRC}) endif () - target_link_libraries(all_tests_${NAME} kfr) + target_link_libraries(all_tests_${NAME} kfr kfr_dsp) target_set_arch(all_tests_${NAME} PRIVATE ${ARCH}) target_compile_definitions(all_tests_${NAME} PRIVATE KFR_NO_MAIN) target_compile_definitions(all_tests_${NAME} PUBLIC KFR_ENABLE_FLAC=1) diff --git a/tests/unit/dsp/biquad.cpp b/tests/unit/dsp/biquad.cpp @@ -112,6 +112,14 @@ TEST(biquad_lowpass2) CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0); }); } + +TEST(biquad_filter) +{ + biquad_params<float> params[16]; + auto f = biquad_filter<float>(params); + float buf[256] = {}; /* zero-filled so the filter is not fed indeterminate values */ + f.apply(buf); +} } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt @@ -21,7 +21,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin) add_executable(sample_rate_converter sample_rate_converter.cpp) -target_link_libraries(sample_rate_converter kfr kfr_io use_arch) +target_link_libraries(sample_rate_converter kfr kfr_dsp kfr_io use_arch) add_executable(ebu_test ebu_test.cpp) target_link_libraries(ebu_test kfr kfr_io use_arch) diff --git a/update-sources.py b/update-sources.py @@ -39,7 +39,8 @@ list_sources("KFR_IO_SRC", "include/kfr/io", ['*.hpp', '*.h']) list_sources("KFR_RUNTIME_SRC", "include/kfr/runtime", ['*.hpp', '*.h']) list_sources("KFR_GRAPHICS_SRC", "include/kfr/graphics", ['*.hpp', '*.h']) list_sources("KFR_SRC", "include", ['*.hpp', '*.h']) -list_sources("KFR_DFT_SRC", "src/dft", ['*.cpp'], ["dft-src.cpp"]) +list_sources("KFR_DFT_SRC", "src/dft", ['*.cpp']) +list_sources("KFR_DSP_SRC", "src/dsp", ['*.cpp']) list_sources("KFR_IO_SRC", "src/io", ['*.cpp']) list_sources("KFR_UNITTEST_SRC", "tests/unit", ['*.cpp'])