kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit c7cd3a9bcc73a10bbe0ba767d7bb6e0804ef8821
parent 6ca539f334b26762d59fce5294ddcec48e0d88b7
Author: [email protected] <[email protected]>
Date:   Mon, 12 Nov 2018 23:06:12 +0300

Merge branch 'dev'

Diffstat:
M include/kfr/base/platform.hpp | 33 +++++++++++++++++++++++----------
M include/kfr/cpuid/cpuid.hpp | 9 +++++++--
M include/kfr/version.hpp | 2 ++
M tests/dft_test.cpp | 2 +-
4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/include/kfr/base/platform.hpp b/include/kfr/base/platform.hpp @@ -42,9 +42,10 @@ enum class cpu_t : int sse42 = 5, avx1 = 6, avx2 = 7, + avx512 = 8, // F, CD, VL, DQ and BW avx = static_cast<int>(avx1), lowest = static_cast<int>(sse2), - highest = static_cast<int>(avx2), + highest = static_cast<int>(avx512), #endif #ifdef CMT_ARCH_ARM neon = 1, @@ -70,12 +71,12 @@ constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); } #ifdef CMT_ARCH_X86 -constexpr auto cpu_list = - cvals_t<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>(); +constexpr auto cpu_list = cvals_t<cpu_t, cpu_t::avx512, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, + cpu_t::sse3, cpu_t::sse2>(); #else constexpr auto cpu_list = cvals<cpu_t, cpu_t::neon>; #endif -} +} // namespace internal template <cpu_t cpu> using cpuval_t = cval_t<cpu_t, cpu>; @@ -87,7 +88,12 @@ constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuva /// @brief Returns name of the cpu instruction set CMT_UNUSED static const char* cpu_name(cpu_t set) { - static const char* names[] = { "common", "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" }; +#ifdef CMT_ARCH_X86 + static const char* names[] = { "common", "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2", "avx512" }; +#endif +#ifdef CMT_ARCH_ARM + static const char* names[] = { "common", "neon", "neon64" }; +#endif if (set >= cpu_t::lowest && set <= cpu_t::highest) return names[static_cast<size_t>(set)]; return "-"; @@ -105,7 +111,7 @@ constexpr inline const T& bitness_const(const T&, const T& x64) return x64; } #else -template <int = 0> +template <int = 0> constexpr inline const char* bitness_const(const char* x32, const char*) { return x32; @@ -125,7 +131,8 @@ struct platform constexpr static size_t maximum_vector_alignment = 32; constexpr static size_t 
maximum_vector_alignment_mask = maximum_vector_alignment - 1; #ifdef CMT_ARCH_X86 - constexpr static size_t simd_register_count = bitness_const(8, 16); + constexpr static size_t simd_register_count = + c >= cpu_t::avx512 ? bitness_const(8, 32) : bitness_const(8, 16); #endif #ifdef CMT_ARCH_ARM constexpr static size_t simd_register_count = 16; @@ -136,14 +143,20 @@ struct platform #ifdef CMT_ARCH_X86 constexpr static size_t native_float_vector_size = - c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size; + c >= cpu_t::avx512 ? 64 : + c >= cpu_t::avx1 ? 32 : + c >= cpu_t::sse2 ? 16 : + common_float_vector_size; #endif #ifdef CMT_ARCH_ARM constexpr static size_t native_float_vector_size = c == cpu_t::neon ? 16 : common_float_vector_size; #endif #ifdef CMT_ARCH_X86 constexpr static size_t native_int_vector_size = - c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size; + c >= cpu_t::avx512 ? 64 : + c >= cpu_t::avx2 ? 32 : + c >= cpu_t::sse2 ? 16 : + common_int_vector_size; #endif #ifdef CMT_ARCH_ARM constexpr static size_t native_int_vector_size = c == cpu_t::neon ? 
16 : common_int_vector_size; @@ -175,4 +188,4 @@ template <typename T, size_t N = platform<T>::vector_width> struct vec; template <typename T, size_t N = platform<T>::vector_width> struct mask; -} +} // namespace kfr diff --git a/include/kfr/cpuid/cpuid.hpp b/include/kfr/cpuid/cpuid.hpp @@ -57,6 +57,7 @@ struct cpu_features u32 hasAVX512DQ : 1; u32 hasAVX512PF : 1; u32 hasAVX512BW : 1; + u32 hasAVX512VL : 1; u32 hasBMI1 : 1; u32 hasBMI2 : 1; u32 hasCLFSH : 1; @@ -248,6 +249,7 @@ cpu_t detect_cpu() c.hasAVX512CD = f_7_EBX >> 28 & 1; c.hasSHA = f_7_EBX >> 29 & 1; c.hasAVX512BW = f_7_EBX >> 30 & 1; + c.hasAVX512VL = f_7_EBX >> 31 & 1; c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1; c.hasLAHF = f_81_ECX >> 0 & 1; c.hasLZCNT = c.isIntel && f_81_ECX >> 5 & 1; @@ -264,6 +266,9 @@ cpu_t detect_cpu() c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (get_xcr0() & 0x06) == 0x06; c.hasAVX512OSSUPPORT = c.hasAVXOSSUPPORT && c.hasAVX512F && c.hasOSXSAVE && (get_xcr0() & 0xE0) == 0xE0; + if (c.hasAVX512F && c.hasAVX512CD && c.hasAVX512VL && c.hasAVX512BW && c.hasAVX512DQ && + c.hasAVX512OSSUPPORT) + return cpu_t::avx512; if (c.hasAVX2 && c.hasAVXOSSUPPORT) return cpu_t::avx2; if (c.hasAVX && c.hasAVXOSSUPPORT) @@ -278,7 +283,7 @@ cpu_t detect_cpu() return cpu_t::sse2; return cpu_t::lowest; } -} +} // namespace internal #else template <size_t = 0> @@ -288,4 +293,4 @@ cpu_t detect_cpu() } #endif -} +} // namespace kfr diff --git a/include/kfr/version.hpp b/include/kfr/version.hpp @@ -26,10 +26,12 @@ #pragma once #include "base/types.hpp" +#include "cpuid/cpuid_auto.hpp" namespace kfr { /// @brief Returns string representation of the KFR version (including target architecture) inline static const char* library_version() { return KFR_VERSION_FULL; } +inline static const char* cpu_runtime() { return cpu_name(get_cpu()); } } diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp @@ -124,7 +124,7 @@ TEST(fft_accuracy) int main() { - println(library_version()); + println(library_version(), " running 
on ", cpu_runtime()); return testo::run_all("", true); }