commit c7cd3a9bcc73a10bbe0ba767d7bb6e0804ef8821
parent 6ca539f334b26762d59fce5294ddcec48e0d88b7
Author: [email protected] <[email protected]>
Date: Mon, 12 Nov 2018 23:06:12 +0300
Merge branch 'dev'
Diffstat:
4 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/include/kfr/base/platform.hpp b/include/kfr/base/platform.hpp
@@ -42,9 +42,10 @@ enum class cpu_t : int
sse42 = 5,
avx1 = 6,
avx2 = 7,
+ avx512 = 8, // requires all of AVX-512 F, CD, VL, DQ and BW
avx = static_cast<int>(avx1),
lowest = static_cast<int>(sse2),
- highest = static_cast<int>(avx2),
+ highest = static_cast<int>(avx512),
#endif
#ifdef CMT_ARCH_ARM
neon = 1,
@@ -70,12 +71,12 @@ constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) -
constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); }
#ifdef CMT_ARCH_X86
-constexpr auto cpu_list =
- cvals_t<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>();
+constexpr auto cpu_list = cvals_t<cpu_t, cpu_t::avx512, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3,
+ cpu_t::sse3, cpu_t::sse2>();
#else
constexpr auto cpu_list = cvals<cpu_t, cpu_t::neon>;
#endif
-}
+} // namespace internal
template <cpu_t cpu>
using cpuval_t = cval_t<cpu_t, cpu>;
@@ -87,7 +88,12 @@ constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuva
/// @brief Returns name of the cpu instruction set
CMT_UNUSED static const char* cpu_name(cpu_t set)
{
- static const char* names[] = { "common", "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" };
+#ifdef CMT_ARCH_X86
+ static const char* names[] = { "common", "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2", "avx512" };
+#endif
+#ifdef CMT_ARCH_ARM
+ static const char* names[] = { "common", "neon", "neon64" };
+#endif
if (set >= cpu_t::lowest && set <= cpu_t::highest)
return names[static_cast<size_t>(set)];
return "-";
@@ -105,7 +111,7 @@ constexpr inline const T& bitness_const(const T&, const T& x64)
return x64;
}
#else
-template <int = 0>
+template <int = 0>
constexpr inline const char* bitness_const(const char* x32, const char*)
{
return x32;
@@ -125,7 +131,8 @@ struct platform
constexpr static size_t maximum_vector_alignment = 32;
constexpr static size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1;
#ifdef CMT_ARCH_X86
- constexpr static size_t simd_register_count = bitness_const(8, 16);
+ constexpr static size_t simd_register_count =
+ c >= cpu_t::avx512 ? bitness_const(8, 32) : bitness_const(8, 16);
#endif
#ifdef CMT_ARCH_ARM
constexpr static size_t simd_register_count = 16;
@@ -136,14 +143,20 @@ struct platform
#ifdef CMT_ARCH_X86
constexpr static size_t native_float_vector_size =
- c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size;
+ c >= cpu_t::avx512 ? 64 :
+ c >= cpu_t::avx1 ? 32 :
+ c >= cpu_t::sse2 ? 16 :
+ common_float_vector_size;
#endif
#ifdef CMT_ARCH_ARM
constexpr static size_t native_float_vector_size = c == cpu_t::neon ? 16 : common_float_vector_size;
#endif
#ifdef CMT_ARCH_X86
constexpr static size_t native_int_vector_size =
- c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size;
+ c >= cpu_t::avx512 ? 64 :
+ c >= cpu_t::avx2 ? 32 :
+ c >= cpu_t::sse2 ? 16 :
+ common_int_vector_size;
#endif
#ifdef CMT_ARCH_ARM
constexpr static size_t native_int_vector_size = c == cpu_t::neon ? 16 : common_int_vector_size;
@@ -175,4 +188,4 @@ template <typename T, size_t N = platform<T>::vector_width>
struct vec;
template <typename T, size_t N = platform<T>::vector_width>
struct mask;
-}
+} // namespace kfr
diff --git a/include/kfr/cpuid/cpuid.hpp b/include/kfr/cpuid/cpuid.hpp
@@ -57,6 +57,7 @@ struct cpu_features
u32 hasAVX512DQ : 1;
u32 hasAVX512PF : 1;
u32 hasAVX512BW : 1;
+ u32 hasAVX512VL : 1;
u32 hasBMI1 : 1;
u32 hasBMI2 : 1;
u32 hasCLFSH : 1;
@@ -248,6 +249,7 @@ cpu_t detect_cpu()
c.hasAVX512CD = f_7_EBX >> 28 & 1;
c.hasSHA = f_7_EBX >> 29 & 1;
c.hasAVX512BW = f_7_EBX >> 30 & 1;
+ c.hasAVX512VL = f_7_EBX >> 31 & 1;
c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1;
c.hasLAHF = f_81_ECX >> 0 & 1;
c.hasLZCNT = c.isIntel && f_81_ECX >> 5 & 1;
@@ -264,6 +266,9 @@ cpu_t detect_cpu()
c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (get_xcr0() & 0x06) == 0x06;
c.hasAVX512OSSUPPORT = c.hasAVXOSSUPPORT && c.hasAVX512F && c.hasOSXSAVE && (get_xcr0() & 0xE0) == 0xE0;
+ if (c.hasAVX512F && c.hasAVX512CD && c.hasAVX512VL && c.hasAVX512BW && c.hasAVX512DQ &&
+ c.hasAVX512OSSUPPORT)
+ return cpu_t::avx512;
if (c.hasAVX2 && c.hasAVXOSSUPPORT)
return cpu_t::avx2;
if (c.hasAVX && c.hasAVXOSSUPPORT)
@@ -278,7 +283,7 @@ cpu_t detect_cpu()
return cpu_t::sse2;
return cpu_t::lowest;
}
-}
+} // namespace internal
#else
template <size_t = 0>
@@ -288,4 +293,4 @@ cpu_t detect_cpu()
}
#endif
-}
+} // namespace kfr
diff --git a/include/kfr/version.hpp b/include/kfr/version.hpp
@@ -26,10 +26,12 @@
#pragma once
#include "base/types.hpp"
+#include "cpuid/cpuid_auto.hpp"
namespace kfr
{
/// @brief Returns string representation of the KFR version (including target architecture)
inline static const char* library_version() { return KFR_VERSION_FULL; }
+inline static const char* cpu_runtime() { return cpu_name(get_cpu()); } // name (as given by cpu_name) of the instruction set returned by get_cpu()
}
diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp
@@ -124,7 +124,7 @@ TEST(fft_accuracy)
int main()
{
- println(library_version());
+ println(library_version(), " running on ", cpu_runtime());
return testo::run_all("", true);
}