Use float instead of double if double is not supported in simd - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 0c5bb07ce8ae146b691ee80beed4068caad3383e
parent 0ee4a81f2a710363b5242a3744e211eedb4ae00c
Author: [email protected] <[email protected]>
Date:   Tue,  9 Aug 2016 06:49:12 +0300

Use float instead of double if double is not supported in simd

Diffstat:
M examples/biquads.cpp  | 34 +++++++++++++++++-----------------
M examples/dft.cpp  | 9 ++++-----
M examples/fir.cpp  | 12 ++++++------
M examples/sample_rate_conversion.cpp  | 20 ++++++++++----------
M examples/window.cpp  | 2 +-
M include/kfr/dft/fft.hpp  | 6 +++---
M tests/complex_test.cpp  | 11 +++++++++++
M tests/conv_test.cpp  | 8 ++++----
M tests/dft_test.cpp  | 8 +++++++-
M tests/intrinsic_test.cpp  | 82 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------

10 files changed, 110 insertions(+), 82 deletions(-)
diff --git a/examples/biquads.cpp b/examples/biquads.cpp
@@ -32,53 +32,53 @@ int main(int argc, char** argv)
 
     const std::string options = "phaseresp=True";
 
-    univector<double, 128> output;
+    univector<fbase, 128> output;
     {
-        biquad_params<double> bq[] = { biquad_notch(0.1, 0.5), biquad_notch(0.2, 0.5), biquad_notch(0.3, 0.5),
-                                       biquad_notch(0.4, 0.5) };
+        biquad_params<fbase> bq[] = { biquad_notch(0.1, 0.5), biquad_notch(0.2, 0.5), biquad_notch(0.3, 0.5),
+                                      biquad_notch(0.4, 0.5) };
         output = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_notch", output, options + ", title='Four Biquad Notch filters'");
 
     {
-        biquad_params<double> bq[] = { biquad_lowpass(0.2, 0.9) };
-        output                     = biquad(bq, simpleimpulse());
+        biquad_params<fbase> bq[] = { biquad_lowpass(0.2, 0.9) };
+        output                    = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_lowpass", output, options + ", title='Biquad Low pass filter (0.2, 0.9)'");
 
     {
-        biquad_params<double> bq[] = { biquad_highpass(0.3, 0.1) };
-        output                     = biquad(bq, simpleimpulse());
+        biquad_params<fbase> bq[] = { biquad_highpass(0.3, 0.1) };
+        output                    = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_highpass", output, options + ", title='Biquad High pass filter (0.3, 0.1)'");
 
     {
-        biquad_params<double> bq[] = { biquad_peak(0.3, 0.5, +9.0) };
-        output                     = biquad(bq, simpleimpulse());
+        biquad_params<fbase> bq[] = { biquad_peak(0.3, 0.5, +9.0) };
+        output                    = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_peak", output, options + ", title='Biquad Peak filter (0.2, 0.5, +9)'");
 
     {
-        biquad_params<double> bq[] = { biquad_peak(0.3, 3.0, -2.0) };
-        output                     = biquad(bq, simpleimpulse());
+        biquad_params<fbase> bq[] = { biquad_peak(0.3, 3.0, -2.0) };
+        output                    = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_peak2", output, options + ", title='Biquad Peak filter (0.3, 3, -2)'");
 
     {
-        biquad_params<double> bq[] = { biquad_lowshelf(0.3, -1.0) };
-        output                     = biquad(bq, simpleimpulse());
+        biquad_params<fbase> bq[] = { biquad_lowshelf(0.3, -1.0) };
+        output                    = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_lowshelf", output, options + ", title='Biquad low shelf filter (0.3, -1)'");
 
     {
-        biquad_params<double> bq[] = { biquad_highshelf(0.3, +9.0) };
-        output                     = biquad(bq, simpleimpulse());
+        biquad_params<fbase> bq[] = { biquad_highshelf(0.3, +9.0) };
+        output                    = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_highshelf", output, options + ", title='Biquad high shelf filter (0.3, +9)'");
 
     {
-        biquad_params<double> bq[] = { biquad_bandpass(0.25, 0.2) };
-        output                     = biquad(bq, simpleimpulse());
+        biquad_params<fbase> bq[] = { biquad_bandpass(0.25, 0.2) };
+        output                    = biquad(bq, simpleimpulse());
     }
     plot_save("biquad_bandpass", output, options + ", title='Biquad band pass (0.25, 0.2)'");
 
diff --git a/examples/dft.cpp b/examples/dft.cpp
@@ -29,14 +29,13 @@ int main(int argc, char** argv)
 
     // fft size
     const size_t size = 128;
-    using float_type  = double;
 
     // initialize input & output buffers
-    univector<complex<float_type>, size> in  = sin(linspace(0.0, c_pi<float_type, 2> * 4.0, size));
-    univector<complex<float_type>, size> out = scalar(qnan);
+    univector<complex<fbase>, size> in  = sin(linspace(0.0, c_pi<fbase, 2> * 4.0, size));
+    univector<complex<fbase>, size> out = scalar(qnan);
 
     // initialize fft
-    const dft_plan<float_type> dft(size);
+    const dft_plan<fbase> dft(size);
 
     // allocate work buffer for fft (if needed)
     univector<u8> temp(dft.temp_size);
@@ -48,7 +47,7 @@ int main(int argc, char** argv)
     out = out / size;
 
     // get magnitude and convert to decibels
-    univector<float_type, size> dB = amp_to_dB(cabs(out));
+    univector<fbase, size> dB = amp_to_dB(cabs(out));
 
     println("max  = ", maxof(dB));
     println("min  = ", minof(dB));
diff --git a/examples/fir.cpp b/examples/fir.cpp
@@ -37,15 +37,15 @@ int main(int argc, char** argv)
 
     const std::string options = "phaseresp=False";
 
-    univector<double, 15> taps15;
-    univector<double, 127> taps127;
-    univector<double, 8191> taps8191;
+    univector<fbase, 15> taps15;
+    univector<fbase, 127> taps127;
+    univector<fbase, 8191> taps8191;
 
-    expression_pointer<double> hann = to_pointer(window_hann(taps15.size()));
+    expression_pointer<fbase> hann = to_pointer(window_hann(taps15.size()));
 
-    expression_pointer<double> kaiser = to_pointer(window_kaiser(taps127.size(), 3.0));
+    expression_pointer<fbase> kaiser = to_pointer(window_kaiser(taps127.size(), 3.0));
 
-    expression_pointer<double> blackman_harris = to_pointer(window_blackman_harris(taps8191.size()));
+    expression_pointer<fbase> blackman_harris = to_pointer(window_blackman_harris(taps8191.size()));
 
     fir_lowpass(taps15, 0.15, hann, true);
     plot_save("fir_lowpass_hann", taps15, options + ", title='15-point lowpass FIR, Hann window'");
diff --git a/examples/sample_rate_conversion.cpp b/examples/sample_rate_conversion.cpp
@@ -31,7 +31,7 @@ using namespace kfr;
 constexpr size_t input_sr  = 96000;
 constexpr size_t output_sr = 44100;
 constexpr size_t len       = 96000 * 6;
-constexpr f64 i32max       = 2147483647.0;
+constexpr fbase i32max     = 2147483647.0;
 
 int main(int argc, char** argv)
 {
@@ -39,11 +39,11 @@ int main(int argc, char** argv)
 
     const std::string options = "phaseresp=False";
 
-    univector<f64> swept_sine = swept(0.5, len);
+    univector<fbase> swept_sine = swept<fbase>(0.5, len);
 
     {
-        auto r = resampler(resample_quality::high, output_sr, input_sr, 1.0, 0.496);
-        univector<f64> resampled(len * output_sr / input_sr);
+        auto r = resampler<fbase>(resample_quality::high, output_sr, input_sr, 1.0, 0.496);
+        univector<fbase> resampled(len * output_sr / input_sr);
 
         const size_t destsize = r(resampled.data(), swept_sine);
 
@@ -57,8 +57,8 @@ int main(int argc, char** argv)
     }
 
     {
-        auto r = resampler(resample_quality::normal, output_sr, input_sr, 1.0, 0.496);
-        univector<f64> resampled(len * output_sr / input_sr);
+        auto r = resampler<fbase>(resample_quality::normal, output_sr, input_sr, 1.0, 0.496);
+        univector<fbase> resampled(len * output_sr / input_sr);
 
         const size_t destsize = r(resampled.data(), swept_sine);
 
@@ -72,8 +72,8 @@ int main(int argc, char** argv)
     }
 
     {
-        auto r = resampler(resample_quality::low, output_sr, input_sr, 1.0, 0.496);
-        univector<f64> resampled(len * output_sr / input_sr);
+        auto r = resampler<fbase>(resample_quality::low, output_sr, input_sr, 1.0, 0.496);
+        univector<fbase> resampled(len * output_sr / input_sr);
 
         const size_t destsize = r(resampled.data(), swept_sine);
 
@@ -87,8 +87,8 @@ int main(int argc, char** argv)
     }
 
     {
-        auto r = resampler(resample_quality::draft, output_sr, input_sr, 1.0, 0.496);
-        univector<f64> resampled(len * output_sr / input_sr);
+        auto r = resampler<fbase>(resample_quality::draft, output_sr, input_sr, 1.0, 0.496);
+        univector<fbase> resampled(len * output_sr / input_sr);
 
         const size_t destsize = r(resampled.data(), swept_sine);
 
diff --git a/examples/window.cpp b/examples/window.cpp
@@ -30,7 +30,7 @@ int main(int argc, char** argv)
     const std::string options = "freqresp=True, dots=True, padwidth=1024, "
                                 "log_freq=False, horizontal=False, normalized_freq=True";
 
-    univector<double, 64> output;
+    univector<fbase, 64> output;
     output = window_hann(output.size());
     plot_save("window_hann", output, options + ", title='Hann window'");
 
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -214,9 +214,9 @@ CMT_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size)
     }
     else
     {
-        double kth  = c_pi<double, 2> * (n / static_cast<double>(size));
-        double tcos = +kfr::cos(kth);
-        double tsin = -kfr::sin(kth);
+        fbase kth  = c_pi<fbase, 2> * (n / static_cast<fbase>(size));
+        fbase tcos = +kfr::cos(kth);
+        fbase tsin = -kfr::sin(kth);
         return make_vector(static_cast<T>(tcos), static_cast<T>(tsin));
     }
 }
diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp
@@ -101,6 +101,15 @@ TEST(complex_math)
     CHECK(cexp(c32{ 1.f, 1.f }) == c32{ 1.4686939399158849, 2.2873552871788423 });
     CHECK(cexp2(c32{ 1.f, 1.f }) == c32{ 1.5384778027279442, 1.2779225526272695 });
     CHECK(cexp10(c32{ 1.f, 1.f }) == c32{ -6.682015101903131, 7.439803369574931 });
+
+#ifdef KFR_NATIVE_F64
+    CHECK(csin(c64{ 1.f, 1.f }) == c64{ 1.2984575814159773, 0.634963914784736 });
+    CHECK(ccos(c64{ 1.f, 1.f }) == c64{ 0.8337300251311489, -0.9888977057628651 });
+    CHECK(csinh(c64{ 1.f, 1.f }) == c64{ 0.634963914784736, 1.2984575814159773 });
+    CHECK(ccosh(c64{ 1.f, 1.f }) == c64{ 0.8337300251311489, 0.9888977057628651 });
+    CHECK(clog(c64{ 1.f, 1.f }) == c64{ 0.34657359027997264, 0.7853981633974483 });
+    CHECK(cexp(c64{ 1.f, 1.f }) == c64{ 1.4686939399158849, 2.2873552871788423 });
+#endif
 }
 
 TEST(complex_read_write)
@@ -168,10 +177,12 @@ int main(int argc, char** argv)
 {
     println(library_version());
 
+#ifdef CMT_ARCH_SSE2
     static_assert(vector_width<f32, cpu_t::sse2> == 4, "");
     static_assert(vector_width<c32, cpu_t::sse2> == 2, "");
     static_assert(vector_width<i32, cpu_t::sse2> == 4, "");
     static_assert(vector_width<complex<i32>, cpu_t::sse2> == 2, "");
+#endif
 
     static_assert(is_numeric<vec<complex<float>, 4>>::value, "");
     static_assert(is_numeric_args<vec<complex<float>, 4>>::value, "");
diff --git a/tests/conv_test.cpp b/tests/conv_test.cpp
@@ -19,11 +19,11 @@ using namespace kfr;
 
 TEST(test_convolve)
 {
-    univector<double, 5> a({ 1, 2, 3, 4, 5 });
-    univector<double, 5> b({ 0.25, 0.5, 1.0, 0.5, 0.25 });
-    univector<double> c = convolve(a, b);
+    univector<fbase, 5> a({ 1, 2, 3, 4, 5 });
+    univector<fbase, 5> b({ 0.25, 0.5, 1.0, 0.5, 0.25 });
+    univector<fbase> c = convolve(a, b);
     CHECK(c.size() == 9);
-    CHECK(rms(c - univector<double>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001);
+    CHECK(rms(c - univector<fbase>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001);
 }
 
 int main(int argc, char** argv)
diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp
@@ -22,12 +22,18 @@
 
 using namespace kfr;
 
+#ifdef KFR_NATIVE_F64
+constexpr ctypes_t<float, double> float_types{};
+#else
+constexpr ctypes_t<float> float_types{};
+#endif
+
 TEST(fft_accuracy)
 {
     testo::active_test()->show_progress = true;
     random_bit_generator gen(2247448713, 915890490, 864203735, 2982561);
 
-    testo::matrix(named("type")       = ctypes<float, double>, //
+    testo::matrix(named("type")       = float_types, //
                   named("inverse")    = std::make_tuple(false, true), //
                   named("log2(size)") = make_range(1, 21), //
                   [&gen](auto type, bool inverse, size_t log2size) {
diff --git a/tests/intrinsic_test.cpp b/tests/intrinsic_test.cpp
@@ -11,39 +11,52 @@
 
 using namespace kfr;
 
-constexpr ctypes_t<i8x1, i16x1, i32x1, i64x1, //
-                   i8x2, i16x2, i32x2, i64x2, //
-                   i8x4, i16x4, i32x4, i64x4, //
-                   i8x8, i16x8, i32x8, i64x8, //
-                   i8x16, i16x16, i32x16, i64x16, //
-                   i8x3, i16x3, i32x3, i64x3 //
+constexpr ctypes_t<i8x1, i8x2, i8x4, i8x8, i8x16, i8x3, //
+                   i16x1, i16x2, i16x4, i16x8, i16x16, i16x3, //
+                   i32x1, i32x2, i32x4, i32x8, i32x16, i32x3 //
+#ifdef KFR_NATIVE_I64
+                   ,
+                   i64x1, i64x2, i64x4, i64x8, i64x16, i64x3 //
+#endif
                    >
     signed_types{};
 
-constexpr ctypes_t<u8x1, u16x1, u32x1, u64x1, //
-                   u8x2, u16x2, u32x2, u64x2, //
-                   u8x4, u16x4, u32x4, u64x4, //
-                   u8x8, u16x8, u32x8, u64x8, //
-                   u8x16, u16x16, u32x16, u64x16, //
-                   u8x3, u16x3, u32x3, u64x3 //
+constexpr ctypes_t<u8x1, u8x2, u8x4, u8x8, u8x16, u8x3, //
+                   u16x1, u16x2, u16x4, u16x8, u16x16, u16x3, //
+                   u32x1, u32x2, u32x4, u32x8, u32x16, u32x3 //
+#ifdef KFR_NATIVE_I64
+                   ,
+                   u64x1, u64x2, u64x4, u64x8, u64x16, u64x3 //
+#endif
                    >
     unsigned_types{};
 
-constexpr ctypes_t<f32x1, f64x1, //
-                   f32x2, f64x2, //
-                   f32x4, f64x4, //
-                   f32x8, f64x8, //
-                   f32x16, f64x16, //
-                   f32x3, f64x3 //
+constexpr ctypes_t<f32x1, f32x2, f32x4, f32x8, f32x16, f32x3 //
+#ifdef KFR_NATIVE_F64
+                   ,
+                   f64x1, f64x2, f64x4, f64x8, f64x16, f64x3 //
+#endif
                    >
     float_types{};
 
-constexpr ctypes_t<u8x1, i8x1, u16x1, i16x1, u32x1, i32x1, u64x1, i64x1, f32x1, f64x1, //
-                   u8x2, i8x2, u16x2, i16x2, u32x2, i32x2, u64x2, i64x2, f32x2, f64x2, //
-                   u8x4, i8x4, u16x4, i16x4, u32x4, i32x4, u64x4, i64x4, f32x4, f64x4, //
-                   u8x8, i8x8, u16x8, i16x8, u32x8, i32x8, u64x8, i64x8, f32x8, f64x8, //
-                   u8x16, i8x16, u16x16, i16x16, u32x16, i32x16, u64x16, i64x16, f32x16, f64x16, //
-                   u8x3, i8x3, u16x3, i16x3, u32x3, i32x3, u64x3, i64x3, f32x3, f64x3 //
+constexpr ctypes_t<i8x1, i8x2, i8x4, i8x8, i8x16, i8x3, //
+                   i16x1, i16x2, i16x4, i16x8, i16x16, i16x3, //
+                   i32x1, i32x2, i32x4, i32x8, i32x16, i32x3, //
+#ifdef KFR_NATIVE_I64
+
+                   i64x1, i64x2, i64x4, i64x8, i64x16, i64x3, //
+#endif
+                   u8x1, u8x2, u8x4, u8x8, u8x16, u8x3, //
+                   u16x1, u16x2, u16x4, u16x8, u16x16, u16x3, //
+                   u32x1, u32x2, u32x4, u32x8, u32x16, u32x3, //
+#ifdef KFR_NATIVE_I64
+                   u64x1, u64x2, u64x4, u64x8, u64x16, u64x3, //
+#endif
+                   f32x1, f32x2, f32x4, f32x8, f32x16, f32x3 //
+#ifdef KFR_NATIVE_F64
+                   ,
+                   f64x1, f64x2, f64x4, f64x8, f64x16, f64x3 //
+#endif
                    >
     all_types{};
 
@@ -145,13 +158,13 @@ TEST(intrin_abs)
 
 TEST(intrin_sqrt)
 {
-    testo::assert_is_same<decltype(kfr::sqrt(9)), double>();
-    testo::assert_is_same<decltype(kfr::sqrt(make_vector(9))), f64x1>();
-    testo::assert_is_same<decltype(kfr::sqrt(make_vector(9, 25))), f64x2>();
+    testo::assert_is_same<decltype(kfr::sqrt(9)), fbase>();
+    testo::assert_is_same<decltype(kfr::sqrt(make_vector(9))), vec<fbase, 1>>();
+    testo::assert_is_same<decltype(kfr::sqrt(make_vector(9, 25))), vec<fbase, 2>>();
     CHECK(kfr::sqrt(9) == 3.0);
     CHECK(kfr::sqrt(-9) == qnan);
-    CHECK(kfr::sqrt(make_vector(9)) == make_vector(3.0));
-    CHECK(kfr::sqrt(make_vector(-9)) == make_vector(qnan));
+    CHECK(kfr::sqrt(make_vector(9)) == make_vector<fbase>(3.0));
+    CHECK(kfr::sqrt(make_vector(-9)) == make_vector<fbase>(qnan));
     testo::matrix(named("type") = float_types, named("value") = std::vector<int>{ 0, 2, 65536 },
                   [](auto type, int value) {
                       using T    = type_of<decltype(type)>;
@@ -180,8 +193,8 @@ TEST(intrin_round)
     CHECK(kfr::fract(100) == 0);
 
     testo::matrix(named("type")  = float_types,
-                  named("value") = std::vector<double>{ -1.51, -1.49, 0.0, +1.49, +1.51 },
-                  [](auto type, double value) {
+                  named("value") = std::vector<fbase>{ -1.51, -1.49, 0.0, +1.49, +1.51 },
+                  [](auto type, fbase value) {
                       using T    = type_of<decltype(type)>;
                       using Tsub = subtype<T>;
                       const T x(value);
@@ -201,10 +214,9 @@ TEST(intrin_min_max)
     CHECK(min(pack(1, 2, 3), 2) == pack(1, 2, 2));
     CHECK(min(pack(1., 2., 3.), 2) == pack(1., 2., 2.));
 
-    testo::matrix(named("type") = float_types,
-                  named("value") =
-                      std::vector<std::pair<double, double>>{ { -100, +100 }, { infinity, 0.0 } },
-                  [](auto type, std::pair<double, double> value) {
+    testo::matrix(named("type")  = float_types,
+                  named("value") = std::vector<std::pair<fbase, fbase>>{ { -100, +100 }, { infinity, 0.0 } },
+                  [](auto type, std::pair<fbase, fbase> value) {
                       using T    = type_of<decltype(type)>;
                       using Tsub = subtype<T>;
                       const T x(value.first);

	kfr Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log \| Files \| Refs \| README

M	examples/biquads.cpp	\|	34	+++++++++++++++++-----------------
M	examples/dft.cpp	\|	9	++++-----
M	examples/fir.cpp	\|	12	++++++------
M	examples/sample_rate_conversion.cpp	\|	20	++++++++++----------
M	examples/window.cpp	\|	2	+-
M	include/kfr/dft/fft.hpp	\|	6	+++---
M	tests/complex_test.cpp	\|	11	+++++++++++
M	tests/conv_test.cpp	\|	8	++++----
M	tests/dft_test.cpp	\|	8	+++++++-
M	tests/intrinsic_test.cpp	\|	82	+++++++++++++++++++++++++++++++++++++++++++++----------------------------------