kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 23256d01232179aada01c60b28a512e6702192a6
parent fc1b84b976c9c5d4bdcc625ed146d40f4eb879c9
Author: [email protected] <[email protected]>
Date:   Mon,  8 Aug 2016 06:39:08 +0300

Xcode 6.3 support

Diffstat:
M include/kfr/base/basic_expressions.hpp | 3 ++-
M include/kfr/base/memory.hpp | 2 +-
M include/kfr/base/pointer.hpp | 4 ++--
M include/kfr/base/types.hpp | 10 +++++-----
M include/kfr/base/vec.hpp | 8 ++++----
M include/kfr/cident.h | 17 ++++++++++++++++-
M include/kfr/cometa.hpp | 4 ++--
M include/kfr/dft/bitrev.hpp | 6 +++---
M include/kfr/dft/fft.hpp | 28 ++++++++++++++--------------
M include/kfr/dft/ft.hpp | 35 +++++++++++++++--------------------
M include/kfr/dsp/window.hpp | 2 +-
11 files changed, 65 insertions(+), 54 deletions(-)

diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp @@ -348,7 +348,8 @@ struct multioutput : output_expression template <typename T, size_t N> void operator()(coutput_t, size_t index, const vec<T, N>& x) { - cfor(csize<0>, csize<sizeof...(E)>, [&](auto n) { std::get<val_of(n)>(outputs)(coutput, index, x); }); + cfor(csize<0>, csize<sizeof...(E)>, + [&](auto n) { std::get<val_of(decltype(n)())>(outputs)(coutput, index, x); }); } std::tuple<E...> outputs; diff --git a/include/kfr/base/memory.hpp b/include/kfr/base/memory.hpp @@ -85,7 +85,7 @@ inline void aligned_free(void* ptr) template <typename T = void, size_t alignment = native_cache_alignment> CMT_INLINE T* aligned_allocate(size_t size = 1) { - T* ptr = static_cast<T*>(__builtin_assume_aligned( + T* ptr = static_cast<T*>(CMT_ASSUME_ALIGNED( internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>), alignment), alignment)); return ptr; } diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp @@ -134,8 +134,8 @@ expression_vtable<T, maxwidth> make_expression_vtable_impl() result.get(csize<1>) = reinterpret_cast<void*>(&internal::make_expression_end_block<decay<E>>); cforeach(csizeseq<size>, [&](auto u) { - constexpr size_t N = 1 << val_of(u); - result.get(csize<2 + val_of(u)>) = + constexpr size_t N = 1 << val_of(decltype(u)()); + result.get(csize<2 + val_of(decltype(u)())>) = reinterpret_cast<void*>(internal::make_expression_func<T, N, decay<E>>()); }); return result; diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp @@ -739,9 +739,9 @@ template <typename... Ts> using is_numeric_args = and_t<is_numeric<Ts>...>; template <typename T, cpu_t c = cpu_t::native> -constexpr size_t vector_width = cmax(size_t(1), typeclass<T> == datatype::f - ? native_float_vector_size<c> / sizeof(T) - : native_int_vector_size<c> / sizeof(T)); +constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f + ? 
native_float_vector_size<c> / sizeof(T) + : native_int_vector_size<c> / sizeof(T)); template <cpu_t c> constexpr size_t vector_width<void, c> = 0; @@ -750,7 +750,7 @@ namespace internal { template <cpu_t c> -constexpr size_t native_vector_alignment = cmax(native_float_vector_size<c>, native_int_vector_size<c>); +constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>); template <cpu_t c> constexpr bool fast_unaligned = @@ -781,7 +781,7 @@ template <typename T, cpu_t c> constexpr size_t vector_capacity = native_register_count* vector_width<T, c>; template <typename T, cpu_t c> -constexpr size_t maximum_vector_size = cmin(static_cast<size_t>(32), vector_capacity<T, c> / 4); +constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4); } } namespace cometa diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp @@ -375,8 +375,8 @@ constexpr CMT_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept constexpr CMT_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); } -template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)> -CMT_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest); +template <typename T, size_t N, size_t... Sizes> +CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest); namespace internal { @@ -944,8 +944,8 @@ CMT_INLINE auto concat(const vec<T, N1>& x, const vec<T, N2>& y, const vec<T, Si } } -template <typename T, size_t N, size_t... Sizes, size_t Nout> -CMT_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest) +template <typename T, size_t N, size_t... Sizes> +CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... 
rest) { return internal::concat(x, rest...); } diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -1,7 +1,7 @@ #pragma once #ifdef LIBC_WORKAROUND_GETS -extern char *gets (char *__s); +extern char* gets(char* __s); #endif #if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__) @@ -278,6 +278,21 @@ extern char *gets (char *__s); #define CMT_HAS_BUILTIN(builtin) 0 #endif +#if CMT_HAS_BUILTIN(CMT_ASSUME) +#define CMT_ASSUME(x) __builtin_assume(x) +#else +#define CMT_ASSUME(x) \ + do \ + { \ + } while (0) +#endif + +#if CMT_HAS_BUILTIN(CMT_ASSUME) +#define CMT_ASSUME_ALIGNED(x, a) __builtin_assume_aligned(x, a) +#else +#define CMT_ASSUME_ALIGNED(x, a) x +#endif + #ifdef __has_feature #define CMT_HAS_FEATURE(feature) __has_feature(feature) #else diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp @@ -24,12 +24,12 @@ using void_t = void; // Workaround for GCC 4.8 template <typename T> -constexpr const T& cmax(const T& x, const T& y) +constexpr const T& const_max(const T& x, const T& y) { return x > y ? x : y; } template <typename T> -constexpr const T& cmin(const T& x, const T& y) +constexpr const T& const_min(const T& x, const T& y) { return x < y ? 
x : y; } diff --git a/include/kfr/dft/bitrev.hpp b/include/kfr/dft/bitrev.hpp @@ -85,7 +85,7 @@ KFR_INTRIN void fft_reorder_swap(T* inout, size_t i) template <size_t log2n, size_t bitrev, typename T> KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j) { - __builtin_assume(i != j); + CMT_ASSUME(i != j); using cxx = cvec<T, 16>; constexpr size_t N = 1 << log2n; constexpr size_t N4 = 2 * N / 4; @@ -102,7 +102,7 @@ KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j) template <size_t log2n, size_t bitrev, typename T> KFR_INTRIN void fft_reorder_swap(T* inout, size_t i, size_t j) { - __builtin_assume(i != j); + CMT_ASSUME(i != j); using cxx = cvec<T, 16>; constexpr size_t N = 1 << log2n; constexpr size_t N4 = 2 * N / 4; @@ -259,7 +259,7 @@ void cwrite_reordered(T* out, cvec<T, 16> value, size_t N4, cbool_t<use_br2>) template <typename T, bool use_br2> KFR_INTRIN void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4, cbool_t<use_br2>) { - __builtin_assume(i != j); + CMT_ASSUME(i != j); const cvec<T, 16> vi = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), N4); const cvec<T, 16> vj = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), N4); cwrite_reordered(inout + j, vi, N4, cbool<use_br2>); diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -288,12 +288,12 @@ KFR_SINTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t constexpr static size_t prefetch_offset = width * 8; const auto N4 = N / csize<4>; const auto N43 = N4 * csize<3>; - __builtin_assume(blocks > 0); - __builtin_assume(N > 0); - __builtin_assume(N4 > 0); + CMT_ASSUME(blocks > 0); + CMT_ASSUME(N > 0); + CMT_ASSUME(N4 > 0); CMT_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b++) { -#pragma clang loop unroll_count(default_unroll_count) +#pragma clang loop unroll_count(2) for (size_t n2 = 0; n2 < N4; n2 += width) { if (prefetch) @@ -315,7 +315,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<32>, 
size_t blocks, csize_t<width>, cfal cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/) { - __builtin_assume(blocks > 0); + CMT_ASSUME(blocks > 0); constexpr static size_t prefetch_offset = 32 * 4; for (size_t b = 0; b < blocks; b++) { @@ -352,7 +352,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<8>, size_t blocks, csize_t<width>, cfals cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/) { - __builtin_assume(blocks > 0); + CMT_ASSUME(blocks > 0); constexpr static size_t prefetch_offset = width * 16; for (size_t b = 0; b < blocks; b += 2) { @@ -377,7 +377,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<16>, size_t blocks, csize_t<width>, cfal cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/) { - __builtin_assume(blocks > 0); + CMT_ASSUME(blocks > 0); constexpr static size_t prefetch_offset = width * 4; #pragma clang loop unroll_count(2) for (size_t b = 0; b < blocks; b += 2) @@ -409,7 +409,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<4>, size_t blocks, csize_t<width>, cfals complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/) { constexpr static size_t prefetch_offset = width * 4; - __builtin_assume(blocks > 0); + CMT_ASSUME(blocks > 0); CMT_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b += 4) { @@ -453,8 +453,8 @@ protected: if (splitin) in = out; const size_t stage_size = this->stage_size; - __builtin_assume(stage_size >= 2048); - __builtin_assume(stage_size % 2048 == 0); + CMT_ASSUME(stage_size >= 2048); + CMT_ASSUME(stage_size % 2048 == 0); radix4_pass(stage_size, 1, csize<width>, ctrue, cbool<splitin>, cbool<!is_even>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); } @@ -836,14 +836,14 @@ struct dft_plan const size_t log2n = ilog2(size); cswitch(csizes<1, 2, 3, 4, 5, 
6, 7, 8>, log2n, [&](auto log2n) { - add_stage<internal::fft_specialization_t<T, val_of(log2n), false>::template type>( - size, type); + add_stage<internal::fft_specialization_t<T, val_of(decltype(log2n)()), + false>::template type>(size, type); }, [&]() { cswitch(cfalse_true, is_even(log2n), [&](auto is_even) { make_fft(size, type, is_even, ctrue); - add_stage<internal::fft_reorder_stage_impl_t<T, val_of(is_even)>::template type>( - size, type); + add_stage<internal::fft_reorder_stage_impl_t< + T, val_of(decltype(is_even)())>::template type>(size, type); }); }); initialize(type); diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp @@ -60,7 +60,7 @@ CMT_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y) /// Complex Multiplication template <typename T, size_t N1, size_t N2> -CMT_INLINE vec<T, std::max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y) +CMT_INLINE vec<T, const_max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y) { return internal::cmul_impl(x, y); } @@ -359,8 +359,6 @@ CMT_INLINE void cscatter(complex<T>* base, vec<IT, N> offset, vec<T, N * 2 * gro return scatter_helper<2 * groupsize>(ptr_cast<T>(base), offset, value, csizeseq<N>); } -constexpr size_t default_unroll_count = 2; - template <typename T> KFR_INTRIN void transpose4x8(cvec<T, 8> z0, cvec<T, 8> z1, cvec<T, 8> z2, cvec<T, 8> z3, cvec<T, 4>& w0, cvec<T, 4>& w1, cvec<T, 4>& w2, cvec<T, 4>& w3, cvec<T, 4>& w4, cvec<T, 4>& w5, @@ -441,15 +439,15 @@ constexpr KFR_INTRIN T chsign(T x) template <typename T, size_t N, size_t size, size_t start, size_t step, bool inverse = false, size_t... indices> -constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>) +constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(csizes_t<indices...>) { return make_vector((indices & 1 ? chsign<inverse>(-sin_using_table<T>(size, (indices / 2 * step + start))) : cos_using_table<T>(size, (indices / 2 * step + start)))...); } template <typename T, size_t width, size_t... 
indices> -constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>, - size_t size, size_t start, size_t step) +constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(csizes_t<indices...>, size_t size, size_t start, + size_t step) { return make_vector((indices & 1 ? -sin_using_table<T>(size, indices / 2 * step + start) : cos_using_table<T>(size, indices / 2 * step + start))...); @@ -458,14 +456,13 @@ constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequen template <typename T, size_t width, size_t size, size_t start, size_t step, bool inverse = false> constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle() { - return get_fixed_twiddle_helper<T, width, size, start, step, inverse>( - std::make_index_sequence<width * 2>()); + return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(csizeseq<width * 2>); } template <typename T, size_t width> constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle(size_t size, size_t start, size_t step = 0) { - return get_fixed_twiddle_helper<T, width>(std::make_index_sequence<width * 2>(), start, step, size); + return get_fixed_twiddle_helper<T, width>(csizeseq<width * 2>, start, step, size); } template <typename T, size_t N, size_t size, size_t start, size_t step = 0, bool inverse = false> @@ -1280,9 +1277,8 @@ KFR_INTRIN vec<T, N> mul_tw(cbool_t<true>, vec<T, N> x, const complex<T>* twiddl // Non-final template <typename T, size_t width, size_t radix, bool inverse, size_t... 
I> -KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>, - cbool_t<inverse>, complex<T>* out, const complex<T>* in, - const complex<T>* tw, size_t stride) +KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize_t<radix>, cbool_t<inverse>, + complex<T>* out, const complex<T>* in, const complex<T>* tw, size_t stride) { carray<cvec<T, width>, radix> inout; @@ -1297,8 +1293,8 @@ KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<wi // Final template <typename T, size_t width, size_t radix, bool inverse, size_t... I> -KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>, - cbool_t<inverse>, complex<T>* out, const complex<T>* in, size_t stride) +KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize_t<radix>, cbool_t<inverse>, + complex<T>* out, const complex<T>* in, size_t stride) { carray<cvec<T, width>, radix> inout; @@ -1313,8 +1309,7 @@ KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<wi template <size_t width, size_t radix, typename... Args> KFR_INTRIN void butterfly(size_t i, csize_t<width>, csize_t<radix>, Args&&... args) { - butterfly_helper(std::make_index_sequence<radix>(), i, csize<width>, csize<radix>, - std::forward<Args>(args)...); + butterfly_helper(csizeseq<radix>, i, csize<width>, csize<radix>, std::forward<Args>(args)...); } template <typename... Args> @@ -1333,7 +1328,7 @@ KFR_INTRIN void butterfly_cycle(size_t& i, size_t count, csize_t<width>, Args&&. template <size_t width, typename... Args> KFR_INTRIN void butterflies(size_t count, csize_t<width>, Args&&... 
args) { - __builtin_assume(count > 0); + CMT_ASSUME(count > 0); size_t i = 0; butterfly_cycle(i, count, csize<width>, std::forward<Args>(args)...); } @@ -1389,7 +1384,7 @@ template <size_t width, typename T, bool inverse, typename Tstride = csize_t<1>> KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle, Tstride ostride = Tstride{}) { - __builtin_assume(radix > 0); + CMT_ASSUME(radix > 0); { cvec<T, width> sum = T(); size_t j = 0; @@ -1408,7 +1403,7 @@ KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>* } const size_t halfradix = radix / 2; const size_t halfradix_sqr = halfradix * halfradix; - __builtin_assume(halfradix > 0); + CMT_ASSUME(halfradix > 0); size_t i = 0; generic_butterfly_cycle(csize<width>, radix, cbool<inverse>, out, in, ostride, halfradix, halfradix_sqr, @@ -1428,7 +1423,7 @@ KFR_INTRIN void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* ou cswitch(csizes<11>, radix, [&](auto radix_) CMT_INLINE_LAMBDA { - generic_butterfly_w<width>(val_of(radix_), cbool<inverse>, out, in, twiddle, ostride); + generic_butterfly_w<width>(decltype(radix_)(), cbool<inverse>, out, in, twiddle, ostride); }, [&]() CMT_INLINE_LAMBDA { generic_butterfly_w<width>(radix, cbool<inverse>, out, in, twiddle, ostride); diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp @@ -562,7 +562,7 @@ CMT_NOINLINE expression_pointer<T> window(size_t size, window_type type, T win_p window_type::flattop, window_type::gaussian, window_type::lanczos>, type, [=](auto win) { - constexpr window_type window = val_of(win); + constexpr window_type window = val_of(decltype(win)()); return to_pointer<T>( typename internal::window_by_type<window>::template type<T>(size, win_param, symmetry)); },