commit 23256d01232179aada01c60b28a512e6702192a6
parent fc1b84b976c9c5d4bdcc625ed146d40f4eb879c9
Author: [email protected] <[email protected]>
Date: Mon, 8 Aug 2016 06:39:08 +0300
Xcode 6.3 support
Diffstat:
11 files changed, 65 insertions(+), 54 deletions(-)
diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp
@@ -348,7 +348,8 @@ struct multioutput : output_expression
template <typename T, size_t N>
void operator()(coutput_t, size_t index, const vec<T, N>& x)
{
- cfor(csize<0>, csize<sizeof...(E)>, [&](auto n) { std::get<val_of(n)>(outputs)(coutput, index, x); });
+ cfor(csize<0>, csize<sizeof...(E)>,
+ [&](auto n) { std::get<val_of(decltype(n)())>(outputs)(coutput, index, x); });
}
std::tuple<E...> outputs;
diff --git a/include/kfr/base/memory.hpp b/include/kfr/base/memory.hpp
@@ -85,7 +85,7 @@ inline void aligned_free(void* ptr)
template <typename T = void, size_t alignment = native_cache_alignment>
CMT_INLINE T* aligned_allocate(size_t size = 1)
{
- T* ptr = static_cast<T*>(__builtin_assume_aligned(
+ T* ptr = static_cast<T*>(CMT_ASSUME_ALIGNED(
internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>), alignment), alignment));
return ptr;
}
diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp
@@ -134,8 +134,8 @@ expression_vtable<T, maxwidth> make_expression_vtable_impl()
result.get(csize<1>) = reinterpret_cast<void*>(&internal::make_expression_end_block<decay<E>>);
cforeach(csizeseq<size>, [&](auto u) {
- constexpr size_t N = 1 << val_of(u);
- result.get(csize<2 + val_of(u)>) =
+ constexpr size_t N = 1 << val_of(decltype(u)());
+ result.get(csize<2 + val_of(decltype(u)())>) =
reinterpret_cast<void*>(internal::make_expression_func<T, N, decay<E>>());
});
return result;
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -739,9 +739,9 @@ template <typename... Ts>
using is_numeric_args = and_t<is_numeric<Ts>...>;
template <typename T, cpu_t c = cpu_t::native>
-constexpr size_t vector_width = cmax(size_t(1), typeclass<T> == datatype::f
- ? native_float_vector_size<c> / sizeof(T)
- : native_int_vector_size<c> / sizeof(T));
+constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f
+ ? native_float_vector_size<c> / sizeof(T)
+ : native_int_vector_size<c> / sizeof(T));
template <cpu_t c>
constexpr size_t vector_width<void, c> = 0;
@@ -750,7 +750,7 @@ namespace internal
{
template <cpu_t c>
-constexpr size_t native_vector_alignment = cmax(native_float_vector_size<c>, native_int_vector_size<c>);
+constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>);
template <cpu_t c>
constexpr bool fast_unaligned =
@@ -781,7 +781,7 @@ template <typename T, cpu_t c>
constexpr size_t vector_capacity = native_register_count* vector_width<T, c>;
template <typename T, cpu_t c>
-constexpr size_t maximum_vector_size = cmin(static_cast<size_t>(32), vector_capacity<T, c> / 4);
+constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4);
}
}
namespace cometa
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -375,8 +375,8 @@ constexpr CMT_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept
constexpr CMT_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); }
-template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)>
-CMT_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest);
+template <typename T, size_t N, size_t... Sizes>
+CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest);
namespace internal
{
@@ -944,8 +944,8 @@ CMT_INLINE auto concat(const vec<T, N1>& x, const vec<T, N2>& y, const vec<T, Si
}
}
-template <typename T, size_t N, size_t... Sizes, size_t Nout>
-CMT_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest)
+template <typename T, size_t N, size_t... Sizes>
+CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest)
{
return internal::concat(x, rest...);
}
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -1,7 +1,7 @@
#pragma once
#ifdef LIBC_WORKAROUND_GETS
-extern char *gets (char *__s);
+extern char* gets(char* __s);
#endif
#if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__)
@@ -278,6 +278,21 @@ extern char *gets (char *__s);
#define CMT_HAS_BUILTIN(builtin) 0
#endif
+#if CMT_HAS_BUILTIN(__builtin_assume) /* test the builtin itself, not the macro being defined */
+#define CMT_ASSUME(x) __builtin_assume(x)
+#else /* no-op fallback: the condition is discarded, never evaluated */
+#define CMT_ASSUME(x)                                                                                        \
+    do                                                                                                       \
+    {                                                                                                        \
+    } while (0)
+#endif
+
+#if CMT_HAS_BUILTIN(__builtin_assume_aligned) /* was CMT_ASSUME: always false, hint never enabled */
+#define CMT_ASSUME_ALIGNED(x, a) __builtin_assume_aligned(x, a)
+#else /* fallback passes the pointer through unchanged; parenthesized for macro hygiene */
+#define CMT_ASSUME_ALIGNED(x, a) (x)
+#endif
+
#ifdef __has_feature
#define CMT_HAS_FEATURE(feature) __has_feature(feature)
#else
diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp
@@ -24,12 +24,12 @@ using void_t = void;
// Workaround for GCC 4.8
template <typename T>
-constexpr const T& cmax(const T& x, const T& y)
+constexpr const T& const_max(const T& x, const T& y)
{
return x > y ? x : y;
}
template <typename T>
-constexpr const T& cmin(const T& x, const T& y)
+constexpr const T& const_min(const T& x, const T& y)
{
return x < y ? x : y;
}
diff --git a/include/kfr/dft/bitrev.hpp b/include/kfr/dft/bitrev.hpp
@@ -85,7 +85,7 @@ KFR_INTRIN void fft_reorder_swap(T* inout, size_t i)
template <size_t log2n, size_t bitrev, typename T>
KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j)
{
- __builtin_assume(i != j);
+ CMT_ASSUME(i != j);
using cxx = cvec<T, 16>;
constexpr size_t N = 1 << log2n;
constexpr size_t N4 = 2 * N / 4;
@@ -102,7 +102,7 @@ KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j)
template <size_t log2n, size_t bitrev, typename T>
KFR_INTRIN void fft_reorder_swap(T* inout, size_t i, size_t j)
{
- __builtin_assume(i != j);
+ CMT_ASSUME(i != j);
using cxx = cvec<T, 16>;
constexpr size_t N = 1 << log2n;
constexpr size_t N4 = 2 * N / 4;
@@ -259,7 +259,7 @@ void cwrite_reordered(T* out, cvec<T, 16> value, size_t N4, cbool_t<use_br2>)
template <typename T, bool use_br2>
KFR_INTRIN void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4, cbool_t<use_br2>)
{
- __builtin_assume(i != j);
+ CMT_ASSUME(i != j);
const cvec<T, 16> vi = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), N4);
const cvec<T, 16> vj = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), N4);
cwrite_reordered(inout + j, vi, N4, cbool<use_br2>);
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -288,12 +288,12 @@ KFR_SINTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t
constexpr static size_t prefetch_offset = width * 8;
const auto N4 = N / csize<4>;
const auto N43 = N4 * csize<3>;
- __builtin_assume(blocks > 0);
- __builtin_assume(N > 0);
- __builtin_assume(N4 > 0);
+ CMT_ASSUME(blocks > 0);
+ CMT_ASSUME(N > 0);
+ CMT_ASSUME(N4 > 0);
CMT_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b++)
{
-#pragma clang loop unroll_count(default_unroll_count)
+#pragma clang loop unroll_count(2)
for (size_t n2 = 0; n2 < N4; n2 += width)
{
if (prefetch)
@@ -315,7 +315,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<32>, size_t blocks, csize_t<width>, cfal
cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
{
- __builtin_assume(blocks > 0);
+ CMT_ASSUME(blocks > 0);
constexpr static size_t prefetch_offset = 32 * 4;
for (size_t b = 0; b < blocks; b++)
{
@@ -352,7 +352,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<8>, size_t blocks, csize_t<width>, cfals
cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
{
- __builtin_assume(blocks > 0);
+ CMT_ASSUME(blocks > 0);
constexpr static size_t prefetch_offset = width * 16;
for (size_t b = 0; b < blocks; b += 2)
{
@@ -377,7 +377,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<16>, size_t blocks, csize_t<width>, cfal
cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
{
- __builtin_assume(blocks > 0);
+ CMT_ASSUME(blocks > 0);
constexpr static size_t prefetch_offset = width * 4;
#pragma clang loop unroll_count(2)
for (size_t b = 0; b < blocks; b += 2)
@@ -409,7 +409,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<4>, size_t blocks, csize_t<width>, cfals
complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
{
constexpr static size_t prefetch_offset = width * 4;
- __builtin_assume(blocks > 0);
+ CMT_ASSUME(blocks > 0);
CMT_LOOP_NOUNROLL
for (size_t b = 0; b < blocks; b += 4)
{
@@ -453,8 +453,8 @@ protected:
if (splitin)
in = out;
const size_t stage_size = this->stage_size;
- __builtin_assume(stage_size >= 2048);
- __builtin_assume(stage_size % 2048 == 0);
+ CMT_ASSUME(stage_size >= 2048);
+ CMT_ASSUME(stage_size % 2048 == 0);
radix4_pass(stage_size, 1, csize<width>, ctrue, cbool<splitin>, cbool<!is_even>, cbool<prefetch>,
cbool<inverse>, cbool<aligned>, out, in, twiddle);
}
@@ -836,14 +836,14 @@ struct dft_plan
const size_t log2n = ilog2(size);
cswitch(csizes<1, 2, 3, 4, 5, 6, 7, 8>, log2n,
[&](auto log2n) {
- add_stage<internal::fft_specialization_t<T, val_of(log2n), false>::template type>(
- size, type);
+ add_stage<internal::fft_specialization_t<T, val_of(decltype(log2n)()),
+ false>::template type>(size, type);
},
[&]() {
cswitch(cfalse_true, is_even(log2n), [&](auto is_even) {
make_fft(size, type, is_even, ctrue);
- add_stage<internal::fft_reorder_stage_impl_t<T, val_of(is_even)>::template type>(
- size, type);
+ add_stage<internal::fft_reorder_stage_impl_t<
+ T, val_of(decltype(is_even)())>::template type>(size, type);
});
});
initialize(type);
diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp
@@ -60,7 +60,7 @@ CMT_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y)
/// Complex Multiplication
template <typename T, size_t N1, size_t N2>
-CMT_INLINE vec<T, std::max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y)
+CMT_INLINE vec<T, const_max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y)
{
return internal::cmul_impl(x, y);
}
@@ -359,8 +359,6 @@ CMT_INLINE void cscatter(complex<T>* base, vec<IT, N> offset, vec<T, N * 2 * gro
return scatter_helper<2 * groupsize>(ptr_cast<T>(base), offset, value, csizeseq<N>);
}
-constexpr size_t default_unroll_count = 2;
-
template <typename T>
KFR_INTRIN void transpose4x8(cvec<T, 8> z0, cvec<T, 8> z1, cvec<T, 8> z2, cvec<T, 8> z3, cvec<T, 4>& w0,
cvec<T, 4>& w1, cvec<T, 4>& w2, cvec<T, 4>& w3, cvec<T, 4>& w4, cvec<T, 4>& w5,
@@ -441,15 +439,15 @@ constexpr KFR_INTRIN T chsign(T x)
template <typename T, size_t N, size_t size, size_t start, size_t step, bool inverse = false,
size_t... indices>
-constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>)
+constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(csizes_t<indices...>)
{
return make_vector((indices & 1 ? chsign<inverse>(-sin_using_table<T>(size, (indices / 2 * step + start)))
: cos_using_table<T>(size, (indices / 2 * step + start)))...);
}
template <typename T, size_t width, size_t... indices>
-constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>,
- size_t size, size_t start, size_t step)
+constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(csizes_t<indices...>, size_t size, size_t start,
+ size_t step)
{
return make_vector((indices & 1 ? -sin_using_table<T>(size, indices / 2 * step + start)
: cos_using_table<T>(size, indices / 2 * step + start))...);
@@ -458,14 +456,13 @@ constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequen
template <typename T, size_t width, size_t size, size_t start, size_t step, bool inverse = false>
constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle()
{
- return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(
- std::make_index_sequence<width * 2>());
+ return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(csizeseq<width * 2>);
}
template <typename T, size_t width>
constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle(size_t size, size_t start, size_t step = 0)
{
- return get_fixed_twiddle_helper<T, width>(std::make_index_sequence<width * 2>(), start, step, size);
+ return get_fixed_twiddle_helper<T, width>(csizeseq<width * 2>, start, step, size);
}
template <typename T, size_t N, size_t size, size_t start, size_t step = 0, bool inverse = false>
@@ -1280,9 +1277,8 @@ KFR_INTRIN vec<T, N> mul_tw(cbool_t<true>, vec<T, N> x, const complex<T>* twiddl
// Non-final
template <typename T, size_t width, size_t radix, bool inverse, size_t... I>
-KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>,
- cbool_t<inverse>, complex<T>* out, const complex<T>* in,
- const complex<T>* tw, size_t stride)
+KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize_t<radix>, cbool_t<inverse>,
+ complex<T>* out, const complex<T>* in, const complex<T>* tw, size_t stride)
{
carray<cvec<T, width>, radix> inout;
@@ -1297,8 +1293,8 @@ KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<wi
// Final
template <typename T, size_t width, size_t radix, bool inverse, size_t... I>
-KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>,
- cbool_t<inverse>, complex<T>* out, const complex<T>* in, size_t stride)
+KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize_t<radix>, cbool_t<inverse>,
+ complex<T>* out, const complex<T>* in, size_t stride)
{
carray<cvec<T, width>, radix> inout;
@@ -1313,8 +1309,7 @@ KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<wi
template <size_t width, size_t radix, typename... Args>
KFR_INTRIN void butterfly(size_t i, csize_t<width>, csize_t<radix>, Args&&... args)
{
- butterfly_helper(std::make_index_sequence<radix>(), i, csize<width>, csize<radix>,
- std::forward<Args>(args)...);
+ butterfly_helper(csizeseq<radix>, i, csize<width>, csize<radix>, std::forward<Args>(args)...);
}
template <typename... Args>
@@ -1333,7 +1328,7 @@ KFR_INTRIN void butterfly_cycle(size_t& i, size_t count, csize_t<width>, Args&&.
template <size_t width, typename... Args>
KFR_INTRIN void butterflies(size_t count, csize_t<width>, Args&&... args)
{
- __builtin_assume(count > 0);
+ CMT_ASSUME(count > 0);
size_t i = 0;
butterfly_cycle(i, count, csize<width>, std::forward<Args>(args)...);
}
@@ -1389,7 +1384,7 @@ template <size_t width, typename T, bool inverse, typename Tstride = csize_t<1>>
KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
const complex<T>* twiddle, Tstride ostride = Tstride{})
{
- __builtin_assume(radix > 0);
+ CMT_ASSUME(radix > 0);
{
cvec<T, width> sum = T();
size_t j = 0;
@@ -1408,7 +1403,7 @@ KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>*
}
const size_t halfradix = radix / 2;
const size_t halfradix_sqr = halfradix * halfradix;
- __builtin_assume(halfradix > 0);
+ CMT_ASSUME(halfradix > 0);
size_t i = 0;
generic_butterfly_cycle(csize<width>, radix, cbool<inverse>, out, in, ostride, halfradix, halfradix_sqr,
@@ -1428,7 +1423,7 @@ KFR_INTRIN void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* ou
cswitch(csizes<11>, radix,
[&](auto radix_) CMT_INLINE_LAMBDA {
- generic_butterfly_w<width>(val_of(radix_), cbool<inverse>, out, in, twiddle, ostride);
+ generic_butterfly_w<width>(decltype(radix_)(), cbool<inverse>, out, in, twiddle, ostride);
},
[&]() CMT_INLINE_LAMBDA {
generic_butterfly_w<width>(radix, cbool<inverse>, out, in, twiddle, ostride);
diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp
@@ -562,7 +562,7 @@ CMT_NOINLINE expression_pointer<T> window(size_t size, window_type type, T win_p
window_type::flattop, window_type::gaussian, window_type::lanczos>,
type,
[=](auto win) {
- constexpr window_type window = val_of(win);
+ constexpr window_type window = val_of(decltype(win)());
return to_pointer<T>(
typename internal::window_by_type<window>::template type<T>(size, win_param, symmetry));
},