kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit d2ae193ac50f802c05cddbea9c4d93debd58e196
parent a4451ff687898934fa206948281367c2463e9a7a
Author: [email protected] <[email protected]>
Date:   Tue, 26 Jul 2016 19:19:25 +0300

New version of saturated arithmetic functions

Diffstat:
M include/kfr/base/saturation.hpp | 37 +++++++++++++++++++++++--------------
1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp @@ -33,20 +33,26 @@ namespace internal template <typename T, size_t N> KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b) { - constexpr size_t shift = typebits<i32>::bits - 1; - const vec<T, N> sum = a + b; - a = (a >> shift) + allonesvector(a); - - return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum); + using UT = utype<T>; + constexpr size_t shift = typebits<UT>::bits - 1; + vec<UT, N> aa = bitcast<UT>(a); + vec<UT, N> bb = bitcast<UT>(b); + const vec<UT, N> sum = aa + bb; + aa = (aa >> shift) + static_cast<UT>(std::numeric_limits<T>::max()); + + return select(bitcast<T>((aa ^ bb) | ~(bb ^ sum)) >= 0, a, bitcast<T>(sum)); } template <typename T, size_t N> KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b) { - constexpr size_t shift = typebits<i32>::bits - 1; - const vec<T, N> diff = a - b; - a = (a >> shift) + allonesvector(a); - - return select(((a ^ b) & (a ^ diff)) < 0, a, diff); + using UT = utype<T>; + constexpr size_t shift = typebits<UT>::bits - 1; + vec<UT, N> aa = bitcast<UT>(a); + vec<UT, N> bb = bitcast<UT>(b); + const vec<UT, N> diff = aa - bb; + aa = (aa >> shift) + static_cast<UT>(std::numeric_limits<T>::max()); + + return select(bitcast<T>((aa ^ bb) & (aa ^ diff)) < 0, a, bitcast<T>(diff)); } template <typename T, size_t N> KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b) @@ -94,6 +100,9 @@ KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y) KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); } #endif +KFR_HANDLE_ALL_SIZES_2(satadd) +KFR_HANDLE_ALL_SIZES_2(satsub) + #else // fallback template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)> @@ -117,10 +126,10 @@ KFR_SINTRIN vec<T, N> satsub(vec<T, N> a, vec<T, N> b) return saturated_unsigned_sub(a, b); } #endif -KFR_HANDLE_SCALAR_1(satadd) -KFR_FN(satadd) -KFR_HANDLE_SCALAR_1(satsub) 
-KFR_FN(satsub) +KFR_HANDLE_SCALAR_2(satadd) +KFR_I_FN(satadd) +KFR_HANDLE_SCALAR_2(satsub) +KFR_I_FN(satsub) } template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>