commit fe272701e398c512900e1d067cdf68320055fc81
parent e80e1015ae7cfd71be2bb858f5c99817f48aa7c3
Author: [email protected] <[email protected]>
Date: Thu, 21 Nov 2019 16:29:00 +0000
vec<vec<vec<T>>> support
Diffstat:
3 files changed, 183 insertions(+), 154 deletions(-)
diff --git a/include/kfr/simd/impl/operators.hpp b/include/kfr/simd/impl/operators.hpp
@@ -149,6 +149,62 @@ KFR_COMPLEX_OP_CVT(bor)
return fn(repeat<N2>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \
}
+// Defines the unary overload of `fn` for vec<vec<vec<T1, N1>, N2>, N3>: `fn` is applied to
+// x.flatten() and the nested return value is built from the `.v` member of that result.
+#define KFR_VECVECVEC_OP1(fn) \
+ template <typename T1, size_t N1, size_t N2, size_t N3> \
+ KFR_INTRINSIC vec<vec<vec<T1, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x) \
+ { \
+ return fn(x.flatten()).v; \
+ }
+
+// Defines the binary overloads of `fn` in which at least one operand is a triple-nested
+// vector vec<vec<vec<T, N1>, N2>, N3> (VVV). The other operand may be another VVV, a
+// vec<vec<T, N1>, N2> (VV), a vec<T, N1> (V) or a scalar (S).
+//
+// Every overload converts both operands to the common element type C, flattens the vector
+// operands and forwards to the flat `fn`; the nested result is built from the `.v` member of
+// the flat result. A lower-rank vector operand is repeated until it has the same
+// N1 * N2 * N3 lanes as the flattened VVV operand: a VV operand (N1 * N2 lanes) is repeated
+// N3 times and a V operand (N1 lanes) is repeated N2 * N3 times.
+//
+// Each overload is constrained with KFR_ENABLE_IF(is_simd_type<C>::value).
+#define KFR_VECVECVEC_OP2(fn) \
+ template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \
+ KFR_ENABLE_IF(is_simd_type<C>::value)> \
+ KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \
+ const vec<vec<vec<T2, N1>, N2>, N3>& y) \
+ { /* VVV @ VVV */ \
+ return fn(innercast<C>(x.flatten()), innercast<C>(y.flatten())).v; \
+ } \
+ template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \
+ KFR_ENABLE_IF(is_simd_type<C>::value)> \
+ KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \
+ const vec<vec<T2, N1>, N2>& y) \
+ { /* VVV @ VV: y has N1 * N2 lanes, repeated N3 times */ \
+ return fn(innercast<C>(x.flatten()), repeat<N3>(innercast<C>(y.flatten()))).v; \
+ } \
+ template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \
+ KFR_ENABLE_IF(is_simd_type<C>::value)> \
+ KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<T1, N1>, N2>& x, \
+ const vec<vec<vec<T2, N1>, N2>, N3>& y) \
+ { /* VV @ VVV: x has N1 * N2 lanes, repeated N3 times */ \
+ return fn(repeat<N3>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \
+ } \
+ template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \
+ KFR_ENABLE_IF(is_simd_type<C>::value)> \
+ KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, const T2& y) \
+ { /* VVV @ S */ \
+ return fn(innercast<C>(x.flatten()), innercast<C>(y)).v; \
+ } \
+ template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \
+ KFR_ENABLE_IF(is_simd_type<C>::value)> \
+ KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \
+ const vec<T2, N1>& y) \
+ { /* VVV @ V: y has N1 lanes, repeated N2 * N3 times to match N1 * N2 * N3 */ \
+ return fn(innercast<C>(x.flatten()), repeat<N2 * N3>(innercast<C>(y.flatten()))).v; \
+ } \
+ template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \
+ KFR_ENABLE_IF(is_simd_type<C>::value)> \
+ KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const T1& x, const vec<vec<vec<T2, N1>, N2>, N3>& y) \
+ { /* S @ VVV */ \
+ return fn(innercast<C>(x), innercast<C>(y.flatten())).v; \
+ } \
+ template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \
+ KFR_ENABLE_IF(is_simd_type<C>::value)> \
+ KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<T1, N1>& x, \
+ const vec<vec<vec<T2, N1>, N2>, N3>& y) \
+ { /* V @ VVV: x has N1 lanes, repeated N2 * N3 times to match N1 * N2 * N3 */ \
+ return fn(repeat<N2 * N3>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \
+ }
+
KFR_VECVEC_OP1(neg)
KFR_VECVEC_OP1(bnot)
KFR_VECVEC_OP2(add)
@@ -159,6 +215,16 @@ KFR_VECVEC_OP2(band)
KFR_VECVEC_OP2(bor)
KFR_VECVEC_OP2(bxor)
+// Expand the triple-nested (vec<vec<vec<T>>>) overloads for each unary and binary operator.
+KFR_VECVECVEC_OP1(neg)
+KFR_VECVECVEC_OP1(bnot)
+KFR_VECVECVEC_OP2(add)
+KFR_VECVECVEC_OP2(sub)
+KFR_VECVECVEC_OP2(mul)
+KFR_VECVECVEC_OP2(div)
+KFR_VECVECVEC_OP2(band)
+KFR_VECVECVEC_OP2(bor)
+KFR_VECVECVEC_OP2(bxor)
+
} // namespace intrinsics
} // namespace CMT_ARCH_NAME
} // namespace kfr
diff --git a/include/kfr/simd/vec.hpp b/include/kfr/simd/vec.hpp
@@ -155,9 +155,17 @@ struct compoundcast<vec<T, N>>
+// compoundcast for vec<vec<T, N1>, N2>: to_flat and from_flat translate between the nested
+// vector and vec<T, N1 * N2>. Each returns the operand's `.v` member from a function whose
+// declared return type is the other representation.
template <typename T, size_t N1, size_t N2>
struct compoundcast<vec<vec<T, N1>, N2>>
{
- static vec<T, N1 * N2> to_flat(const vec<vec<T, N1>, N2>& x) { return x; }
+ static vec<T, N1 * N2> to_flat(const vec<vec<T, N1>, N2>& x) { return x.v; }
- static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * N2>& x) { return x; }
+ static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * N2>& x) { return x.v; }
+};
+
+// compoundcast for vec<vec<vec<T, N1>, N2>, N3>: to_flat and from_flat translate between the
+// triple-nested vector and vec<T, N1 * N2 * N3>. Each returns the operand's `.v` member from
+// a function whose declared return type is the other representation.
+template <typename T, size_t N1, size_t N2, size_t N3>
+struct compoundcast<vec<vec<vec<T, N1>, N2>, N3>>
+{
+ static vec<T, N1 * N2 * N3> to_flat(const vec<vec<vec<T, N1>, N2>, N3>& x) { return x.v; }
+
+ static vec<vec<vec<T, N1>, N2>, N3> from_flat(const vec<T, N1 * N2 * N3>& x) { return x.v; }
};
} // namespace internal
@@ -594,6 +602,14 @@ constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> cast(const vec<vec<Tin, N1>, N2>&
return vec<vec<Tout, N1>, N2>(value);
}
+// cast<Tout> for a triple-nested vector whose innermost element type Tin differs from Tout:
+// constructs a vec<vec<vec<Tout, N1>, N2>, N3> from `value`.
+template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3,
+ KFR_ENABLE_IF(!is_same<Tin, Tout>::value)>
+constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> cast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
+ CMT_NOEXCEPT
+{
+ using converted_type = vec<vec<vec<Tout, N1>, N2>, N3>;
+ return converted_type(value);
+}
+
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>::value)>
constexpr KFR_INTRINSIC const vec<Tin, N>& cast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
@@ -606,6 +622,14 @@ constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& cast(const vec<vec<Tin, N1>
return value;
}
+// cast<Tout> for a triple-nested vector that already has element type Tout (Tin == Tout):
+// no conversion is performed, the argument is returned by const reference.
+template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3,
+ KFR_ENABLE_IF(is_same<Tin, Tout>::value)>
+constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& cast(
+ const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
+{
+ return value;
+}
+
//
template <typename To, typename From,
@@ -627,6 +651,14 @@ constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> innercast(const vec<vec<Tin, N1>,
return vec<vec<Tout, N1>, N2>(value);
}
+// innercast<Tout> for a triple-nested vector whose innermost element type Tin differs from
+// Tout: constructs a vec<vec<vec<Tout, N1>, N2>, N3> from `value`.
+template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3,
+ KFR_ENABLE_IF(!is_same<Tin, Tout>::value)>
+constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> innercast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
+ CMT_NOEXCEPT
+{
+ using converted_type = vec<vec<vec<Tout, N1>, N2>, N3>;
+ return converted_type(value);
+}
+
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>::value)>
constexpr KFR_INTRINSIC const vec<Tin, N>& innercast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
@@ -640,6 +672,14 @@ constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& innercast(const vec<vec<Tin
return value;
}
+// innercast<Tout> for a triple-nested vector that already has element type Tout
+// (Tin == Tout): no conversion is performed, the argument is returned by const reference.
+template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3,
+ KFR_ENABLE_IF(is_same<Tin, Tout>::value)>
+constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& innercast(
+ const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
+{
+ return value;
+}
+
//
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>::value)>
@@ -660,6 +700,13 @@ constexpr KFR_INTRINSIC vec<Tout, N2> elemcast(const vec<vec<Tin, N1>, N2>& valu
return vec<Tout, N2>(value);
}
+// elemcast<Tout> for a triple-nested vector: constructs a vec<Tout, N3> from `value`, i.e.
+// only the outermost size N3 is kept and Tout replaces the outer element type.
+template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3,
+ KFR_ENABLE_IF(!is_same<Tin, Tout>::value)>
+constexpr KFR_INTRINSIC vec<Tout, N3> elemcast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
+{
+ using outer_type = vec<Tout, N3>;
+ return outer_type(value);
+}
+
template <typename To, typename From>
CMT_GNU_CONSTEXPR KFR_INTRINSIC To bitcast(const From& value) CMT_NOEXCEPT
{
@@ -1195,6 +1242,22 @@ struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>>
}
};
+// vector to vector<vector<vector>>
+// Broadcasts a flat vec<From, N1> into a vec<vec<vec<To, N1>, N2>, N3>: the source is
+// converted to To and replicated so that every innermost vec<To, N1> holds a copy of it.
+// Both To and From must be non-compound and the source length must equal N1.
+template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t Ns1>
+struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<From, Ns1>>
+{
+ static_assert(N1 == Ns1, "");
+ static_assert(!is_compound<To>::value, "");
+ static_assert(!is_compound<From>::value, "");
+
+ static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<From, N1>& value)
+ {
+ // from_flatten needs N1 * N2 * N3 scalars, and flat index i must read source lane
+ // i % N1 so that the N1 source lanes repeat across all N2 * N3 innermost vectors.
+ // NOTE(review): assumes vec<From, N1>::scalar_size() == N1 for non-compound From.
+ return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(
+ kfr::innercast<To>(value.flatten())
+ .shuffle(csizeseq<N2 * N3 * vec<From, N1>::scalar_size()> % csize<N1>));
+ }
+};
+
// vector<vector> to vector<vector>
template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2>
struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>>
@@ -1209,6 +1272,22 @@ struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>>
return vec<vec<To, N1>, N2>::from_flatten(kfr::innercast<To>(value.flatten()));
}
};
+
+// vector<vector<vector>> to vector<vector<vector>>
+// Element-type conversion between two triple-nested vectors of identical shape: the source
+// is flattened, converted to To with innercast and re-nested with from_flatten.
+template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t NN1, size_t NN2, size_t NN3>
+struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<vec<vec<From, NN1>, NN2>, NN3>>
+{
+ // All three sizes must match; only the scalar type may change.
+ static_assert(N1 == NN1 && N2 == NN2 && N3 == NN3, "");
+ // Neither scalar type may itself be a compound (vector) type.
+ static_assert(!is_compound<To>::value && !is_compound<From>::value, "");
+
+ static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<vec<vec<From, N1>, N2>, N3>& value)
+ {
+ using target_type = vec<vec<vec<To, N1>, N2>, N3>;
+ return target_type::from_flatten(kfr::innercast<To>(value.flatten()));
+ }
+};
} // namespace internal
template <typename T, size_t N1, size_t N2 = N1>
@@ -1243,6 +1322,40 @@ struct vec_vec_template
using type = vec<vec<T, N1>, N2>;
};
+namespace internal
+{
+
+// Maps an element type T and a list of sizes (innermost first) to the matching nested vec
+// type. Only ranks 0 through 3 are specialized; any other rank leaves vecx_t incomplete.
+template <typename T, size_t... Ns>
+struct vecx_t;
+
+// Rank 0: no sizes, the element type itself.
+template <typename T>
+struct vecx_t<T>
+{
+ using type = T;
+};
+
+// Rank 1: vec<T, N1>.
+template <typename T, size_t N1>
+struct vecx_t<T, N1>
+{
+ using type = vec<typename vecx_t<T>::type, N1>;
+};
+
+// Rank 2: vec<vec<T, N1>, N2>, i.e. N2 elements of the rank-1 type.
+template <typename T, size_t N1, size_t N2>
+struct vecx_t<T, N1, N2>
+{
+ using type = vec<typename vecx_t<T, N1>::type, N2>;
+};
+
+// Rank 3: vec<vec<vec<T, N1>, N2>, N3>, i.e. N3 elements of the rank-2 type.
+template <typename T, size_t N1, size_t N2, size_t N3>
+struct vecx_t<T, N1, N2, N3>
+{
+ using type = vec<typename vecx_t<T, N1, N2>::type, N3>;
+};
+} // namespace internal
+
+// vecx<T> is T, vecx<T, N1> is vec<T, N1>, vecx<T, N1, N2> is vec<vec<T, N1>, N2> and
+// vecx<T, N1, N2, N3> is vec<vec<vec<T, N1>, N2>, N3>.
+template <typename T, size_t... Ns>
+using vecx = typename internal::vecx_t<T, Ns...>::type;
+
} // namespace CMT_ARCH_NAME
template <typename T1, typename T2, size_t N>
struct common_type_impl<kfr::vec<T1, N>, kfr::vec<T2, N>>
@@ -1271,6 +1384,7 @@ struct common_type_impl<kfr::vec<kfr::vec<T1, N1>, N2>, kfr::vec<T2, N1>>
: common_type_from_subtypes<T1, T2, kfr::vec_vec_template<N1, N2>::template type>
{
};
+
} // namespace kfr
namespace cometa
diff --git a/tests/asm_test.cpp b/tests/asm_test.cpp
@@ -177,8 +177,6 @@ using namespace kfr;
#define TEST_ASM_IF(fn, MACRO) TEST_ASM_I(fn, MACRO) TEST_ASM_F(fn, MACRO)
-#if 1
-
TEST_ASM_UIF(add, TEST_ASM_VTY2)
TEST_ASM_UIF(sub, TEST_ASM_VTY2)
@@ -284,8 +282,6 @@ TEST_FFT_GEN(f64)
#endif
-#endif
-
TEST_ASM_F(sin, TEST_ASM_VTY1_F)
TEST_ASM_F(cos, TEST_ASM_VTY1_F)
@@ -296,154 +292,7 @@ namespace kfr
#ifdef KFR_SHOW_NOT_OPTIMIZED
KFR_PUBLIC void not_optimized(const char* fn) CMT_NOEXCEPT { puts(fn); }
#endif
-} // namespace kfr
-KFR_PUBLIC void test_shuffle_old1(f32x1& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2>, overload_auto);
-}
-
-KFR_PUBLIC void test_shuffle_old2(f32x4& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<3, 2, 1, 0>,
- overload_auto);
-}
-
-KFR_PUBLIC void test_shuffle_old3(f32x4& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<0, 1, 2, 3>,
- overload_auto);
-}
-
-KFR_PUBLIC void test_shuffle_old4(f32x2& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2, 3>, overload_auto);
-}
-
-KFR_PUBLIC void test_shuffle_old5(f32x8& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v,
- csizes<3, 2, 1, 0, 0, 1, 2, 3>, overload_auto);
-}
-
-KFR_PUBLIC void test_shuffle_old6(f32x8& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v,
- csizes<7, 6, 5, 4, 3, 2, 1, 0>, overload_auto);
-}
-
-KFR_PUBLIC void test_shuffle_old9(vec<f32, 3>& x, const vec<f32, 15>& y)
-{
- x.v = kfr::intrinsics::simd_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<3, 2, 1>,
- overload_auto);
-}
-
-KFR_PUBLIC void test_shuffle_new1(f32x1& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2>);
-}
-
-KFR_PUBLIC void test_shuffle_new2(f32x4& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<3, 2, 1, 0>);
-}
-
-KFR_PUBLIC void test_shuffle_new3(f32x4& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<0, 1, 2, 3>);
-}
-
-KFR_PUBLIC void test_shuffle_new4(f32x2& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v, csizes<2, 3>);
-}
-
-KFR_PUBLIC void test_shuffle_new5(f32x8& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v,
- csizes<3, 2, 1, 0, 0, 1, 2, 3>);
-}
-
-KFR_PUBLIC void test_shuffle_new6(f32x8& x, const f32x4& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 4>{}, y.v,
- csizes<7, 6, 5, 4, 3, 2, 1, 0>);
-}
-
-KFR_PUBLIC void test_shuffle_new7(f32x1& x, const f32x32& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 32>{}, (y + 1.f).v, csizes<19>);
-}
-
-KFR_PUBLIC void test_shuffle_new8(f32x8& x, const f32x8& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v,
- csizes<3, 2, 1, 0, 3, 2, 1, 0>);
-}
-
-KFR_PUBLIC void test_shuffle_new9(vec<f32, 3>& x, const vec<f32, 15>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<3, 2, 1>);
-}
-
-KFR_PUBLIC void test_shuffle_new9a(vec<f32, 3>& x, const vec<f32, 15>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<5, 6, 7>);
-}
-
-KFR_PUBLIC void test_shuffle_new9b(vec<f32, 3>& x, const vec<f32, 15>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<11, 11, 11>);
-}
-
-KFR_PUBLIC void test_shuffle_new9c(vec<f32, 3>& x, const vec<f32, 15>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 15>{}, y.v, csizes<3, 4, 5>);
-}
-
-KFR_PUBLIC void test_shuffle_new10(vec<f32, 15>& x)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 1>{}, 0.f,
- csizes<1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>);
-}
-KFR_PUBLIC void test_shuffle_new11(vec<f32, 15>& x, float y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 1>{}, y,
- csizes<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>);
-}
-KFR_PUBLIC void test_shuffle_new12(vec<f32, 32>& x, const vec<f32, 32>& y)
-{
- x.v =
- kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 32>{}, y.v, csizeseq<32> ^ csize<1>);
-}
-KFR_PUBLIC void test_shuffle_new13(vec<f32, 8>& x, const vec<f32, 8>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v,
- csizes<0, 2, 4, 6, 1, 3, 5, 7>);
-}
-KFR_PUBLIC void test_shuffle_new14(vec<f32, 8>& x, const vec<f32, 8>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v,
- csizes<0, 4, 1, 5, 2, 6, 3, 7>);
-}
-KFR_PUBLIC void test_shuffle_new15(vec<f32, 4>& x, const vec<f32, 8>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 8>{}, y.v, csizes<0, 5, 2, 7>);
-}
-KFR_PUBLIC void test_shuffle_new16(vec<f32, 2>& x, const vec<f32, 2>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 2>{}, y.v, csizes<1, 0>);
-}
-KFR_PUBLIC void test_shuffle_new17(vec<f32, 16>& x, const vec<f32, 16>& y)
-{
- x.v = kfr::intrinsics::universal_shuffle(kfr::intrinsics::simd_t<f32, 16>{}, y.v,
- csizes<0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15>);
-}
-
-KFR_PUBLIC float tuple_assign()
-{
- auto [x, y, z, w] = f32x4(1.f, 2.f, 3.f, 4.f);
- return x + y * y + z * z * z + w * w * w * w;
-}
+} // namespace kfr
int main() { println(library_version()); }