kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit b94a2b4eb46bd3ac63daab644c7e46da2c7f3db8
parent 8a8ba7ae333ee7de7c3b703908e2c0fa8b92936a
Author: [email protected] <[email protected]>
Date:   Tue,  2 Aug 2022 17:53:43 +0100

kfr::tensor

Diffstat:
M include/kfr/base/expression.hpp        | 488 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
A include/kfr/base/impl/static_array.hpp |  87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A include/kfr/base/shape.hpp             | 515 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A include/kfr/base/tensor.hpp            | 823 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 1887 insertions(+), 26 deletions(-)
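
This commit introduces kfr::tensor (a multidimensional array with shared-ownership storage), kfr::shape (a fixed-size index tuple), and expression traits used by the new tprocess machinery. The following is a minimal usage sketch based only on the API visible in the diff below; the include path, namespace usage, and build configuration are assumptions, not part of this commit.

    // Sketch only: exercises the tensor API added in this commit (fill constructor,
    // multi-index operator(), range slicing, reshape). Header path is an assumption.
    #include <kfr/base/tensor.hpp>

    using namespace kfr;

    int main()
    {
        tensor<float, 2> t(shape{ 2, 3 }, 0.f);        // 2x3 tensor, contiguous, zero-filled
        t(0, 1) = 5.f;                                 // element access via tensor_subscript
        t(trange(0, 1), tall()) = 1.f;                 // slicing with tensor_range yields a view
        tensor<float, 2> r = t.reshape(shape{ 3, 2 }); // no copy for contiguous storage
        return 0;
    }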

diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -29,6 +29,7 @@ #include "../simd/shuffle.hpp" #include "../simd/types.hpp" #include "../simd/vec.hpp" +#include "shape.hpp" #include <tuple> #ifdef KFR_STD_COMPLEX @@ -55,43 +56,242 @@ struct complex; #endif #endif -constexpr size_t inout_context_size = 16; +template <typename T, typename V = void> +struct expression_traits; -struct coutput_context +template <typename T> +using expression_value_type = typename expression_traits<T>::value_type; + +template <typename T> +constexpr inline size_t expression_dims = expression_traits<T>::dims; + +template <typename T> +constexpr inline shape<expression_dims<T>> shapeof(T&& expr) +{ + return expression_traits<T>::shapeof(expr); +} +template <typename T> +constexpr inline shape<expression_dims<T>> shapeof() +{ + return expression_traits<T>::shapeof(); +} + +template <typename T> +struct expression_traits<T&, std::void_t<expression_value_type<T>>> : expression_traits<T> +{ +}; +template <typename T> +struct expression_traits<T&&, std::void_t<expression_value_type<T>>> : expression_traits<T> +{ +}; +template <typename T> +struct expression_traits<const T&, std::void_t<expression_value_type<T>>> : expression_traits<T> +{ +}; +template <typename T> +struct expression_traits<const T&&, std::void_t<typename expression_traits<T>::value_type>> + : expression_traits<T> { - pconstvoid data[inout_context_size]; }; -struct cinput_context +struct expression_traits_defaults { - pconstvoid data[inout_context_size]; + // using value_type = void; + // constexpr static size_t dims = 0; + // constexpr static shape<dims> shapeof(const T&); + // constexpr static shape<dims> shapeof(); + + constexpr static inline bool explicit_operand = true; }; -using coutput_t = const coutput_context*; -using cinput_t = const cinput_context*; +namespace internal_generic +{ +template <typename... Xs> +using expressions_check = std::enable_if_t<(expression_traits<Xs>::explicit_operand || ...)>; +} -constexpr cinput_t cinput = nullptr; -constexpr coutput_t coutput = nullptr; +#define KFR_ACCEPT_EXPRESSIONS(...) internal_generic::expressions_check<__VA_ARGS__>* = nullptr + +template <typename T> +struct expression_traits<T, std::enable_if_t<is_simd_type<T>>> : expression_traits_defaults +{ + using value_type = T; + constexpr static size_t dims = 0; + constexpr static inline bool explicit_operand = false; -constexpr size_t infinite_size = static_cast<size_t>(-1); + KFR_MEM_INTRINSIC constexpr static shape<0> shapeof(const T& self) { return {}; } + KFR_MEM_INTRINSIC constexpr static shape<0> shapeof() { return {}; } +}; -CMT_INTRINSIC constexpr size_t size_add(size_t x, size_t y) +inline namespace CMT_ARCH_NAME +{ +template <typename T, typename U, size_t N, KFR_ENABLE_IF(is_simd_type<std::decay_t<T>>)> +KFR_MEM_INTRINSIC vec<U, N> get_elements(T&& self, const shape<0>& index, vec_shape<U, N> sh) +{ + return self; +} +template <typename T, typename U, size_t N, KFR_ENABLE_IF(is_simd_type<std::decay_t<T>>)> +KFR_MEM_INTRINSIC void set_elements(T& self, const shape<0>& index, const vec<U, N>& val) { - return (x == infinite_size || y == infinite_size) ? 
infinite_size : x + y; + static_assert(N == 1); + static_assert(!std::is_const_v<T>); + self = val.front(); } +} // namespace CMT_ARCH_NAME + +inline namespace CMT_ARCH_NAME +{ -CMT_INTRINSIC constexpr size_t size_sub(size_t x, size_t y) +template <typename Out, typename In, size_t w, size_t gw, typename Tin, index_t outdims, index_t indims> +KFR_INTRINSIC static void tprocess_body(Out&& out, In&& in, size_t start, size_t stop, size_t insize, + shape<outdims> outidx, shape<indims> inidx) { - return (x == infinite_size || y == infinite_size) ? infinite_size : (x > y ? x - y : 0); + size_t x = start; + if constexpr (w > gw) + { + CMT_LOOP_NOUNROLL + for (; x < stop / w * w; x += w) + { + outidx.set_revindex(0, x); + inidx.set_revindex(0, std::min(x, insize - 1)); + set_elements(out, outidx, get_elements(in, inidx, vec_shape<Tin, w>())); + } + } + CMT_LOOP_NOUNROLL + for (; x < stop / gw * gw; x += gw) + { + outidx.set_revindex(0, x); + inidx.set_revindex(0, std::min(x, insize - 1)); + set_elements(out, outidx, get_elements(in, inidx, vec_shape<Tin, gw>())); + } } -CMT_INTRINSIC constexpr size_t size_min(size_t x) CMT_NOEXCEPT { return x; } +template <size_t width = 0, typename Out, typename In, size_t gw = 1, + CMT_ENABLE_IF(expression_traits<Out>::dims == 0)> +static auto tprocess(Out&& out, In&& in, shape<0> = {}, shape<0> = {}, csize_t<gw> = {}) -> shape<0> +{ + set_elements(out, shape<0>{}, + get_elements(in, shape<0>{}, vec_shape<typename expression_traits<In>::value_type, 1>())); + return {}; +} -template <typename... Ts> -CMT_INTRINSIC constexpr size_t size_min(size_t x, size_t y, Ts... rest) CMT_NOEXCEPT +namespace internal +{ + +constexpr size_t select_process_width(size_t width, size_t vec_width, index_t last_dim_size) +{ + if (width != 0) + return width; + if (last_dim_size == 0) + return vec_width; + + return std::min(vec_width, last_dim_size); +} +} // namespace internal + +template <size_t width = 0, typename Out, typename In, size_t gw = 1, + typename Tin = expression_value_type<In>, typename Tout = expression_value_type<Out>, + index_t outdims = expression_dims<Out>, CMT_ENABLE_IF(expression_dims<Out> > 0)> +static auto tprocess(Out&& out, In&& in, shape<outdims> start = 0, shape<outdims> size = infinite_size, + csize_t<gw> = {}) -> shape<outdims> { - return size_min(x < y ? 
x : y, rest...); + constexpr index_t indims = expression_dims<In>; + static_assert(outdims >= indims); + + constexpr index_t last_dim_size = prev_poweroftwo(expression_traits<Out>::shapeof().back()); + +#ifdef NDEBUG + constexpr size_t vec_width = maximum_vector_size<Tin>; +#else + constexpr size_t vec_width = vector_width<Tin>; +#endif + + constexpr size_t w = internal::select_process_width(width, vec_width, last_dim_size); + + const shape<outdims> outshape = shapeof(out); + const shape<indims> inshape = shapeof(in); + if (CMT_UNLIKELY(!internal_generic::can_assign_from(outshape, inshape))) + return { 0 }; + shape<outdims> stop = min(start + size, outshape); + + // min(out, in, size + start) - start + + shape<outdims> outidx; + if constexpr (outdims == 1) + { + outidx = shape<outdims>{ 0 }; + tprocess_body<Out, In, w, gw, Tin, outdims, indims>( + std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0), + inshape.revindex(0), outidx, inshape.adapt(outidx)); + } + else if constexpr (outdims == 2) + { + for (index_t x = start.revindex(1); x < stop.revindex(1); ++x) + { + outidx = shape<outdims>{ x, 0 }; + tprocess_body<Out, In, w, gw, Tin, outdims, indims>( + std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0), + inshape.revindex(0), outidx, inshape.adapt(outidx)); + } + } + else if constexpr (outdims == 3) + { + for (index_t x = start.revindex(2); x < stop.revindex(2); ++x) + { + for (index_t y = start.revindex(1); y < stop.revindex(1); ++y) + { + outidx = shape<outdims>{ x, y, 0 }; + tprocess_body<Out, In, w, gw, Tin, outdims, indims>( + std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0), + inshape.revindex(0), outidx, inshape.adapt(outidx)); + } + } + } + else if constexpr (outdims == 4) + { + for (index_t x = start.revindex(3); x < stop.revindex(3); ++x) + { + for (index_t y = start.revindex(2); y < stop.revindex(2); ++y) + { + for (index_t z = start.revindex(1); z < stop.revindex(1); ++z) + { + outidx = shape<outdims>{ x, y, z, 0 }; + tprocess_body<Out, In, w, gw, Tin, outdims, indims>( + std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0), + inshape.revindex(0), outidx, inshape.adapt(outidx)); + } + } + } + } + else + { + shape<outdims> outidx = start; + if (CMT_UNLIKELY(!internal_generic::compare_indices(outidx, stop, outdims - 2))) + return stop; + do + { + tprocess_body<Out, In, w, gw, Tin, outdims, indims>( + std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0), + inshape.revindex(0), outidx, inshape.adapt(outidx)); + } while (internal_generic::increment_indices(outidx, start, stop, outdims - 2)); + } + return stop; } +} // namespace CMT_ARCH_NAME + +struct coutput_context +{ +}; + +struct cinput_context +{ +}; + +using coutput_t = const coutput_context*; +using cinput_t = const cinput_context*; + +constexpr cinput_t cinput = nullptr; +constexpr coutput_t coutput = nullptr; /// @brief Base class of all input expressoins struct input_expression @@ -229,10 +429,22 @@ internal::expression_lambda<T, decay<Fn>> lambda(Fn&& fn) return internal::expression_lambda<T, decay<Fn>>(std::move(fn)); } -namespace internal +} // namespace CMT_ARCH_NAME + +namespace internal_generic { template <typename T, typename = void> +struct has_static_size_impl : std::false_type +{ +}; + +template <typename T> +struct has_static_size_impl<T, std::void_t<decltype(T::size())>> : std::true_type +{ +}; + +template <typename T, typename = void> struct is_infinite_impl : 
std::false_type { }; @@ -242,10 +454,231 @@ struct is_infinite_impl<T, void_t<decltype(T::size())>> : std::integral_constant<bool, T::size() == infinite_size> { }; -} // namespace internal +} // namespace internal_generic + +template <typename T> +constexpr inline bool is_infinite = internal_generic::is_infinite_impl<T>::value; + +template <typename T> +constexpr inline bool has_static_size = internal_generic::has_static_size_impl<T>::value; template <typename T> -constexpr inline bool is_infinite = internal::is_infinite_impl<T>::value; +struct expression_traits<T, std::enable_if_t<std::is_base_of_v<input_expression, T>>> + : expression_traits_defaults +{ + using value_type = value_type_of<T>; + constexpr static size_t dims = 1; + + constexpr static shape<1> shapeof(const T& self) { return self.size(); } + constexpr static shape<1> shapeof() + { + if constexpr (has_static_size<T>) + { + return { T::size() }; + } + else + { + return { 0 }; + } + } +}; + +inline namespace CMT_ARCH_NAME +{ +template <typename T, typename U, size_t N, KFR_ENABLE_IF(is_input_expression<T>)> +KFR_MEM_INTRINSIC vec<U, N> get_elements(T&& self, const shape<1>& index, vec_shape<U, N> sh) +{ + return get_elements(self, cinput_t{}, index[0], sh); +} +} // namespace CMT_ARCH_NAME + +template <typename... Args> +struct xwitharguments +{ + constexpr static size_t count = sizeof...(Args); + + using type_list = ctypes_t<Args...>; + + template <size_t idx> + using nth = typename type_list::template nth<idx>; + + std::tuple<Args...> args; + std::array<dimset, count> masks; + + KFR_INTRINSIC auto& first() { return std::get<0>(args); } + KFR_INTRINSIC const auto& first() const { return std::get<0>(args); } + + template <size_t idx> + KFR_INTRINSIC dimset getmask(csize_t<idx> = {}) const + { + static_assert(idx < count); + using Traits = expression_traits<nth<idx>>; + if constexpr (Traits::dims == 0) + { + return -1; + } + else + { + constexpr shape<Traits::dims> sh = Traits::shapeof(); + if constexpr (sh.cproduct() > 0) + { + return sh.tomask(); + } + else + { + return std::get<idx>(masks); + } + } + } + + template <typename Fn> + KFR_INTRINSIC constexpr auto fold(Fn&& fn) const + { + return fold_impl(std::forward<Fn>(fn), csizeseq<count>); + } + template <typename Fn> + KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn) + { + return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>); + } + + KFR_INTRINSIC xwitharguments(Args&&... args) : args{ std::forward<Args>(args)... } + { + cforeach(csizeseq<count>, + [&](auto idx_) CMT_INLINE_LAMBDA + { + constexpr size_t idx = val_of(decltype(idx_)()); + shape sh = expression_traits<nth<idx>>::shapeof(std::get<idx>(this->args)); + masks[idx] = sh.tomask(); + }); + } + +private: + template <typename Fn, size_t... indices> + KFR_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const + { + return fn(std::get<indices>(args)...); + } + template <typename Fn, size_t... indices> + KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>) + { + return fn(csize<indices>...); + } +}; + +template <typename... Args> +xwitharguments(Args&&... args) -> xwitharguments<Args...>; + +template <index_t Dims, typename Arg> +struct xreshape : public xwitharguments<Arg> +{ + shape<Dims> old_shape; + shape<Dims> new_shape; +}; + +template <typename Fn, typename... 
Args> +struct xfunction : public xwitharguments<Args...> +{ + Fn fn; + + KFR_INTRINSIC xfunction(xwitharguments<Args...> args, Fn&& fn) + : xwitharguments<Args...>{ std::move(args) }, fn(std::move(fn)) + { + } +}; + +template <typename... Args, typename Fn> +xfunction(const xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>; +template <typename... Args, typename Fn> +xfunction(xwitharguments<Args...>&& args, Fn&& fn) -> xfunction<Fn, Args...>; +template <typename... Args, typename Fn> +xfunction(xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>; + +template <typename Fn, typename... Args> +struct expression_traits<xfunction<Fn, Args...>> : expression_traits_defaults +{ + using E = xfunction<Fn, Args...>; + + using value_type = + typename std::invoke_result_t<Fn, + vec<typename expression_traits<Args>::value_type, 1>...>::value_type; + constexpr static size_t dims = const_max(expression_traits<Args>::dims...); + + constexpr static shape<dims> shapeof(const E& self) + { + return self.fold([&](auto&&... args) CMT_INLINE_LAMBDA -> auto { + return internal_generic::common_shape(expression_traits<decltype(args)>::shapeof(args)...); + }); + } + constexpr static shape<dims> shapeof() + { + return xfunction<Fn, Args...>::fold_idx([&](auto... args) CMT_INLINE_LAMBDA -> auto { + return internal_generic::common_shape( + expression_traits<typename E::template nth<val_of(decltype(args)())>>::shapeof()...); + }); + } +}; + +template <index_t Dims, typename Arg> +struct expression_traits<xreshape<Dims, Arg>> : expression_traits_defaults +{ + using value_type = typename expression_traits<Arg>::value_type; + constexpr static size_t dims = Dims; + + constexpr static shape<dims> shapeof(const xreshape<Dims, Arg>& self) { return self.new_shape; } +}; + +inline namespace CMT_ARCH_NAME +{ + +namespace internal +{ +template <index_t outdims, typename Fn, typename... Args, typename U, size_t N, index_t Dims, size_t idx> +KFR_MEM_INTRINSIC vec<U, N> get_arg(const xfunction<Fn, Args...>& self, const shape<Dims>& index, + vec_shape<U, N> sh, csize_t<idx>) +{ + using Traits = expression_traits<typename xfunction<Fn, Args...>::template nth<idx>>; + if constexpr (Traits::dims == 0) + { + return repeat<N>(get_elements(std::get<idx>(self.args), {}, vec_shape<U, 1>{})); + } + else + { + auto indices = internal_generic::adapt<Traits::dims>(index, self.getmask(csize<idx>)); + constexpr index_t last_dim = Traits::shapeof().back(); + if constexpr (last_dim > 0) + { + return repeat<N / std::min(last_dim, N)>( + get_elements(std::get<idx>(self.args), indices, vec_shape<U, std::min(last_dim, N)>{})); + } + else + { + if constexpr (N > 1) + { + if (CMT_UNLIKELY(self.masks[idx].back() == 0)) + return get_elements(std::get<idx>(self.args), indices, vec_shape<U, 1>{}).front(); + else + return get_elements(std::get<idx>(self.args), indices, sh); + } + else + { + return get_elements(std::get<idx>(self.args), indices, sh); + } + } + } +} +} // namespace internal + +template <typename Fn, typename... Args, typename U, size_t N, index_t Dims> +KFR_MEM_INTRINSIC vec<U, N> get_elements(const xfunction<Fn, Args...>& self, const shape<Dims>& index, + vec_shape<U, N> sh) +{ + constexpr index_t outdims = expression_traits<xfunction<Fn, Args...>>::dims; + return self.fold_idx( + [&](auto... 
idx) CMT_INLINE_LAMBDA -> vec<U, N> { + return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...); + }); +} namespace internal { @@ -260,7 +693,10 @@ struct expression_with_arguments : input_expression constexpr static size_t count = sizeof...(Args); expression_with_arguments() = delete; - constexpr expression_with_arguments(Args&&... args) CMT_NOEXCEPT : args(std::forward<Args>(args)...) {} + KFR_MEM_INTRINSIC constexpr expression_with_arguments(Args&&... args) CMT_NOEXCEPT + : args(std::forward<Args>(args)...) + { + } KFR_MEM_INTRINSIC void begin_block(cinput_t cinput, size_t size) const { @@ -427,9 +863,9 @@ CMT_INTRINSIC internal::expression_function<Fn, NewArgs...> rebind( template <size_t width = 0, typename OutputExpr, typename InputExpr, size_t groupsize = 1, typename Tvec = vec<value_type_of<InputExpr>, 1>> -static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0, - size_t size = infinite_size, coutput_t coutput = nullptr, - cinput_t cinput = nullptr, csize_t<groupsize> = csize_t<groupsize>()) +static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0, size_t size = infinite_size, + coutput_t coutput = nullptr, cinput_t cinput = nullptr, + csize_t<groupsize> = csize_t<groupsize>()) { using Tin = value_type_of<InputExpr>; static_assert(is_output_expression<OutputExpr>, "OutFn must be an expression"); @@ -444,7 +880,7 @@ static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0, #ifdef NDEBUG constexpr size_t w = width == 0 ? maximum_vector_size<Tin> : width; #else - constexpr size_t w = width == 0 ? vector_width<Tin> : width; + constexpr size_t w = width == 0 ? vector_width<Tin> : width; #endif static_assert(w > 0 && is_poweroftwo(w), ""); diff --git a/include/kfr/base/impl/static_array.hpp b/include/kfr/base/impl/static_array.hpp @@ -0,0 +1,87 @@ +#include "../../cometa.hpp" +#include "../../kfr.h" + +namespace kfr +{ +using namespace cometa; + +template <typename T, size_t> +using type_for = T; + +template <typename T, typename indices_t> +struct static_array_base; + +template <typename T, size_t... indices> +struct static_array_base<T, csizes_t<indices...>> +{ + using value_type = T; + using size_type = size_t; + using difference_type = ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + using iterator = pointer; + using const_reference = const value_type&; + using const_pointer = const value_type*; + using const_iterator = const_pointer; + + constexpr static size_t static_size = sizeof...(indices); + + constexpr static_array_base() = default; + constexpr static_array_base(const static_array_base&) = default; + constexpr static_array_base(static_array_base&&) = default; + + KFR_MEM_INTRINSIC constexpr static_array_base(type_for<value_type, indices>... 
args) + { + (static_cast<void>(array[indices] = args), ...); + } + + constexpr static_array_base& operator=(const static_array_base&) = default; + constexpr static_array_base& operator=(static_array_base&&) = default; + + template <int dummy = 0, CMT_ENABLE_IF(dummy == 0 && static_size > 1)> + KFR_MEM_INTRINSIC constexpr static_array_base(value_type value) + { + (static_cast<void>(array[indices] = value), ...); + } + + KFR_MEM_INTRINSIC vec<T, static_size> operator*() const { return read<static_size>(data()); } + + KFR_MEM_INTRINSIC static_array_base(const vec<T, static_size>& v) { write(data(), v); } + + KFR_MEM_INTRINSIC constexpr const value_type* data() const { return std::data(array); } + KFR_MEM_INTRINSIC constexpr value_type* data() { return std::data(array); } + + KFR_MEM_INTRINSIC constexpr const_iterator begin() const { return std::begin(array); } + KFR_MEM_INTRINSIC constexpr iterator begin() { return std::begin(array); } + KFR_MEM_INTRINSIC constexpr const_iterator cbegin() const { return std::begin(array); } + + KFR_MEM_INTRINSIC constexpr const_iterator end() const { return std::end(array); } + KFR_MEM_INTRINSIC constexpr iterator end() { return std::end(array); } + KFR_MEM_INTRINSIC constexpr const_iterator cend() const { return std::end(array); } + + KFR_MEM_INTRINSIC constexpr const_reference operator[](size_t index) const { return array[index]; } + KFR_MEM_INTRINSIC constexpr reference operator[](size_t index) { return array[index]; } + + KFR_MEM_INTRINSIC constexpr const_reference front() const { return array[0]; } + KFR_MEM_INTRINSIC constexpr reference front() { return array[0]; } + + KFR_MEM_INTRINSIC constexpr const_reference back() const { return array[static_size - 1]; } + KFR_MEM_INTRINSIC constexpr reference back() { return array[static_size - 1]; } + + KFR_MEM_INTRINSIC constexpr bool empty() const { return false; } + + KFR_MEM_INTRINSIC constexpr size_t size() const { return std::size(array); } + + KFR_MEM_INTRINSIC constexpr bool operator==(const static_array_base& other) const + { + return ((array[indices] == other.array[indices]) && ...); + } + KFR_MEM_INTRINSIC constexpr bool operator!=(const static_array_base& other) const + { + return !operator==(other); + } + +private: + T array[static_size]; +}; +} // namespace kfr diff --git a/include/kfr/base/shape.hpp b/include/kfr/base/shape.hpp @@ -0,0 +1,515 @@ +#include "impl/static_array.hpp" + +#include "../simd/shuffle.hpp" +#include "../simd/types.hpp" +#include "../simd/vec.hpp" + +#include <bitset> + +namespace kfr +{ + +#ifndef KFR_32BIT_INDICES +using index_t = size_t; +#else +using index_t = uint32_t; +#endif +constexpr inline index_t max_index_t = std::numeric_limits<index_t>::max(); + +constexpr inline index_t infinite_size = max_index_t; + +constexpr inline index_t maximum_dims = 8; + +using dimset = vec<i8, maximum_dims>; + +CMT_INTRINSIC constexpr index_t size_add(index_t x, index_t y) +{ + return (x == infinite_size || y == infinite_size) ? infinite_size : x + y; +} + +CMT_INTRINSIC constexpr index_t size_sub(index_t x, index_t y) +{ + return (x == infinite_size || y == infinite_size) ? infinite_size : (x > y ? x - y : 0); +} + +CMT_INTRINSIC constexpr index_t size_min(index_t x) CMT_NOEXCEPT { return x; } + +template <typename... Ts> +CMT_INTRINSIC constexpr index_t size_min(index_t x, index_t y, Ts... rest) CMT_NOEXCEPT +{ + return size_min(x < y ? 
x : y, rest...); +} + +template <index_t dims> +struct shape; + +namespace internal_generic +{ +template <index_t dims> +KFR_INTRINSIC bool increment_indices(shape<dims>& indices, const shape<dims>& start, const shape<dims>& stop, + index_t dim = dims - 1); +} + +template <index_t dims> +struct shape : static_array_base<index_t, csizeseq_t<dims>> +{ + using static_array_base<index_t, csizeseq_t<dims>>::static_array_base; + + static_assert(dims < maximum_dims); + + shape operator+(const shape& other) const + { + vec<index_t, dims> x = **this; + vec<index_t, dims> y = *other; + mask<index_t, dims> inf = (x == infinite_size) || (y == infinite_size); + return select(inf, infinite_size, x + y); + } + shape operator-(const shape& other) const + { + vec<index_t, dims> x = **this; + vec<index_t, dims> y = *other; + mask<index_t, dims> inf = (x == infinite_size) || (y == infinite_size); + return select(inf, infinite_size, x - y); + } + + friend shape min(const shape& x, const shape& y) { return kfr::min(*x, *y); } + + KFR_MEM_INTRINSIC size_t to_flat(const shape<dims>& indices) const + { + size_t result = 0; + size_t scale = 1; + for (size_t i = 0; i < dims; ++i) + { + result += scale * indices[dims - 1 - i]; + scale *= (*this)[dims - 1 - i]; + } + return result; + } + KFR_MEM_INTRINSIC shape<dims> from_flat(size_t index) const + { + shape<dims> indices; + for (size_t i = 0; i < dims; ++i) + { + size_t sz = (*this)[dims - 1 - i]; + indices[dims - 1 - i] = index % sz; + index /= sz; + } + return indices; + } + + KFR_MEM_INTRINSIC index_t dot(const shape& other) const + { + if constexpr (dims == 1) + { + return (*this)[0] * other[0]; + } + else if constexpr (dims == 2) + { + return (*this)[0] * other[0] + (*this)[1] * other[1]; + } + else + { + return hdot(**this, *other); + } + } + + template <index_t indims> + KFR_MEM_INTRINSIC shape adapt(const shape<indims>& other) const + { + static_assert(indims >= dims); + return min(other.template trim<dims>(), **this - 1); + } + + KFR_MEM_INTRINSIC index_t product() const { return hproduct(**this); } + KFR_MEM_INTRINSIC constexpr index_t cproduct() const + { + index_t result = this->front(); + for (index_t i = 1; i < dims; i++) + result *= this->operator[](i); + return result; + } + + KFR_MEM_INTRINSIC dimset tomask() const + { + dimset result = 0; + for (index_t i = 0; i < dims; ++i) + { + result[i + maximum_dims - dims] = this->operator[](i) == 1 ? 0 : -1; + } + return result; + } + + template <size_t odims> + shape<odims> trim() const + { + static_assert(odims <= dims); + if constexpr (odims > 0) + { + return slice<dims - odims, odims>(**this); + } + else + { + return {}; + } + } + + KFR_MEM_INTRINSIC shape<dims - 1> trunc() const + { + if constexpr (dims > 1) + { + return slice<0, dims - 1>(**this); + } + else + { + return {}; + } + } + + shape<dims + 1> extend() const { return concat(**this, vec{ index_t(0) }); } + + KFR_MEM_INTRINSIC constexpr index_t revindex(size_t index) const + { + return index < dims ? 
this->operator[](dims - 1 - index) : 1; + } + KFR_MEM_INTRINSIC constexpr void set_revindex(size_t index, index_t val) + { + if (index < dims) + this->operator[](dims - 1 - index) = val; + } +}; + +template <> +struct shape<0> +{ + static constexpr size_t static_size = 0; + + static constexpr size_t size() { return static_size; } + + shape() = default; + shape(index_t value) {} + + KFR_MEM_INTRINSIC size_t to_flat(const shape<0>& indices) const { return 0; } + KFR_MEM_INTRINSIC shape<0> from_flat(size_t index) const { return {}; } + + template <index_t odims> + KFR_MEM_INTRINSIC shape<0> adapt(const shape<odims>& other) const + { + return {}; + } + + KFR_MEM_INTRINSIC size_t product() const { return 0; } + + KFR_MEM_INTRINSIC dimset tomask() const { return -1; } + + shape<1> extend() const { return { 0 }; } + + template <size_t new_dims> + shape<new_dims> trim() const + { + static_assert(new_dims == 0); + return {}; + } + + KFR_MEM_INTRINSIC bool operator==(const shape<0>& other) const { return true; } + KFR_MEM_INTRINSIC bool operator!=(const shape<0>& other) const { return false; } + + KFR_MEM_INTRINSIC index_t revindex(size_t index) const { return 1; } + KFR_MEM_INTRINSIC void set_revindex(size_t index, index_t val) {} +}; + +template <typename... Args> +shape(Args&&... args) -> shape<sizeof...(Args)>; + +namespace internal_generic +{ + +template <index_t outdims, index_t indims> +KFR_MEM_INTRINSIC shape<outdims> adapt(const shape<indims>& in, const dimset& set) +{ + static_assert(indims >= outdims); + if constexpr (outdims == 0) + { + return {}; + } + else + { + const vec<std::make_signed_t<index_t>, maximum_dims> eset = cast<std::make_signed_t<index_t>>(set); + return slice<indims - outdims, outdims>(*in) & slice<maximum_dims - outdims, outdims>(eset); + } +} +template <index_t outdims> +KFR_MEM_INTRINSIC shape<outdims> adapt(const shape<0>& in, const dimset& set) +{ + static_assert(outdims == 0); + return {}; +} +} // namespace internal_generic + +template <size_t Dims> +struct cursor +{ + shape<Dims> current; + shape<Dims> minimum; + shape<Dims> maximum; +}; + +struct tensor_range +{ + index_t start = 0; + index_t stop = max_index_t; + index_t step = 1; + + constexpr KFR_INTRINSIC index_t size() const { return stop - start; } +}; + +constexpr KFR_INTRINSIC tensor_range tstart(index_t start, index_t step = 1) +{ + return { start, max_index_t, step }; +} +constexpr KFR_INTRINSIC tensor_range tstop(index_t stop, index_t step = 1) { return { 0, stop, step }; } +constexpr KFR_INTRINSIC tensor_range trange(index_t start, index_t stop, index_t step = 1) +{ + return { start, stop, step }; +} +constexpr KFR_INTRINSIC tensor_range trange_n(index_t start, index_t size, index_t step = 1) +{ + return { start, start + size, step }; +} +constexpr KFR_INTRINSIC tensor_range tall() { return { 0, max_index_t, 1 }; } + +namespace internal_generic +{ + +constexpr inline index_t null_index = max_index_t; + +template <index_t dims> +constexpr KFR_INTRINSIC shape<dims> strides_for_shape(const shape<dims>& sh, index_t stride = 1) +{ + shape<dims> strides; + index_t n = stride; + for (index_t i = 0; i < dims; ++i) + { + strides[dims - 1 - i] = n; + n *= sh[dims - 1 - i]; + } + return strides; +} + +template <typename Index> +constexpr KFR_INTRINSIC index_t get_start(const Index& idx) +{ + if constexpr (std::is_same_v<std::decay_t<Index>, tensor_range>) + { + return idx.start; + } + else + { + static_assert(std::is_convertible_v<Index, index_t>); + return static_cast<index_t>(idx); + } +} +template <typename 
Index> +constexpr KFR_INTRINSIC index_t get_stop(const Index& idx) +{ + if constexpr (std::is_same_v<std::decay_t<Index>, tensor_range>) + { + return idx.stop; + } + else + { + static_assert(std::is_convertible_v<Index, index_t>); + return static_cast<index_t>(idx) + 1; + } +} +template <size_t dims, size_t outdims, bool... ranges> +constexpr KFR_INTRINSIC shape<outdims> compact_shape(const shape<dims>& in) +{ + shape<outdims> result; + constexpr std::array flags{ ranges... }; + size_t j = 0; + for (size_t i = 0; i < dims; ++i) + { + if (i >= flags.size() || flags[i]) + { + result[j++] = in[i]; + } + } + return result; +} + +template <index_t dims1, index_t dims2, index_t outdims = const_max(dims1, dims2)> +bool can_assign_from(const shape<dims1>& dst_shape, const shape<dims2>& src_shape) +{ + for (size_t i = 0; i < outdims; ++i) + { + index_t dst_size = dst_shape.revindex(i); + index_t src_size = src_shape.revindex(i); + if (src_size == 1 || src_size == infinite_size || src_size == dst_size) + { + } + else + { + return false; + } + } + return true; +} + +template <index_t dims1, index_t dims2, index_t outdims = const_max(dims1, dims2)> +constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<dims2>& shape2) +{ + shape<outdims> result; + for (size_t i = 0; i < outdims; ++i) + { + index_t size1 = shape1.revindex(i); + index_t size2 = shape2.revindex(i); + if (size1 == 1 || size2 == 1 || size1 == size2) + { + result[outdims - 1 - i] = std::max(size1, size2); + } + else + { + // broadcast failed + result = 0; + return result; + } + } + return result; +} + +template <> +KFR_MEM_INTRINSIC shape<0> common_shape(const shape<0>& shape1, const shape<0>& shape2) +{ + return {}; +} + +template <index_t dims1, index_t dims2> +KFR_MEM_INTRINSIC bool same_layout(const shape<dims1>& x, const shape<dims2>& y) +{ + for (index_t i = 0, j = 0;;) + { + while (i < dims1 && x[i] == 1) + ++i; + while (j < dims2 && y[j] == 1) + ++j; + if (i == dims1 && j == dims2) + { + return true; + } + if (i < dims1 && j < dims2) + { + if (x[i] != y[j]) + return false; + } + else + { + return false; + } + ++i; + ++j; + } +} + +#ifdef KFR_VEC_INDICES +template <size_t step, index_t dims> +KFR_INTRINSIC vec<index_t, dims> increment_indices(vec<index_t, dims> indices, + const vec<index_t, dims>& start, + const vec<index_t, dims>& stop) +{ + indices = indices + make_vector(cconcat(cvalseq<index_t, dims - 1 - step, 0, 0>, cvalseq<index_t, 1, 1>, + cvalseq<index_t, step, 0, 0>)); + + if constexpr (step + 1 < dims) + { + vec<bit<index_t>, dims> mask = indices >= stop; + if (CMT_LIKELY(!any(mask))) + return indices; + indices = blend(indices, start, cconcat(csizeseq<dims - step - 1, 0, 0>, csizeseq<step + 1, 1, 0>)); + + return increment_indices<step + 1>(indices, stop); + } + else + { + return indices; + } +} +#endif + +template <index_t dims> +KFR_INTRINSIC bool compare_indices(const shape<dims>& indices, const shape<dims>& stop, + index_t dim = dims - 1) +{ + CMT_LOOP_UNROLL + for (int i = dim; i >= 0; --i) + { + if (CMT_UNLIKELY(indices[i] >= stop[i])) + return false; + } + return true; +} + +template <index_t dims> +KFR_INTRINSIC bool increment_indices(shape<dims>& indices, const shape<dims>& start, const shape<dims>& stop, + index_t dim) +{ +#ifdef KFR_VEC_INDICES + vec<index_t, dims> idx = increment_indices<0>(*indices, *start, *stop); + indices = idx; + if (any(idx == *stop)) + return false; + return true; +#else + + indices[dim] += 1; + CMT_LOOP_UNROLL + for (int i = dim; i >= 0;) + { + if 
(CMT_LIKELY(indices[i] < stop[i])) + return true; + // carry + indices[i] = start[i]; + --i; + if (i < 0) + { + return false; + } + indices[i] += 1; + } + return true; +#endif +} + +template <index_t dims> +KFR_INTRINSIC shape<dims> increment_indices_return(const shape<dims>& indices, const shape<dims>& start, + const shape<dims>& stop, index_t dim = dims - 1) +{ + shape<dims> result = indices; + if (increment_indices(result, start, stop, dim)) + { + return result; + } + else + { + return null_index; + } +} + +template <typename... Index> +constexpr KFR_INTRINSIC size_t count_dimensions() +{ + return ((std::is_same_v<std::decay_t<Index>, tensor_range> ? 1 : 0) + ...); +} +} // namespace internal_generic + +template <index_t dims> +constexpr KFR_INTRINSIC index_t size_of_shape(const shape<dims>& shape) +{ + index_t n = 1; + for (index_t i = 0; i < dims; ++i) + { + n *= shape[i]; + } + return n; +} + +} // namespace kfr diff --git a/include/kfr/base/tensor.hpp b/include/kfr/base/tensor.hpp @@ -0,0 +1,822 @@ +/** @addtogroup expressions + * @{ + */ +/* + Copyright (C) 2016 D Levin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. + */ +#pragma once + +#include <optional> + +#include "../cometa/array.hpp" + +#include "../math/logical.hpp" +#include "../math/min_max.hpp" +#include "../simd/horizontal.hpp" +#include "../simd/impl/function.hpp" +#include "../simd/read_write.hpp" +#include "../simd/types.hpp" +#include "memory.hpp" + +CMT_PRAGMA_MSVC(warning(push)) +CMT_PRAGMA_MSVC(warning(disable : 4324)) + +namespace kfr +{ + +using memory_finalizer = std::shared_ptr<void>; + +template <typename Fn> +memory_finalizer KFR_INTRINSIC make_memory_finalizer(Fn&& fn) +{ + return std::shared_ptr<void>(nullptr, [fn = std::move(fn)](void*) { fn(); }); +} + +template <typename T, typename Derived, typename Dims> +struct tensor_subscript; + +template <typename T, typename Derived, index_t... Dims> +struct tensor_subscript<T, Derived, std::integer_sequence<size_t, Dims...>> +{ + constexpr static inline size_t dims = sizeof...(Dims); + + using reference = T&; + using const_reference = const T&; + + reference KFR_MEM_INTRINSIC operator()(type_for<index_t, Dims>... idx) const + { + return static_cast<const Derived&>(*this).access(shape<dims>{ idx... 
}); + } +}; + +template <typename T, index_t NDims> +struct tensor : public tensor_subscript<T, tensor<T, NDims>, std::make_index_sequence<NDims>> +{ +public: + using value_type = T; + using pointer = T* CMT_RESTRICT; + using const_pointer = const T* CMT_RESTRICT; + using reference = T&; + using const_reference = const T&; + using size_type = index_t; + + constexpr static inline index_t dims = NDims; + + using shape_type = shape<dims>; + + struct tensor_iterator + { + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = T; + using pointer = T*; + using reference = T&; + + const tensor* src; + shape_type indices; + + KFR_MEM_INTRINSIC index_t flat_index() const { return src->calc_index(indices); } + + KFR_MEM_INTRINSIC bool is_end() const { return indices.front() == internal_generic::null_index; } + + template <size_t num> + KFR_MEM_INTRINSIC shape<num> advance() + { + shape<num> result; + for (size_t i = 0; i < num; ++i) + { + result[i] = src->calc_index(indices); + ++*this; + if (is_end()) + break; + } + return result; + } + + KFR_MEM_INTRINSIC T& operator*() { return src->m_data[flat_index()]; } + KFR_MEM_INTRINSIC T* operator->() { return &operator*(); } + + // prefix + KFR_MEM_INTRINSIC tensor_iterator& operator++() + { + if (!internal_generic::increment_indices(indices, kfr::shape<dims>(0), src->m_shape)) + { + indices = internal_generic::null_index; + } + return *this; + } + + // postfix + KFR_MEM_INTRINSIC tensor_iterator operator++(int) + { + tensor_iterator temp = *this; + ++*this; + return temp; + } + + KFR_MEM_INTRINSIC bool operator==(const tensor_iterator& it) const + { + return src == it.src && indices == it.indices; + } + KFR_MEM_INTRINSIC bool operator!=(const tensor_iterator& it) const { return !operator==(it); } + }; + + using iterator = tensor_iterator; + using const_iterator = tensor_iterator; + + using contiguous_iterator = pointer; + using const_contiguous_iterator = pointer; + + KFR_MEM_INTRINSIC constexpr tensor() + : m_data(0), m_size(0), m_is_contiguous(false), m_shape{}, m_strides{} + { + } + + KFR_MEM_INTRINSIC tensor(T* data, const shape_type& shape, const shape_type& strides, + memory_finalizer finalizer) + : m_data(data), m_size(size_of_shape(shape)), + m_is_contiguous(strides == internal_generic::strides_for_shape(shape)), m_shape(shape), + m_strides(strides), m_finalizer(std::move(finalizer)) + { + } + + KFR_MEM_INTRINSIC tensor(T* data, const shape_type& shape, memory_finalizer finalizer) + : m_data(data), m_size(size_of_shape(shape)), m_is_contiguous(true), m_shape(shape), + m_strides(internal_generic::strides_for_shape(shape)), m_finalizer(std::move(finalizer)) + { + } + + KFR_INTRINSIC static T* allocate(size_t size) { return aligned_allocate<T>(size, 64); } + + KFR_INTRINSIC static void deallocate(T* ptr) { aligned_deallocate(ptr); } + + KFR_INTRINSIC explicit tensor(const shape_type& shape) + : m_size(size_of_shape(shape)), m_is_contiguous(true), m_shape(shape), + m_strides(internal_generic::strides_for_shape(shape)) + { + T* ptr = allocate(m_size); + m_data = ptr; + m_finalizer = make_memory_finalizer([ptr]() { deallocate(ptr); }); + } + KFR_INTRINSIC tensor(const shape_type& shape, const shape_type& strides) + : m_size(size_of_shape(shape)), + m_is_contiguous(strides == internal_generic::strides_for_shape(shape)), m_shape(shape), + m_strides(strides) + { + T* ptr = allocate(m_size); + m_data = ptr; + m_finalizer = make_memory_finalizer([ptr]() { deallocate(ptr); }); + } + KFR_INTRINSIC 
tensor(const shape_type& shape, T value) : tensor(shape) + { + std::fill(contiguous_begin(), contiguous_end(), value); + } + + KFR_INTRINSIC tensor(const shape_type& shape, const shape_type& strides, T value) : tensor(shape, strides) + { + std::fill(begin(), end(), value); + } + KFR_INTRINSIC tensor(const shape_type& shape, std::initializer_list<T> values) : tensor(shape) + { + if (values.size() != m_size) + throw std::runtime_error("Invalid initializer provided for kfr::tensor"); + std::copy(values.begin(), values.end(), contiguous_begin()); + } + + KFR_INTRINSIC tensor(const shape_type& shape, const shape_type& strides, std::initializer_list<T> values) + : tensor(shape, strides) + { + if (values.size() != m_size) + throw std::runtime_error("Invalid initializer provided for kfr::tensor"); + std::copy(values.begin(), values.end(), begin()); + } + + KFR_INTRINSIC pointer data() const { return m_data; } + + KFR_INTRINSIC size_type size() const { return m_size; } + + KFR_INTRINSIC tensor_iterator begin() const { return tensor_iterator{ this, 0 }; } + KFR_INTRINSIC tensor_iterator end() const + { + return tensor_iterator{ this, internal_generic::null_index }; + } + + KFR_INTRINSIC void require_contiguous() const + { + if (!m_is_contiguous) + throw std::runtime_error("Contiguous array is required"); + } + + KFR_INTRINSIC contiguous_iterator contiguous_begin() const + { + require_contiguous(); + return m_data; + } + KFR_INTRINSIC contiguous_iterator contiguous_end() const + { + require_contiguous(); + return m_data + m_size; + } + + KFR_INTRINSIC contiguous_iterator contiguous_begin_unsafe() const { return m_data; } + KFR_INTRINSIC contiguous_iterator contiguous_end_unsafe() const { return m_data + m_size; } + + KFR_MEM_INTRINSIC index_t calc_index(const shape_type& indices) const { return indices.dot(m_strides); } + + KFR_MEM_INTRINSIC reference access(const shape_type& indices) const + { + return m_data[calc_index(indices)]; + } + + KFR_MEM_INTRINSIC reference operator[](index_t flat_index) const { return m_data[flat_index]; } + + KFR_MEM_INTRINSIC tensor operator()(const shape_type& start, const shape_type& stop) const + { + return tensor{ + m_data + calc_index(start), + stop - start, + m_strides, + m_finalizer, + }; + } + + tensor(const tensor&) = default; + tensor(tensor&&) = default; + +#if defined(CMT_COMPILER_MSVC) && !defined(CMT_COMPILER_CLANG) + tensor& operator=(const tensor& src) & + { + this->~tensor(); + new (this) tensor(src); + } + tensor& operator=(tensor&& src) & + { + this->~tensor(); + new (this) tensor(std::move(src)); + } +#else + tensor& operator=(const tensor& src) & = default; + tensor& operator=(tensor&& src) & = default; +#endif + + KFR_MEM_INTRINSIC const tensor& operator=(const tensor& src) const& + { + assign(src); + return *this; + } + KFR_MEM_INTRINSIC tensor& operator=(const tensor& src) && + { + assign(src); + return *this; + } + KFR_MEM_INTRINSIC const tensor& operator=(const T& scalar) const& + { + assign(scalar); + return *this; + } + KFR_MEM_INTRINSIC tensor& operator=(const T& scalar) && + { + assign(scalar); + return *this; + } + + KFR_MEM_INTRINSIC void assign(const tensor& src) const + { + if (src.shape() != m_shape) + throw std::range_error("Tensors must have smae shape"); + std::copy(src.begin(), src.end(), begin()); + } + KFR_MEM_INTRINSIC void assign(const T& scalar) const { std::fill(begin(), end(), scalar); } + + template <typename... 
Index> + static constexpr bool has_tensor_range = (std::is_same_v<Index, tensor_range> || ...); + + template <typename... Index, + size_t ndimOut = internal_generic::count_dimensions<Index...>() + (dims - sizeof...(Index)), + std::enable_if_t<has_tensor_range<Index...> || (sizeof...(Index) < dims)>* = nullptr> + KFR_MEM_INTRINSIC tensor<T, ndimOut> operator()(const Index&... idx) const + { + shape_type start{ internal_generic::get_start(idx)... }; + shape_type stop{ internal_generic::get_stop(idx)... }; + stop = min(*stop, *m_shape); + shape_type strides = m_strides; + for (index_t i = sizeof...(Index); i < dims; ++i) + { + start[i] = 0; + stop[i] = m_shape[i]; + } + T* data = m_data + calc_index(start); + shape_type shape = *stop - *start; + + return tensor<T, ndimOut>{ + data, + internal_generic::compact_shape<dims, ndimOut, std::is_same_v<Index, tensor_range>...>(shape), + internal_generic::compact_shape<dims, ndimOut, std::is_same_v<Index, tensor_range>...>(strides), + m_finalizer, + }; + } + + using tensor_subscript<T, tensor<T, NDims>, std::make_index_sequence<NDims>>::operator(); + + template <index_t dims> + KFR_MEM_INTRINSIC tensor<T, dims> reshape_may_copy(const shape<dims>& new_shape, + bool allow_copy = false) const + { + if (size_of_shape(new_shape) != m_size) + { + throw std::runtime_error("Invalid shape provided"); + } + /* + TODO: reshape must be possible with non-contiguous arrays: + [256, 256, 1] -> [256, 256] + [256, 256] -> [256, 256, 1] + [256, 256] -> [256, 1, 256] + */ + if (!is_contiguous()) + { + if (allow_copy) + { + tensor<T, dims> result(new_shape); + std::copy(begin(), end(), result.contiguous_begin()); + return result; + } + else + { + throw std::runtime_error("reshape requires contiguous array"); + } + } + return tensor<T, dims>{ + m_data, + new_shape, + internal_generic::strides_for_shape(new_shape), + m_finalizer, + }; + } + + template <index_t dims> + KFR_MEM_INTRINSIC tensor<T, dims> reshape(const shape<dims>& new_shape) const + { + return reshape_may_copy(new_shape, false); + } + + KFR_MEM_INTRINSIC tensor<T, 1> flatten() const { return reshape(shape<1>{ m_size }, false); } + + KFR_MEM_INTRINSIC tensor<T, 1> flatten_may_copy(bool allow_copy = false) const + { + return reshape(shape<1>{ m_size }, allow_copy); + } + + KFR_MEM_INTRINSIC tensor copy() const + { + tensor result(m_shape); + std::copy(begin(), end(), result.contiguous_begin()); + return result; + } + + KFR_MEM_INTRINSIC void copy_from(const tensor& other) { std::copy(other.begin(), other.end(), begin()); } + + template <typename Fn> + KFR_MEM_INTRINSIC void iterate(Fn&& fn) const + { + auto it = begin(); + while (it != end()) + { + fn(*it, it.indices); + ++it; + } + } + + template <typename Fn, typename Tout = std::invoke_result_t<Fn, T>> + KFR_MEM_INTRINSIC tensor<Tout, dims> map(Fn&& fn) const + { + return unary(std::forward<Fn>(fn)); + } + + template <typename Fn, typename Tout = std::invoke_result_t<Fn, T>> + KFR_MEM_INTRINSIC tensor<Tout, dims> unary(Fn&& fn) const + { + tensor<Tout, dims> result(m_shape); + auto dst = result.contiguous_begin_unsafe(); + if (is_contiguous()) + { + auto src = contiguous_begin_unsafe(); + while (src != contiguous_end_unsafe()) + { + *dst = fn(*src); + ++src; + ++dst; + } + } + else + { + auto src = begin(); + while (src != end()) + { + *dst = fn(*src); + ++src; + ++dst; + } + } + return result; + } + + template <typename Fn> + KFR_MEM_INTRINSIC const tensor& unary_inplace(Fn&& fn) const + { + if (is_contiguous()) + { + auto it = contiguous_begin_unsafe(); + 
while (it != contiguous_end_unsafe()) + { + *it = fn(*it); + ++it; + } + } + else + { + auto it = begin(); + while (it != end()) + { + *it = fn(*it); + ++it; + } + } + return *this; + } + + template <typename Fn> + KFR_MEM_INTRINSIC T reduce(Fn&& fn, T initial = T{}) const + { + T result = initial; + if (is_contiguous()) + { + auto src = contiguous_begin_unsafe(); + while (src != contiguous_end_unsafe()) + { + result = fn(*src, result); + ++src; + } + } + else + { + auto src = begin(); + while (src != end()) + { + result = fn(*src, result); + ++src; + } + } + return result; + } + + template <typename Fn, typename U, typename Tout = std::invoke_result_t<Fn, T, U>> + KFR_MEM_INTRINSIC tensor<Tout, dims> binary(const tensor<U, dims>& rhs, Fn&& fn) const + { + tensor<Tout, dims> result(m_shape); + if (is_contiguous() && rhs.is_contiguous()) + { + auto src1 = contiguous_begin_unsafe(); + auto src2 = rhs.contiguous_begin_unsafe(); + auto dst = result.contiguous_begin_unsafe(); + while (src1 != contiguous_end_unsafe()) + { + *dst = fn(*src1, *src2); + ++src1; + ++src2; + ++dst; + } + } + else + { + auto src1 = begin(); + auto src2 = rhs.begin(); + auto dst = result.contiguous_begin(); + while (src1 != end()) + { + *dst = fn(*src1, *src2); + ++src1; + ++src2; + ++dst; + } + } + return result; + } + + template <typename Fn, typename U> + KFR_MEM_INTRINSIC const tensor& binary_inplace(const tensor<U, dims>& rhs, Fn&& fn) const + { + if (is_contiguous() && rhs.is_contiguous()) + { + auto it = contiguous_begin_unsafe(); + auto src2 = rhs.contiguous_begin_unsafe(); + while (it != contiguous_end_unsafe()) + { + *it = fn(*it, *src2); + ++it; + ++src2; + } + } + else + { + auto it = begin(); + auto src2 = rhs.begin(); + while (it != end()) + { + *it = fn(*it, *src2); + ++it; + ++src2; + } + } + return *this; + } + + template <typename U> + KFR_MEM_INTRINSIC tensor<U, dims> astype() const + { + return unary([](T value) { return static_cast<U>(value); }); + } + + template <size_t Nout> + KFR_MEM_INTRINSIC std::array<T, Nout> to_array() const + { + if (m_size != Nout) + throw std::range_error("Nout != m_size"); + std::array<T, Nout> result; + if (is_contiguous()) + std::copy(contiguous_begin(), contiguous_end(), result.begin()); + else + std::copy(begin(), end(), result.begin()); + return result; + } + struct nested_iterator_t + { + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = tensor<T, dims - 1>; + using pointer = const value_type*; + using reference = const value_type&; + + const tensor* src; + size_t index; + + KFR_MEM_INTRINSIC value_type operator*() { return src->operator()(index); } + KFR_MEM_INTRINSIC pointer operator->() { return &operator*(); } + + // prefix + KFR_MEM_INTRINSIC nested_iterator_t& operator++() + { + ++index; + return *this; + } + + // postfix + KFR_MEM_INTRINSIC nested_iterator_t operator++(int) + { + nested_iterator_t temp = *this; + ++*this; + return temp; + } + + KFR_MEM_INTRINSIC bool operator==(const nested_iterator_t& it) const + { + return src == it.src && index == it.index; + } + KFR_MEM_INTRINSIC bool operator!=(const nested_iterator_t& it) const { return !operator==(it); } + }; + + using nested_iterator = std::conditional_t<dims == 1, tensor_iterator, nested_iterator_t>; + + KFR_MEM_INTRINSIC nested_iterator nested_begin() const + { + if constexpr (dims == 1) + return begin(); + else + return { this, 0 }; + } + KFR_MEM_INTRINSIC nested_iterator nested_end() const + { + if constexpr (dims == 1) + return 
end(); + else + return { this, m_shape[0] }; + } + + KFR_MEM_INTRINSIC memory_finalizer finalizer() const { return m_finalizer; } + + template <typename Input, index_t Dims = expression_traits<Input>::dims> + KFR_MEM_INTRINSIC const tensor& operator=(Input&& input) const& + { + tprocess(*this, input); + return *this; + } + template <typename Input, index_t Dims = expression_traits<Input>::dims> + KFR_MEM_INTRINSIC tensor& operator=(Input&& input) && + { + tprocess(*this, input); + return *this; + } + + // template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>)> + // tensor(Input&& input) : tensor(make_vector<index_t>(input.size())) + // { + // this->assign_expr(std::forward<Input>(input)); + // } + + template <size_t N, bool only_last = false> + KFR_MEM_INTRINSIC shape<N> offsets(shape<dims> indices) const + { + kfr::shape<N> result; + if constexpr (only_last) + { + index_t base = calc_index(indices); + result = base + enumerate(vec_shape<index_t, N>{}, m_shape.back()); + } + else + { + for (index_t i = 0; i < N; ++i) + { + result[i] = calc_index(indices); + internal_generic::increment_indices(indices, kfr::shape<dims>(0), m_shape); + } + } + return result; + } + + template <size_t N, bool only_last = false> + KFR_MEM_INTRINSIC shape<N> offsets(size_t flat_index) const + { + kfr::shape<dims> indices; + for (index_t i = 0; i < dims; ++i) + { + indices[dims - 1 - i] = flat_index % m_shape[dims - 1 - i]; + flat_index /= m_shape[dims - 1 - i]; + } + return offsets<N, only_last>(indices); + } + + bool operator==(const tensor& other) const + { + return shape() == other.shape() && std::equal(begin(), end(), other.begin()); + } + bool operator!=(const tensor& other) const { return !operator==(other); } + + KFR_MEM_INTRINSIC const shape_type& shape() const { return m_shape; } + KFR_MEM_INTRINSIC const shape_type& strides() const { return m_strides; } + + KFR_MEM_INTRINSIC bool is_contiguous() const { return m_is_contiguous; } + + KFR_MEM_INTRINSIC bool is_last_contiguous() const { return m_strides.back() == 1; } + +private: + template <typename Input> + KFR_MEM_INTRINSIC void assign_expr(Input&& input) const + { + process(*this, std::forward<Input>(input)); + } + + T* m_data; + index_t m_size; + bool m_is_contiguous; + shape_type m_shape; + shape_type m_strides; + memory_finalizer m_finalizer; +}; + +template <typename T> +struct tensor<T, 0> +{ +}; + +template <typename Container, CMT_ENABLE_IF(kfr::has_data_size<Container>), + typename T = typename Container::value_type> +KFR_INTRINSIC tensor<T, 1> tensor_from_container(Container vector) +{ + struct vector_finalizer + { + mutable std::optional<Container> vector; + void operator()(void*) const { vector.reset(); } + }; + + vector_finalizer finalizer{ std::move(vector) }; + + memory_finalizer mem = std::shared_ptr<void>(nullptr, std::move(finalizer)); + vector_finalizer* fin = std::get_deleter<vector_finalizer>(mem); + + return tensor<T, 1>(fin->vector->data(), make_vector<index_t>(fin->vector->size()), std::move(mem)); +} + +template <typename T, index_t Dims> +struct expression_traits<tensor<T, Dims>> : expression_traits_defaults +{ + using value_type = T; + constexpr static size_t dims = Dims; + + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const tensor<T, Dims>& self) + { + return self.shape(); + } + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof() { return { 0 }; } +}; + +inline namespace CMT_ARCH_NAME +{ + +template <typename T, index_t NDims, typename U, size_t N> +KFR_INTRINSIC vec<U, N> get_elements(const 
tensor<T, NDims>& self, const shape<NDims>& index, vec_shape<U, N>) +{ + const T* data = self.data() + self.calc_index(index); + if (self.is_last_contiguous()) + { + return static_cast<vec<U, N>>(read<N>(data)); + } + else + { + return static_cast<vec<U, N>>(gather_stride<N>(data, self.strides().back())); + } +} + +template <typename T, index_t NDims, typename U, size_t N> +KFR_INTRINSIC void set_elements(const tensor<T, NDims>& self, const shape<NDims>& index, + const vec<U, N>& value) +{ + T* data = self.data() + self.calc_index(index); + if (self.is_last_contiguous()) + { + write(data, vec<T, N>(value)); + } + else + { + scatter_stride(data, vec<T, N>(value), self.strides().back()); + } +} + +template <typename T, index_t dims1, index_t dims2, typename Fn, index_t outdims = const_max(dims1, dims2)> +tensor<T, outdims> tapply(const tensor<T, dims1>& x, const tensor<T, dims2>& y, Fn&& fn) +{ + shape<outdims> xyshape = internal_generic::common_shape(x.shape(), y.shape()); + + tensor<T, outdims> result(xyshape); + + shape<outdims> xshape = padlow<outdims - dims1>(*x.shape(), 1); + shape<outdims> yshape = padlow<outdims - dims2>(*y.shape(), 1); + + tensor<T, outdims> xx = x.reshape(xshape); + tensor<T, outdims> yy = y.reshape(yshape); + + result.iterate([&](T& val, const shape<outdims>& index) + { val = fn(xx.access(xshape.adapt(index)), yy.access(yshape.adapt(index))); }); + + return result; +} + +} // namespace CMT_ARCH_NAME + +} // namespace kfr + +namespace cometa +{ +template <size_t dims> +struct representation<kfr::shape<dims>> +{ + using type = std::string; + static std::string get(const kfr::shape<dims>& value) + { + if constexpr (dims == 0) + { + return "()"; + } + else + { + std::string s; + for (size_t i = 0; i < dims; ++i) + { + if (CMT_LIKELY(i > 0)) + s += ", "; + s += as_string(value[i]); + } + return s; + } + } +}; +} // namespace cometa + +CMT_PRAGMA_MSVC(warning(pop)) +\ No newline at end of file
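
The shape arithmetic added in shape.hpp follows NumPy-style broadcasting: internal_generic::common_shape matches dimensions from the trailing axis and treats a size of 1 as compatible with any size, and tapply in tensor.hpp applies a binary function over two tensors using that rule. A hedged sketch of the intended semantics (include path and build setup are assumptions):

    // Sketch only: broadcasting a 1-D tensor against a 2-D tensor with tapply,
    // as implemented via internal_generic::common_shape in this commit.
    #include <kfr/base/tensor.hpp>

    using namespace kfr;

    int main()
    {
        tensor<float, 2> a(shape{ 2, 3 }, 1.f); // shape {2, 3}
        tensor<float, 1> b(shape{ 3 }, 2.f);    // shape {3}, broadcast along the leading axis
        // common_shape({2, 3}, {3}) == {2, 3}; each result element is fn(a(i, j), b(j))
        tensor<float, 2> c = tapply(a, b, [](float x, float y) { return x + y; });
        return 0;
    }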