commit b94a2b4eb46bd3ac63daab644c7e46da2c7f3db8
parent 8a8ba7ae333ee7de7c3b703908e2c0fa8b92936a
Author: [email protected] <[email protected]>
Date: Tue, 2 Aug 2022 17:53:43 +0100
kfr::tensor
Diffstat:
4 files changed, 1887 insertions(+), 26 deletions(-)
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -29,6 +29,7 @@
#include "../simd/shuffle.hpp"
#include "../simd/types.hpp"
#include "../simd/vec.hpp"
+#include "shape.hpp"
#include <tuple>
#ifdef KFR_STD_COMPLEX
@@ -55,43 +56,242 @@ struct complex;
#endif
#endif
-constexpr size_t inout_context_size = 16;
+template <typename T, typename V = void>
+struct expression_traits;
-struct coutput_context
+template <typename T>
+using expression_value_type = typename expression_traits<T>::value_type;
+
+template <typename T>
+constexpr inline size_t expression_dims = expression_traits<T>::dims;
+
+template <typename T>
+constexpr inline shape<expression_dims<T>> shapeof(T&& expr)
+{
+ return expression_traits<T>::shapeof(expr);
+}
+template <typename T>
+constexpr inline shape<expression_dims<T>> shapeof()
+{
+ return expression_traits<T>::shapeof();
+}
+
+template <typename T>
+struct expression_traits<T&, std::void_t<expression_value_type<T>>> : expression_traits<T>
+{
+};
+template <typename T>
+struct expression_traits<T&&, std::void_t<expression_value_type<T>>> : expression_traits<T>
+{
+};
+template <typename T>
+struct expression_traits<const T&, std::void_t<expression_value_type<T>>> : expression_traits<T>
+{
+};
+template <typename T>
+struct expression_traits<const T&&, std::void_t<typename expression_traits<T>::value_type>>
+ : expression_traits<T>
{
- pconstvoid data[inout_context_size];
};
-struct cinput_context
+struct expression_traits_defaults
{
- pconstvoid data[inout_context_size];
+ // using value_type = void;
+ // constexpr static size_t dims = 0;
+ // constexpr static shape<dims> shapeof(const T&);
+ // constexpr static shape<dims> shapeof();
+
+ constexpr static inline bool explicit_operand = true;
};
-using coutput_t = const coutput_context*;
-using cinput_t = const cinput_context*;
+namespace internal_generic
+{
+template <typename... Xs>
+using expressions_check = std::enable_if_t<(expression_traits<Xs>::explicit_operand || ...)>;
+}
-constexpr cinput_t cinput = nullptr;
-constexpr coutput_t coutput = nullptr;
+#define KFR_ACCEPT_EXPRESSIONS(...) internal_generic::expressions_check<__VA_ARGS__>* = nullptr
+
+template <typename T>
+struct expression_traits<T, std::enable_if_t<is_simd_type<T>>> : expression_traits_defaults
+{
+ using value_type = T;
+ constexpr static size_t dims = 0;
+ constexpr static inline bool explicit_operand = false;
-constexpr size_t infinite_size = static_cast<size_t>(-1);
+ KFR_MEM_INTRINSIC constexpr static shape<0> shapeof(const T& self) { return {}; }
+ KFR_MEM_INTRINSIC constexpr static shape<0> shapeof() { return {}; }
+};
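For orientation, a minimal illustration of the traits machinery above (hypothetical, not part of the patch): any plain arithmetic/SIMD-element type acts as a rank-0 expression whose value type is itself.

    #include <type_traits>
    #include <kfr/base/expression.hpp>

    static_assert(kfr::expression_dims<float> == 0, "a scalar is a rank-0 expression");
    static_assert(std::is_same_v<kfr::expression_value_type<float>, float>);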
-CMT_INTRINSIC constexpr size_t size_add(size_t x, size_t y)
+inline namespace CMT_ARCH_NAME
+{
+template <typename T, typename U, size_t N, KFR_ENABLE_IF(is_simd_type<std::decay_t<T>>)>
+KFR_MEM_INTRINSIC vec<U, N> get_elements(T&& self, const shape<0>& index, vec_shape<U, N> sh)
+{
+ return self;
+}
+template <typename T, typename U, size_t N, KFR_ENABLE_IF(is_simd_type<std::decay_t<T>>)>
+KFR_MEM_INTRINSIC void set_elements(T& self, const shape<0>& index, const vec<U, N>& val)
{
- return (x == infinite_size || y == infinite_size) ? infinite_size : x + y;
+ static_assert(N == 1);
+ static_assert(!std::is_const_v<T>);
+ self = val.front();
}
+} // namespace CMT_ARCH_NAME
+
+inline namespace CMT_ARCH_NAME
+{
-CMT_INTRINSIC constexpr size_t size_sub(size_t x, size_t y)
+template <typename Out, typename In, size_t w, size_t gw, typename Tin, index_t outdims, index_t indims>
+KFR_INTRINSIC static void tprocess_body(Out&& out, In&& in, size_t start, size_t stop, size_t insize,
+ shape<outdims> outidx, shape<indims> inidx)
{
- return (x == infinite_size || y == infinite_size) ? infinite_size : (x > y ? x - y : 0);
+ size_t x = start;
+ if constexpr (w > gw)
+ {
+ CMT_LOOP_NOUNROLL
+ for (; x < stop / w * w; x += w)
+ {
+ outidx.set_revindex(0, x);
+ inidx.set_revindex(0, std::min(x, insize - 1));
+ set_elements(out, outidx, get_elements(in, inidx, vec_shape<Tin, w>()));
+ }
+ }
+ CMT_LOOP_NOUNROLL
+ for (; x < stop / gw * gw; x += gw)
+ {
+ outidx.set_revindex(0, x);
+ inidx.set_revindex(0, std::min(x, insize - 1));
+ set_elements(out, outidx, get_elements(in, inidx, vec_shape<Tin, gw>()));
+ }
}
-CMT_INTRINSIC constexpr size_t size_min(size_t x) CMT_NOEXCEPT { return x; }
+template <size_t width = 0, typename Out, typename In, size_t gw = 1,
+ CMT_ENABLE_IF(expression_traits<Out>::dims == 0)>
+static auto tprocess(Out&& out, In&& in, shape<0> = {}, shape<0> = {}, csize_t<gw> = {}) -> shape<0>
+{
+ set_elements(out, shape<0>{},
+ get_elements(in, shape<0>{}, vec_shape<typename expression_traits<In>::value_type, 1>()));
+ return {};
+}
-template <typename... Ts>
-CMT_INTRINSIC constexpr size_t size_min(size_t x, size_t y, Ts... rest) CMT_NOEXCEPT
+namespace internal
+{
+
+constexpr size_t select_process_width(size_t width, size_t vec_width, index_t last_dim_size)
+{
+ if (width != 0)
+ return width;
+ if (last_dim_size == 0)
+ return vec_width;
+
+ return std::min(vec_width, last_dim_size);
+}
+} // namespace internal
+
+template <size_t width = 0, typename Out, typename In, size_t gw = 1,
+ typename Tin = expression_value_type<In>, typename Tout = expression_value_type<Out>,
+ index_t outdims = expression_dims<Out>, CMT_ENABLE_IF(expression_dims<Out> > 0)>
+static auto tprocess(Out&& out, In&& in, shape<outdims> start = 0, shape<outdims> size = infinite_size,
+ csize_t<gw> = {}) -> shape<outdims>
{
- return size_min(x < y ? x : y, rest...);
+ constexpr index_t indims = expression_dims<In>;
+ static_assert(outdims >= indims);
+
+ constexpr index_t last_dim_size = prev_poweroftwo(expression_traits<Out>::shapeof().back());
+
+#ifdef NDEBUG
+ constexpr size_t vec_width = maximum_vector_size<Tin>;
+#else
+ constexpr size_t vec_width = vector_width<Tin>;
+#endif
+
+ constexpr size_t w = internal::select_process_width(width, vec_width, last_dim_size);
+
+ const shape<outdims> outshape = shapeof(out);
+ const shape<indims> inshape = shapeof(in);
+ if (CMT_UNLIKELY(!internal_generic::can_assign_from(outshape, inshape)))
+ return { 0 };
+ shape<outdims> stop = min(start + size, outshape);
+
+ // min(out, in, size + start) - start
+
+ shape<outdims> outidx;
+ if constexpr (outdims == 1)
+ {
+ outidx = shape<outdims>{ 0 };
+ tprocess_body<Out, In, w, gw, Tin, outdims, indims>(
+ std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0),
+ inshape.revindex(0), outidx, inshape.adapt(outidx));
+ }
+ else if constexpr (outdims == 2)
+ {
+ for (index_t x = start.revindex(1); x < stop.revindex(1); ++x)
+ {
+ outidx = shape<outdims>{ x, 0 };
+ tprocess_body<Out, In, w, gw, Tin, outdims, indims>(
+ std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0),
+ inshape.revindex(0), outidx, inshape.adapt(outidx));
+ }
+ }
+ else if constexpr (outdims == 3)
+ {
+ for (index_t x = start.revindex(2); x < stop.revindex(2); ++x)
+ {
+ for (index_t y = start.revindex(1); y < stop.revindex(1); ++y)
+ {
+ outidx = shape<outdims>{ x, y, 0 };
+ tprocess_body<Out, In, w, gw, Tin, outdims, indims>(
+ std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0),
+ inshape.revindex(0), outidx, inshape.adapt(outidx));
+ }
+ }
+ }
+ else if constexpr (outdims == 4)
+ {
+ for (index_t x = start.revindex(3); x < stop.revindex(3); ++x)
+ {
+ for (index_t y = start.revindex(2); y < stop.revindex(2); ++y)
+ {
+ for (index_t z = start.revindex(1); z < stop.revindex(1); ++z)
+ {
+ outidx = shape<outdims>{ x, y, z, 0 };
+ tprocess_body<Out, In, w, gw, Tin, outdims, indims>(
+ std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0),
+ inshape.revindex(0), outidx, inshape.adapt(outidx));
+ }
+ }
+ }
+ }
+ else
+ {
+ shape<outdims> outidx = start;
+ if (CMT_UNLIKELY(!internal_generic::compare_indices(outidx, stop, outdims - 2)))
+ return stop;
+ do
+ {
+ tprocess_body<Out, In, w, gw, Tin, outdims, indims>(
+ std::forward<Out>(out), std::forward<In>(in), start.revindex(0), stop.revindex(0),
+ inshape.revindex(0), outidx, inshape.adapt(outidx));
+ } while (internal_generic::increment_indices(outidx, start, stop, outdims - 2));
+ }
+ return stop;
}
+} // namespace CMT_ARCH_NAME
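A hedged usage sketch of the rank-N processing loop above (not part of the patch; assumes the tensor class added later in this commit): a rank-0 input such as a plain scalar is broadcast over every element of a higher-rank output.

    #include <kfr/base/tensor.hpp>

    void fill_with_tprocess()
    {
        using namespace kfr;
        tensor<float, 2> out(shape{ 4, 16 });
        // In has dims == 0, Out has dims == 2: the scalar is read for each element group
        // and written across the whole output shape, row by row.
        tprocess(out, 1.f);
    }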
+
+struct coutput_context
+{
+};
+
+struct cinput_context
+{
+};
+
+using coutput_t = const coutput_context*;
+using cinput_t = const cinput_context*;
+
+constexpr cinput_t cinput = nullptr;
+constexpr coutput_t coutput = nullptr;
/// @brief Base class of all input expressions
struct input_expression
@@ -229,10 +429,22 @@ internal::expression_lambda<T, decay<Fn>> lambda(Fn&& fn)
return internal::expression_lambda<T, decay<Fn>>(std::move(fn));
}
-namespace internal
+} // namespace CMT_ARCH_NAME
+
+namespace internal_generic
{
template <typename T, typename = void>
+struct has_static_size_impl : std::false_type
+{
+};
+
+template <typename T>
+struct has_static_size_impl<T, std::void_t<decltype(T::size())>> : std::true_type
+{
+};
+
+template <typename T, typename = void>
struct is_infinite_impl : std::false_type
{
};
@@ -242,10 +454,231 @@ struct is_infinite_impl<T, void_t<decltype(T::size())>>
: std::integral_constant<bool, T::size() == infinite_size>
{
};
-} // namespace internal
+} // namespace internal_generic
+
+template <typename T>
+constexpr inline bool is_infinite = internal_generic::is_infinite_impl<T>::value;
+
+template <typename T>
+constexpr inline bool has_static_size = internal_generic::has_static_size_impl<T>::value;
template <typename T>
-constexpr inline bool is_infinite = internal::is_infinite_impl<T>::value;
+struct expression_traits<T, std::enable_if_t<std::is_base_of_v<input_expression, T>>>
+ : expression_traits_defaults
+{
+ using value_type = value_type_of<T>;
+ constexpr static size_t dims = 1;
+
+ constexpr static shape<1> shapeof(const T& self) { return self.size(); }
+ constexpr static shape<1> shapeof()
+ {
+ if constexpr (has_static_size<T>)
+ {
+ return { T::size() };
+ }
+ else
+ {
+ return { 0 };
+ }
+ }
+};
+
+inline namespace CMT_ARCH_NAME
+{
+template <typename T, typename U, size_t N, KFR_ENABLE_IF(is_input_expression<T>)>
+KFR_MEM_INTRINSIC vec<U, N> get_elements(T&& self, const shape<1>& index, vec_shape<U, N> sh)
+{
+ return get_elements(self, cinput_t{}, index[0], sh);
+}
+} // namespace CMT_ARCH_NAME
+
+template <typename... Args>
+struct xwitharguments
+{
+ constexpr static size_t count = sizeof...(Args);
+
+ using type_list = ctypes_t<Args...>;
+
+ template <size_t idx>
+ using nth = typename type_list::template nth<idx>;
+
+ std::tuple<Args...> args;
+ std::array<dimset, count> masks;
+
+ KFR_INTRINSIC auto& first() { return std::get<0>(args); }
+ KFR_INTRINSIC const auto& first() const { return std::get<0>(args); }
+
+ template <size_t idx>
+ KFR_INTRINSIC dimset getmask(csize_t<idx> = {}) const
+ {
+ static_assert(idx < count);
+ using Traits = expression_traits<nth<idx>>;
+ if constexpr (Traits::dims == 0)
+ {
+ return -1;
+ }
+ else
+ {
+ constexpr shape<Traits::dims> sh = Traits::shapeof();
+ if constexpr (sh.cproduct() > 0)
+ {
+ return sh.tomask();
+ }
+ else
+ {
+ return std::get<idx>(masks);
+ }
+ }
+ }
+
+ template <typename Fn>
+ KFR_INTRINSIC constexpr auto fold(Fn&& fn) const
+ {
+ return fold_impl(std::forward<Fn>(fn), csizeseq<count>);
+ }
+ template <typename Fn>
+ KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn)
+ {
+ return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>);
+ }
+
+ KFR_INTRINSIC xwitharguments(Args&&... args) : args{ std::forward<Args>(args)... }
+ {
+ cforeach(csizeseq<count>,
+ [&](auto idx_) CMT_INLINE_LAMBDA
+ {
+ constexpr size_t idx = val_of(decltype(idx_)());
+ shape sh = expression_traits<nth<idx>>::shapeof(std::get<idx>(this->args));
+ masks[idx] = sh.tomask();
+ });
+ }
+
+private:
+ template <typename Fn, size_t... indices>
+ KFR_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const
+ {
+ return fn(std::get<indices>(args)...);
+ }
+ template <typename Fn, size_t... indices>
+ KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>)
+ {
+ return fn(csize<indices>...);
+ }
+};
+
+template <typename... Args>
+xwitharguments(Args&&... args) -> xwitharguments<Args...>;
+
+template <index_t Dims, typename Arg>
+struct xreshape : public xwitharguments<Arg>
+{
+ shape<Dims> old_shape;
+ shape<Dims> new_shape;
+};
+
+template <typename Fn, typename... Args>
+struct xfunction : public xwitharguments<Args...>
+{
+ Fn fn;
+
+ KFR_INTRINSIC xfunction(xwitharguments<Args...> args, Fn&& fn)
+ : xwitharguments<Args...>{ std::move(args) }, fn(std::move(fn))
+ {
+ }
+};
+
+template <typename... Args, typename Fn>
+xfunction(const xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>;
+template <typename... Args, typename Fn>
+xfunction(xwitharguments<Args...>&& args, Fn&& fn) -> xfunction<Fn, Args...>;
+template <typename... Args, typename Fn>
+xfunction(xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>;
+
+template <typename Fn, typename... Args>
+struct expression_traits<xfunction<Fn, Args...>> : expression_traits_defaults
+{
+ using E = xfunction<Fn, Args...>;
+
+ using value_type =
+ typename std::invoke_result_t<Fn,
+ vec<typename expression_traits<Args>::value_type, 1>...>::value_type;
+ constexpr static size_t dims = const_max(expression_traits<Args>::dims...);
+
+ constexpr static shape<dims> shapeof(const E& self)
+ {
+ return self.fold([&](auto&&... args) CMT_INLINE_LAMBDA -> auto {
+ return internal_generic::common_shape(expression_traits<decltype(args)>::shapeof(args)...);
+ });
+ }
+ constexpr static shape<dims> shapeof()
+ {
+ return xfunction<Fn, Args...>::fold_idx([&](auto... args) CMT_INLINE_LAMBDA -> auto {
+ return internal_generic::common_shape(
+ expression_traits<typename E::template nth<val_of(decltype(args)())>>::shapeof()...);
+ });
+ }
+};
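A hypothetical sanity check of the deduced traits (not part of the patch; assumes the vec arithmetic operators are in scope so std::plus can be applied to vec<float, 1>):

    #include <functional>
    #include <kfr/base/expression.hpp>

    using F = kfr::xfunction<std::plus<>, float, float>;
    static_assert(kfr::expression_traits<F>::dims == 0);
    static_assert(std::is_same_v<typename kfr::expression_traits<F>::value_type, float>);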
+
+template <index_t Dims, typename Arg>
+struct expression_traits<xreshape<Dims, Arg>> : expression_traits_defaults
+{
+ using value_type = typename expression_traits<Arg>::value_type;
+ constexpr static size_t dims = Dims;
+
+ constexpr static shape<dims> shapeof(const xreshape<Dims, Arg>& self) { return self.new_shape; }
+};
+
+inline namespace CMT_ARCH_NAME
+{
+
+namespace internal
+{
+template <index_t outdims, typename Fn, typename... Args, typename U, size_t N, index_t Dims, size_t idx>
+KFR_MEM_INTRINSIC vec<U, N> get_arg(const xfunction<Fn, Args...>& self, const shape<Dims>& index,
+ vec_shape<U, N> sh, csize_t<idx>)
+{
+ using Traits = expression_traits<typename xfunction<Fn, Args...>::template nth<idx>>;
+ if constexpr (Traits::dims == 0)
+ {
+ return repeat<N>(get_elements(std::get<idx>(self.args), {}, vec_shape<U, 1>{}));
+ }
+ else
+ {
+ auto indices = internal_generic::adapt<Traits::dims>(index, self.getmask(csize<idx>));
+ constexpr index_t last_dim = Traits::shapeof().back();
+ if constexpr (last_dim > 0)
+ {
+ return repeat<N / std::min(last_dim, N)>(
+ get_elements(std::get<idx>(self.args), indices, vec_shape<U, std::min(last_dim, N)>{}));
+ }
+ else
+ {
+ if constexpr (N > 1)
+ {
+ if (CMT_UNLIKELY(self.masks[idx].back() == 0))
+ return get_elements(std::get<idx>(self.args), indices, vec_shape<U, 1>{}).front();
+ else
+ return get_elements(std::get<idx>(self.args), indices, sh);
+ }
+ else
+ {
+ return get_elements(std::get<idx>(self.args), indices, sh);
+ }
+ }
+ }
+}
+} // namespace internal
+
+template <typename Fn, typename... Args, typename U, size_t N, index_t Dims>
+KFR_MEM_INTRINSIC vec<U, N> get_elements(const xfunction<Fn, Args...>& self, const shape<Dims>& index,
+ vec_shape<U, N> sh)
+{
+ constexpr index_t outdims = expression_traits<xfunction<Fn, Args...>>::dims;
+ return self.fold_idx(
+ [&](auto... idx) CMT_INLINE_LAMBDA -> vec<U, N> {
+ return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...);
+ });
+}
namespace internal
{
@@ -260,7 +693,10 @@ struct expression_with_arguments : input_expression
constexpr static size_t count = sizeof...(Args);
expression_with_arguments() = delete;
- constexpr expression_with_arguments(Args&&... args) CMT_NOEXCEPT : args(std::forward<Args>(args)...) {}
+ KFR_MEM_INTRINSIC constexpr expression_with_arguments(Args&&... args) CMT_NOEXCEPT
+ : args(std::forward<Args>(args)...)
+ {
+ }
KFR_MEM_INTRINSIC void begin_block(cinput_t cinput, size_t size) const
{
@@ -427,9 +863,9 @@ CMT_INTRINSIC internal::expression_function<Fn, NewArgs...> rebind(
template <size_t width = 0, typename OutputExpr, typename InputExpr, size_t groupsize = 1,
typename Tvec = vec<value_type_of<InputExpr>, 1>>
-static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0,
- size_t size = infinite_size, coutput_t coutput = nullptr,
- cinput_t cinput = nullptr, csize_t<groupsize> = csize_t<groupsize>())
+static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0, size_t size = infinite_size,
+ coutput_t coutput = nullptr, cinput_t cinput = nullptr,
+ csize_t<groupsize> = csize_t<groupsize>())
{
using Tin = value_type_of<InputExpr>;
static_assert(is_output_expression<OutputExpr>, "OutFn must be an expression");
@@ -444,7 +880,7 @@ static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0,
#ifdef NDEBUG
constexpr size_t w = width == 0 ? maximum_vector_size<Tin> : width;
#else
- constexpr size_t w = width == 0 ? vector_width<Tin> : width;
+ constexpr size_t w = width == 0 ? vector_width<Tin> : width;
#endif
static_assert(w > 0 && is_poweroftwo(w), "");
diff --git a/include/kfr/base/impl/static_array.hpp b/include/kfr/base/impl/static_array.hpp
@@ -0,0 +1,87 @@
+#include "../../cometa.hpp"
+#include "../../kfr.h"
+
+namespace kfr
+{
+using namespace cometa;
+
+template <typename T, size_t>
+using type_for = T;
+
+template <typename T, typename indices_t>
+struct static_array_base;
+
+template <typename T, size_t... indices>
+struct static_array_base<T, csizes_t<indices...>>
+{
+ using value_type = T;
+ using size_type = size_t;
+ using difference_type = ptrdiff_t;
+ using reference = value_type&;
+ using pointer = value_type*;
+ using iterator = pointer;
+ using const_reference = const value_type&;
+ using const_pointer = const value_type*;
+ using const_iterator = const_pointer;
+
+ constexpr static size_t static_size = sizeof...(indices);
+
+ constexpr static_array_base() = default;
+ constexpr static_array_base(const static_array_base&) = default;
+ constexpr static_array_base(static_array_base&&) = default;
+
+ KFR_MEM_INTRINSIC constexpr static_array_base(type_for<value_type, indices>... args)
+ {
+ (static_cast<void>(array[indices] = args), ...);
+ }
+
+ constexpr static_array_base& operator=(const static_array_base&) = default;
+ constexpr static_array_base& operator=(static_array_base&&) = default;
+
+ template <int dummy = 0, CMT_ENABLE_IF(dummy == 0 && static_size > 1)>
+ KFR_MEM_INTRINSIC constexpr static_array_base(value_type value)
+ {
+ (static_cast<void>(array[indices] = value), ...);
+ }
+
+ KFR_MEM_INTRINSIC vec<T, static_size> operator*() const { return read<static_size>(data()); }
+
+ KFR_MEM_INTRINSIC static_array_base(const vec<T, static_size>& v) { write(data(), v); }
+
+ KFR_MEM_INTRINSIC constexpr const value_type* data() const { return std::data(array); }
+ KFR_MEM_INTRINSIC constexpr value_type* data() { return std::data(array); }
+
+ KFR_MEM_INTRINSIC constexpr const_iterator begin() const { return std::begin(array); }
+ KFR_MEM_INTRINSIC constexpr iterator begin() { return std::begin(array); }
+ KFR_MEM_INTRINSIC constexpr const_iterator cbegin() const { return std::begin(array); }
+
+ KFR_MEM_INTRINSIC constexpr const_iterator end() const { return std::end(array); }
+ KFR_MEM_INTRINSIC constexpr iterator end() { return std::end(array); }
+ KFR_MEM_INTRINSIC constexpr const_iterator cend() const { return std::end(array); }
+
+ KFR_MEM_INTRINSIC constexpr const_reference operator[](size_t index) const { return array[index]; }
+ KFR_MEM_INTRINSIC constexpr reference operator[](size_t index) { return array[index]; }
+
+ KFR_MEM_INTRINSIC constexpr const_reference front() const { return array[0]; }
+ KFR_MEM_INTRINSIC constexpr reference front() { return array[0]; }
+
+ KFR_MEM_INTRINSIC constexpr const_reference back() const { return array[static_size - 1]; }
+ KFR_MEM_INTRINSIC constexpr reference back() { return array[static_size - 1]; }
+
+ KFR_MEM_INTRINSIC constexpr bool empty() const { return false; }
+
+ KFR_MEM_INTRINSIC constexpr size_t size() const { return std::size(array); }
+
+ KFR_MEM_INTRINSIC constexpr bool operator==(const static_array_base& other) const
+ {
+ return ((array[indices] == other.array[indices]) && ...);
+ }
+ KFR_MEM_INTRINSIC constexpr bool operator!=(const static_array_base& other) const
+ {
+ return !operator==(other);
+ }
+
+private:
+ T array[static_size];
+};
+} // namespace kfr
diff --git a/include/kfr/base/shape.hpp b/include/kfr/base/shape.hpp
@@ -0,0 +1,515 @@
+#include "impl/static_array.hpp"
+
+#include "../simd/shuffle.hpp"
+#include "../simd/types.hpp"
+#include "../simd/vec.hpp"
+
+#include <bitset>
+
+namespace kfr
+{
+
+#ifndef KFR_32BIT_INDICES
+using index_t = size_t;
+#else
+using index_t = uint32_t;
+#endif
+constexpr inline index_t max_index_t = std::numeric_limits<index_t>::max();
+
+constexpr inline index_t infinite_size = max_index_t;
+
+constexpr inline index_t maximum_dims = 8;
+
+using dimset = vec<i8, maximum_dims>;
+
+CMT_INTRINSIC constexpr index_t size_add(index_t x, index_t y)
+{
+ return (x == infinite_size || y == infinite_size) ? infinite_size : x + y;
+}
+
+CMT_INTRINSIC constexpr index_t size_sub(index_t x, index_t y)
+{
+ return (x == infinite_size || y == infinite_size) ? infinite_size : (x > y ? x - y : 0);
+}
+
+CMT_INTRINSIC constexpr index_t size_min(index_t x) CMT_NOEXCEPT { return x; }
+
+template <typename... Ts>
+CMT_INTRINSIC constexpr index_t size_min(index_t x, index_t y, Ts... rest) CMT_NOEXCEPT
+{
+ return size_min(x < y ? x : y, rest...);
+}
+
+template <index_t dims>
+struct shape;
+
+namespace internal_generic
+{
+template <index_t dims>
+KFR_INTRINSIC bool increment_indices(shape<dims>& indices, const shape<dims>& start, const shape<dims>& stop,
+ index_t dim = dims - 1);
+}
+
+template <index_t dims>
+struct shape : static_array_base<index_t, csizeseq_t<dims>>
+{
+ using static_array_base<index_t, csizeseq_t<dims>>::static_array_base;
+
+ static_assert(dims < maximum_dims);
+
+ shape operator+(const shape& other) const
+ {
+ vec<index_t, dims> x = **this;
+ vec<index_t, dims> y = *other;
+ mask<index_t, dims> inf = (x == infinite_size) || (y == infinite_size);
+ return select(inf, infinite_size, x + y);
+ }
+ shape operator-(const shape& other) const
+ {
+ vec<index_t, dims> x = **this;
+ vec<index_t, dims> y = *other;
+ mask<index_t, dims> inf = (x == infinite_size) || (y == infinite_size);
+ return select(inf, infinite_size, x - y);
+ }
+
+ friend shape min(const shape& x, const shape& y) { return kfr::min(*x, *y); }
+
+ KFR_MEM_INTRINSIC size_t to_flat(const shape<dims>& indices) const
+ {
+ size_t result = 0;
+ size_t scale = 1;
+ for (size_t i = 0; i < dims; ++i)
+ {
+ result += scale * indices[dims - 1 - i];
+ scale *= (*this)[dims - 1 - i];
+ }
+ return result;
+ }
+ KFR_MEM_INTRINSIC shape<dims> from_flat(size_t index) const
+ {
+ shape<dims> indices;
+ for (size_t i = 0; i < dims; ++i)
+ {
+ size_t sz = (*this)[dims - 1 - i];
+ indices[dims - 1 - i] = index % sz;
+ index /= sz;
+ }
+ return indices;
+ }
+
+ KFR_MEM_INTRINSIC index_t dot(const shape& other) const
+ {
+ if constexpr (dims == 1)
+ {
+ return (*this)[0] * other[0];
+ }
+ else if constexpr (dims == 2)
+ {
+ return (*this)[0] * other[0] + (*this)[1] * other[1];
+ }
+ else
+ {
+ return hdot(**this, *other);
+ }
+ }
+
+ template <index_t indims>
+ KFR_MEM_INTRINSIC shape adapt(const shape<indims>& other) const
+ {
+ static_assert(indims >= dims);
+ return min(other.template trim<dims>(), **this - 1);
+ }
+
+ KFR_MEM_INTRINSIC index_t product() const { return hproduct(**this); }
+ KFR_MEM_INTRINSIC constexpr index_t cproduct() const
+ {
+ index_t result = this->front();
+ for (index_t i = 1; i < dims; i++)
+ result *= this->operator[](i);
+ return result;
+ }
+
+ KFR_MEM_INTRINSIC dimset tomask() const
+ {
+ dimset result = 0;
+ for (index_t i = 0; i < dims; ++i)
+ {
+ result[i + maximum_dims - dims] = this->operator[](i) == 1 ? 0 : -1;
+ }
+ return result;
+ }
+
+ template <size_t odims>
+ shape<odims> trim() const
+ {
+ static_assert(odims <= dims);
+ if constexpr (odims > 0)
+ {
+ return slice<dims - odims, odims>(**this);
+ }
+ else
+ {
+ return {};
+ }
+ }
+
+ KFR_MEM_INTRINSIC shape<dims - 1> trunc() const
+ {
+ if constexpr (dims > 1)
+ {
+ return slice<0, dims - 1>(**this);
+ }
+ else
+ {
+ return {};
+ }
+ }
+
+ shape<dims + 1> extend() const { return concat(**this, vec{ index_t(0) }); }
+
+ KFR_MEM_INTRINSIC constexpr index_t revindex(size_t index) const
+ {
+ return index < dims ? this->operator[](dims - 1 - index) : 1;
+ }
+ KFR_MEM_INTRINSIC constexpr void set_revindex(size_t index, index_t val)
+ {
+ if (index < dims)
+ this->operator[](dims - 1 - index) = val;
+ }
+};
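A small worked example of the row-major index helpers (illustrative, not part of the patch):

    #include <kfr/base/shape.hpp>

    kfr::shape<3> s{ 2, 3, 4 };
    // revindex counts axes from the innermost one:
    //   s.revindex(0) == 4, s.revindex(1) == 3, s.revindex(2) == 2
    // to_flat/from_flat convert between multi-indices and flat row-major offsets:
    //   s.to_flat({ 1, 2, 3 }) == 1 * (3 * 4) + 2 * 4 + 3 == 23
    //   s.from_flat(23) == shape{ 1, 2, 3 }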
+
+template <>
+struct shape<0>
+{
+ static constexpr size_t static_size = 0;
+
+ static constexpr size_t size() { return static_size; }
+
+ shape() = default;
+ shape(index_t value) {}
+
+ KFR_MEM_INTRINSIC size_t to_flat(const shape<0>& indices) const { return 0; }
+ KFR_MEM_INTRINSIC shape<0> from_flat(size_t index) const { return {}; }
+
+ template <index_t odims>
+ KFR_MEM_INTRINSIC shape<0> adapt(const shape<odims>& other) const
+ {
+ return {};
+ }
+
+ KFR_MEM_INTRINSIC size_t product() const { return 0; }
+
+ KFR_MEM_INTRINSIC dimset tomask() const { return -1; }
+
+ shape<1> extend() const { return { 0 }; }
+
+ template <size_t new_dims>
+ shape<new_dims> trim() const
+ {
+ static_assert(new_dims == 0);
+ return {};
+ }
+
+ KFR_MEM_INTRINSIC bool operator==(const shape<0>& other) const { return true; }
+ KFR_MEM_INTRINSIC bool operator!=(const shape<0>& other) const { return false; }
+
+ KFR_MEM_INTRINSIC index_t revindex(size_t index) const { return 1; }
+ KFR_MEM_INTRINSIC void set_revindex(size_t index, index_t val) {}
+};
+
+template <typename... Args>
+shape(Args&&... args) -> shape<sizeof...(Args)>;
+
+namespace internal_generic
+{
+
+template <index_t outdims, index_t indims>
+KFR_MEM_INTRINSIC shape<outdims> adapt(const shape<indims>& in, const dimset& set)
+{
+ static_assert(indims >= outdims);
+ if constexpr (outdims == 0)
+ {
+ return {};
+ }
+ else
+ {
+ const vec<std::make_signed_t<index_t>, maximum_dims> eset = cast<std::make_signed_t<index_t>>(set);
+ return slice<indims - outdims, outdims>(*in) & slice<maximum_dims - outdims, outdims>(eset);
+ }
+}
+template <index_t outdims>
+KFR_MEM_INTRINSIC shape<outdims> adapt(const shape<0>& in, const dimset& set)
+{
+ static_assert(outdims == 0);
+ return {};
+}
+} // namespace internal_generic
+
+template <size_t Dims>
+struct cursor
+{
+ shape<Dims> current;
+ shape<Dims> minimum;
+ shape<Dims> maximum;
+};
+
+struct tensor_range
+{
+ index_t start = 0;
+ index_t stop = max_index_t;
+ index_t step = 1;
+
+ constexpr KFR_INTRINSIC index_t size() const { return stop - start; }
+};
+
+constexpr KFR_INTRINSIC tensor_range tstart(index_t start, index_t step = 1)
+{
+ return { start, max_index_t, step };
+}
+constexpr KFR_INTRINSIC tensor_range tstop(index_t stop, index_t step = 1) { return { 0, stop, step }; }
+constexpr KFR_INTRINSIC tensor_range trange(index_t start, index_t stop, index_t step = 1)
+{
+ return { start, stop, step };
+}
+constexpr KFR_INTRINSIC tensor_range trange_n(index_t start, index_t size, index_t step = 1)
+{
+ return { start, start + size, step };
+}
+constexpr KFR_INTRINSIC tensor_range tall() { return { 0, max_index_t, 1 }; }
+
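The helpers above describe half-open [start, stop) ranges with an optional step; a few equivalences as an illustration (not part of the patch):

    #include <kfr/base/shape.hpp>

    static_assert(kfr::trange(2, 6).size() == 4);   // selects indices 2, 3, 4, 5
    static_assert(kfr::trange_n(2, 4).size() == 4); // the same range written as start + size
    static_assert(kfr::tstop(5).size() == 5);       // 0 .. 4; tstart(n) runs to the end, tall() keeps the whole axis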
+namespace internal_generic
+{
+
+constexpr inline index_t null_index = max_index_t;
+
+template <index_t dims>
+constexpr KFR_INTRINSIC shape<dims> strides_for_shape(const shape<dims>& sh, index_t stride = 1)
+{
+ shape<dims> strides;
+ index_t n = stride;
+ for (index_t i = 0; i < dims; ++i)
+ {
+ strides[dims - 1 - i] = n;
+ n *= sh[dims - 1 - i];
+ }
+ return strides;
+}
+
+template <typename Index>
+constexpr KFR_INTRINSIC index_t get_start(const Index& idx)
+{
+ if constexpr (std::is_same_v<std::decay_t<Index>, tensor_range>)
+ {
+ return idx.start;
+ }
+ else
+ {
+ static_assert(std::is_convertible_v<Index, index_t>);
+ return static_cast<index_t>(idx);
+ }
+}
+template <typename Index>
+constexpr KFR_INTRINSIC index_t get_stop(const Index& idx)
+{
+ if constexpr (std::is_same_v<std::decay_t<Index>, tensor_range>)
+ {
+ return idx.stop;
+ }
+ else
+ {
+ static_assert(std::is_convertible_v<Index, index_t>);
+ return static_cast<index_t>(idx) + 1;
+ }
+}
+template <size_t dims, size_t outdims, bool... ranges>
+constexpr KFR_INTRINSIC shape<outdims> compact_shape(const shape<dims>& in)
+{
+ shape<outdims> result;
+ constexpr std::array flags{ ranges... };
+ size_t j = 0;
+ for (size_t i = 0; i < dims; ++i)
+ {
+ if (i >= flags.size() || flags[i])
+ {
+ result[j++] = in[i];
+ }
+ }
+ return result;
+}
+
+template <index_t dims1, index_t dims2, index_t outdims = const_max(dims1, dims2)>
+bool can_assign_from(const shape<dims1>& dst_shape, const shape<dims2>& src_shape)
+{
+ for (size_t i = 0; i < outdims; ++i)
+ {
+ index_t dst_size = dst_shape.revindex(i);
+ index_t src_size = src_shape.revindex(i);
+ if (src_size == 1 || src_size == infinite_size || src_size == dst_size)
+ {
+ }
+ else
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <index_t dims1, index_t dims2, index_t outdims = const_max(dims1, dims2)>
+constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<dims2>& shape2)
+{
+ shape<outdims> result;
+ for (size_t i = 0; i < outdims; ++i)
+ {
+ index_t size1 = shape1.revindex(i);
+ index_t size2 = shape2.revindex(i);
+ if (size1 == 1 || size2 == 1 || size1 == size2)
+ {
+ result[outdims - 1 - i] = std::max(size1, size2);
+ }
+ else
+ {
+ // broadcast failed
+ result = 0;
+ return result;
+ }
+ }
+ return result;
+}
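Broadcasting follows the usual rule: shapes are matched from the innermost axis, size-1 (or missing) axes stretch, and any other mismatch yields the zero shape. A sketch (not part of the patch):

    #include <kfr/base/shape.hpp>

    inline void broadcast_examples()
    {
        using kfr::shape;
        using kfr::internal_generic::common_shape;
        shape<2> a = common_shape(shape{ 1, 8 }, shape{ 4, 1 }); // { 4, 8 }
        shape<2> b = common_shape(shape{ 3 }, shape{ 2, 3 });    // { 2, 3 }: missing leading axes act as 1
        shape<1> c = common_shape(shape{ 5 }, shape{ 4 });       // { 0 }: incompatible sizes
    }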
+
+template <>
+KFR_MEM_INTRINSIC shape<0> common_shape(const shape<0>& shape1, const shape<0>& shape2)
+{
+ return {};
+}
+
+template <index_t dims1, index_t dims2>
+KFR_MEM_INTRINSIC bool same_layout(const shape<dims1>& x, const shape<dims2>& y)
+{
+ for (index_t i = 0, j = 0;;)
+ {
+ while (i < dims1 && x[i] == 1)
+ ++i;
+ while (j < dims2 && y[j] == 1)
+ ++j;
+ if (i == dims1 && j == dims2)
+ {
+ return true;
+ }
+ if (i < dims1 && j < dims2)
+ {
+ if (x[i] != y[j])
+ return false;
+ }
+ else
+ {
+ return false;
+ }
+ ++i;
+ ++j;
+ }
+}
+
+#ifdef KFR_VEC_INDICES
+template <size_t step, index_t dims>
+KFR_INTRINSIC vec<index_t, dims> increment_indices(vec<index_t, dims> indices,
+ const vec<index_t, dims>& start,
+ const vec<index_t, dims>& stop)
+{
+ indices = indices + make_vector(cconcat(cvalseq<index_t, dims - 1 - step, 0, 0>, cvalseq<index_t, 1, 1>,
+ cvalseq<index_t, step, 0, 0>));
+
+ if constexpr (step + 1 < dims)
+ {
+ vec<bit<index_t>, dims> mask = indices >= stop;
+ if (CMT_LIKELY(!any(mask)))
+ return indices;
+ indices = blend(indices, start, cconcat(csizeseq<dims - step - 1, 0, 0>, csizeseq<step + 1, 1, 0>));
+
+ return increment_indices<step + 1>(indices, start, stop);
+ }
+ else
+ {
+ return indices;
+ }
+}
+#endif
+
+template <index_t dims>
+KFR_INTRINSIC bool compare_indices(const shape<dims>& indices, const shape<dims>& stop,
+ index_t dim = dims - 1)
+{
+ CMT_LOOP_UNROLL
+ for (int i = dim; i >= 0; --i)
+ {
+ if (CMT_UNLIKELY(indices[i] >= stop[i]))
+ return false;
+ }
+ return true;
+}
+
+template <index_t dims>
+KFR_INTRINSIC bool increment_indices(shape<dims>& indices, const shape<dims>& start, const shape<dims>& stop,
+ index_t dim)
+{
+#ifdef KFR_VEC_INDICES
+ vec<index_t, dims> idx = increment_indices<0>(*indices, *start, *stop);
+ indices = idx;
+ if (any(idx == *stop))
+ return false;
+ return true;
+#else
+
+ indices[dim] += 1;
+ CMT_LOOP_UNROLL
+ for (int i = dim; i >= 0;)
+ {
+ if (CMT_LIKELY(indices[i] < stop[i]))
+ return true;
+ // carry
+ indices[i] = start[i];
+ --i;
+ if (i < 0)
+ {
+ return false;
+ }
+ indices[i] += 1;
+ }
+ return true;
+#endif
+}
+
+template <index_t dims>
+KFR_INTRINSIC shape<dims> increment_indices_return(const shape<dims>& indices, const shape<dims>& start,
+ const shape<dims>& stop, index_t dim = dims - 1)
+{
+ shape<dims> result = indices;
+ if (increment_indices(result, start, stop, dim))
+ {
+ return result;
+ }
+ else
+ {
+ return null_index;
+ }
+}
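increment_indices advances a multi-index in row-major order, carrying into the next-outer axis and returning false once the iteration space is exhausted; a minimal sketch (not part of the patch):

    #include <kfr/base/shape.hpp>

    inline void walk_2x3()
    {
        kfr::shape<2> idx{ 0, 0 };
        const kfr::shape<2> start{ 0, 0 };
        const kfr::shape<2> stop{ 2, 3 };
        do
        {
            // visits (0,0) (0,1) (0,2) (1,0) (1,1) (1,2)
        } while (kfr::internal_generic::increment_indices(idx, start, stop));
    }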
+
+template <typename... Index>
+constexpr KFR_INTRINSIC size_t count_dimensions()
+{
+ return ((std::is_same_v<std::decay_t<Index>, tensor_range> ? 1 : 0) + ...);
+}
+} // namespace internal_generic
+
+template <index_t dims>
+constexpr KFR_INTRINSIC index_t size_of_shape(const shape<dims>& shape)
+{
+ index_t n = 1;
+ for (index_t i = 0; i < dims; ++i)
+ {
+ n *= shape[i];
+ }
+ return n;
+}
+
+} // namespace kfr
diff --git a/include/kfr/base/tensor.hpp b/include/kfr/base/tensor.hpp
@@ -0,0 +1,822 @@
+/** @addtogroup expressions
+ * @{
+ */
+/*
+ Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+ This file is part of KFR
+
+ KFR is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ KFR is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with KFR.
+
+ If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ Buying a commercial license is mandatory as soon as you develop commercial activities without
+ disclosing the source code of your own applications.
+ See https://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include <optional>
+
+#include "../cometa/array.hpp"
+
+#include "../math/logical.hpp"
+#include "../math/min_max.hpp"
+#include "../simd/horizontal.hpp"
+#include "../simd/impl/function.hpp"
+#include "../simd/read_write.hpp"
+#include "../simd/types.hpp"
+#include "memory.hpp"
+
+CMT_PRAGMA_MSVC(warning(push))
+CMT_PRAGMA_MSVC(warning(disable : 4324))
+
+namespace kfr
+{
+
+using memory_finalizer = std::shared_ptr<void>;
+
+template <typename Fn>
+memory_finalizer KFR_INTRINSIC make_memory_finalizer(Fn&& fn)
+{
+ return std::shared_ptr<void>(nullptr, [fn = std::move(fn)](void*) { fn(); });
+}
+
+template <typename T, typename Derived, typename Dims>
+struct tensor_subscript;
+
+template <typename T, typename Derived, index_t... Dims>
+struct tensor_subscript<T, Derived, std::integer_sequence<size_t, Dims...>>
+{
+ constexpr static inline size_t dims = sizeof...(Dims);
+
+ using reference = T&;
+ using const_reference = const T&;
+
+ reference KFR_MEM_INTRINSIC operator()(type_for<index_t, Dims>... idx) const
+ {
+ return static_cast<const Derived&>(*this).access(shape<dims>{ idx... });
+ }
+};
+
+template <typename T, index_t NDims>
+struct tensor : public tensor_subscript<T, tensor<T, NDims>, std::make_index_sequence<NDims>>
+{
+public:
+ using value_type = T;
+ using pointer = T* CMT_RESTRICT;
+ using const_pointer = const T* CMT_RESTRICT;
+ using reference = T&;
+ using const_reference = const T&;
+ using size_type = index_t;
+
+ constexpr static inline index_t dims = NDims;
+
+ using shape_type = shape<dims>;
+
+ struct tensor_iterator
+ {
+ using iterator_category = std::forward_iterator_tag;
+ using difference_type = std::ptrdiff_t;
+ using value_type = T;
+ using pointer = T*;
+ using reference = T&;
+
+ const tensor* src;
+ shape_type indices;
+
+ KFR_MEM_INTRINSIC index_t flat_index() const { return src->calc_index(indices); }
+
+ KFR_MEM_INTRINSIC bool is_end() const { return indices.front() == internal_generic::null_index; }
+
+ template <size_t num>
+ KFR_MEM_INTRINSIC shape<num> advance()
+ {
+ shape<num> result;
+ for (size_t i = 0; i < num; ++i)
+ {
+ result[i] = src->calc_index(indices);
+ ++*this;
+ if (is_end())
+ break;
+ }
+ return result;
+ }
+
+ KFR_MEM_INTRINSIC T& operator*() { return src->m_data[flat_index()]; }
+ KFR_MEM_INTRINSIC T* operator->() { return &operator*(); }
+
+ // prefix
+ KFR_MEM_INTRINSIC tensor_iterator& operator++()
+ {
+ if (!internal_generic::increment_indices(indices, kfr::shape<dims>(0), src->m_shape))
+ {
+ indices = internal_generic::null_index;
+ }
+ return *this;
+ }
+
+ // postfix
+ KFR_MEM_INTRINSIC tensor_iterator operator++(int)
+ {
+ tensor_iterator temp = *this;
+ ++*this;
+ return temp;
+ }
+
+ KFR_MEM_INTRINSIC bool operator==(const tensor_iterator& it) const
+ {
+ return src == it.src && indices == it.indices;
+ }
+ KFR_MEM_INTRINSIC bool operator!=(const tensor_iterator& it) const { return !operator==(it); }
+ };
+
+ using iterator = tensor_iterator;
+ using const_iterator = tensor_iterator;
+
+ using contiguous_iterator = pointer;
+ using const_contiguous_iterator = pointer;
+
+ KFR_MEM_INTRINSIC constexpr tensor()
+ : m_data(0), m_size(0), m_is_contiguous(false), m_shape{}, m_strides{}
+ {
+ }
+
+ KFR_MEM_INTRINSIC tensor(T* data, const shape_type& shape, const shape_type& strides,
+ memory_finalizer finalizer)
+ : m_data(data), m_size(size_of_shape(shape)),
+ m_is_contiguous(strides == internal_generic::strides_for_shape(shape)), m_shape(shape),
+ m_strides(strides), m_finalizer(std::move(finalizer))
+ {
+ }
+
+ KFR_MEM_INTRINSIC tensor(T* data, const shape_type& shape, memory_finalizer finalizer)
+ : m_data(data), m_size(size_of_shape(shape)), m_is_contiguous(true), m_shape(shape),
+ m_strides(internal_generic::strides_for_shape(shape)), m_finalizer(std::move(finalizer))
+ {
+ }
+
+ KFR_INTRINSIC static T* allocate(size_t size) { return aligned_allocate<T>(size, 64); }
+
+ KFR_INTRINSIC static void deallocate(T* ptr) { aligned_deallocate(ptr); }
+
+ KFR_INTRINSIC explicit tensor(const shape_type& shape)
+ : m_size(size_of_shape(shape)), m_is_contiguous(true), m_shape(shape),
+ m_strides(internal_generic::strides_for_shape(shape))
+ {
+ T* ptr = allocate(m_size);
+ m_data = ptr;
+ m_finalizer = make_memory_finalizer([ptr]() { deallocate(ptr); });
+ }
+ KFR_INTRINSIC tensor(const shape_type& shape, const shape_type& strides)
+ : m_size(size_of_shape(shape)),
+ m_is_contiguous(strides == internal_generic::strides_for_shape(shape)), m_shape(shape),
+ m_strides(strides)
+ {
+ T* ptr = allocate(m_size);
+ m_data = ptr;
+ m_finalizer = make_memory_finalizer([ptr]() { deallocate(ptr); });
+ }
+ KFR_INTRINSIC tensor(const shape_type& shape, T value) : tensor(shape)
+ {
+ std::fill(contiguous_begin(), contiguous_end(), value);
+ }
+
+ KFR_INTRINSIC tensor(const shape_type& shape, const shape_type& strides, T value) : tensor(shape, strides)
+ {
+ std::fill(begin(), end(), value);
+ }
+ KFR_INTRINSIC tensor(const shape_type& shape, std::initializer_list<T> values) : tensor(shape)
+ {
+ if (values.size() != m_size)
+ throw std::runtime_error("Invalid initializer provided for kfr::tensor");
+ std::copy(values.begin(), values.end(), contiguous_begin());
+ }
+
+ KFR_INTRINSIC tensor(const shape_type& shape, const shape_type& strides, std::initializer_list<T> values)
+ : tensor(shape, strides)
+ {
+ if (values.size() != m_size)
+ throw std::runtime_error("Invalid initializer provided for kfr::tensor");
+ std::copy(values.begin(), values.end(), begin());
+ }
+
+ KFR_INTRINSIC pointer data() const { return m_data; }
+
+ KFR_INTRINSIC size_type size() const { return m_size; }
+
+ KFR_INTRINSIC tensor_iterator begin() const { return tensor_iterator{ this, 0 }; }
+ KFR_INTRINSIC tensor_iterator end() const
+ {
+ return tensor_iterator{ this, internal_generic::null_index };
+ }
+
+ KFR_INTRINSIC void require_contiguous() const
+ {
+ if (!m_is_contiguous)
+ throw std::runtime_error("Contiguous array is required");
+ }
+
+ KFR_INTRINSIC contiguous_iterator contiguous_begin() const
+ {
+ require_contiguous();
+ return m_data;
+ }
+ KFR_INTRINSIC contiguous_iterator contiguous_end() const
+ {
+ require_contiguous();
+ return m_data + m_size;
+ }
+
+ KFR_INTRINSIC contiguous_iterator contiguous_begin_unsafe() const { return m_data; }
+ KFR_INTRINSIC contiguous_iterator contiguous_end_unsafe() const { return m_data + m_size; }
+
+ KFR_MEM_INTRINSIC index_t calc_index(const shape_type& indices) const { return indices.dot(m_strides); }
+
+ KFR_MEM_INTRINSIC reference access(const shape_type& indices) const
+ {
+ return m_data[calc_index(indices)];
+ }
+
+ KFR_MEM_INTRINSIC reference operator[](index_t flat_index) const { return m_data[flat_index]; }
+
+ KFR_MEM_INTRINSIC tensor operator()(const shape_type& start, const shape_type& stop) const
+ {
+ return tensor{
+ m_data + calc_index(start),
+ stop - start,
+ m_strides,
+ m_finalizer,
+ };
+ }
+
+ tensor(const tensor&) = default;
+ tensor(tensor&&) = default;
+
+#if defined(CMT_COMPILER_MSVC) && !defined(CMT_COMPILER_CLANG)
+ tensor& operator=(const tensor& src) &
+ {
+ this->~tensor();
+ new (this) tensor(src);
+ return *this;
+ }
+ tensor& operator=(tensor&& src) &
+ {
+ this->~tensor();
+ new (this) tensor(std::move(src));
+ return *this;
+ }
+#else
+ tensor& operator=(const tensor& src) & = default;
+ tensor& operator=(tensor&& src) & = default;
+#endif
+
+ KFR_MEM_INTRINSIC const tensor& operator=(const tensor& src) const&
+ {
+ assign(src);
+ return *this;
+ }
+ KFR_MEM_INTRINSIC tensor& operator=(const tensor& src) &&
+ {
+ assign(src);
+ return *this;
+ }
+ KFR_MEM_INTRINSIC const tensor& operator=(const T& scalar) const&
+ {
+ assign(scalar);
+ return *this;
+ }
+ KFR_MEM_INTRINSIC tensor& operator=(const T& scalar) &&
+ {
+ assign(scalar);
+ return *this;
+ }
+
+ KFR_MEM_INTRINSIC void assign(const tensor& src) const
+ {
+ if (src.shape() != m_shape)
+ throw std::range_error("Tensors must have the same shape");
+ std::copy(src.begin(), src.end(), begin());
+ }
+ KFR_MEM_INTRINSIC void assign(const T& scalar) const { std::fill(begin(), end(), scalar); }
+
+ template <typename... Index>
+ static constexpr bool has_tensor_range = (std::is_same_v<Index, tensor_range> || ...);
+
+ template <typename... Index,
+ size_t ndimOut = internal_generic::count_dimensions<Index...>() + (dims - sizeof...(Index)),
+ std::enable_if_t<has_tensor_range<Index...> || (sizeof...(Index) < dims)>* = nullptr>
+ KFR_MEM_INTRINSIC tensor<T, ndimOut> operator()(const Index&... idx) const
+ {
+ shape_type start{ internal_generic::get_start(idx)... };
+ shape_type stop{ internal_generic::get_stop(idx)... };
+ stop = min(*stop, *m_shape);
+ shape_type strides = m_strides;
+ for (index_t i = sizeof...(Index); i < dims; ++i)
+ {
+ start[i] = 0;
+ stop[i] = m_shape[i];
+ }
+ T* data = m_data + calc_index(start);
+ shape_type shape = *stop - *start;
+
+ return tensor<T, ndimOut>{
+ data,
+ internal_generic::compact_shape<dims, ndimOut, std::is_same_v<Index, tensor_range>...>(shape),
+ internal_generic::compact_shape<dims, ndimOut, std::is_same_v<Index, tensor_range>...>(strides),
+ m_finalizer,
+ };
+ }
+
+ using tensor_subscript<T, tensor<T, NDims>, std::make_index_sequence<NDims>>::operator();
+
+ template <index_t dims>
+ KFR_MEM_INTRINSIC tensor<T, dims> reshape_may_copy(const shape<dims>& new_shape,
+ bool allow_copy = false) const
+ {
+ if (size_of_shape(new_shape) != m_size)
+ {
+ throw std::runtime_error("Invalid shape provided");
+ }
+ /*
+ TODO: reshape must be possible with non-contiguous arrays:
+ [256, 256, 1] -> [256, 256]
+ [256, 256] -> [256, 256, 1]
+ [256, 256] -> [256, 1, 256]
+ */
+ if (!is_contiguous())
+ {
+ if (allow_copy)
+ {
+ tensor<T, dims> result(new_shape);
+ std::copy(begin(), end(), result.contiguous_begin());
+ return result;
+ }
+ else
+ {
+ throw std::runtime_error("reshape requires contiguous array");
+ }
+ }
+ return tensor<T, dims>{
+ m_data,
+ new_shape,
+ internal_generic::strides_for_shape(new_shape),
+ m_finalizer,
+ };
+ }
+
+ template <index_t dims>
+ KFR_MEM_INTRINSIC tensor<T, dims> reshape(const shape<dims>& new_shape) const
+ {
+ return reshape_may_copy(new_shape, false);
+ }
+
+ KFR_MEM_INTRINSIC tensor<T, 1> flatten() const { return reshape(shape<1>{ m_size }); }
+
+ KFR_MEM_INTRINSIC tensor<T, 1> flatten_may_copy(bool allow_copy = false) const
+ {
+ return reshape_may_copy(shape<1>{ m_size }, allow_copy);
+ }
+
+ KFR_MEM_INTRINSIC tensor copy() const
+ {
+ tensor result(m_shape);
+ std::copy(begin(), end(), result.contiguous_begin());
+ return result;
+ }
+
+ KFR_MEM_INTRINSIC void copy_from(const tensor& other) { std::copy(other.begin(), other.end(), begin()); }
+
+ template <typename Fn>
+ KFR_MEM_INTRINSIC void iterate(Fn&& fn) const
+ {
+ auto it = begin();
+ while (it != end())
+ {
+ fn(*it, it.indices);
+ ++it;
+ }
+ }
+
+ template <typename Fn, typename Tout = std::invoke_result_t<Fn, T>>
+ KFR_MEM_INTRINSIC tensor<Tout, dims> map(Fn&& fn) const
+ {
+ return unary(std::forward<Fn>(fn));
+ }
+
+ template <typename Fn, typename Tout = std::invoke_result_t<Fn, T>>
+ KFR_MEM_INTRINSIC tensor<Tout, dims> unary(Fn&& fn) const
+ {
+ tensor<Tout, dims> result(m_shape);
+ auto dst = result.contiguous_begin_unsafe();
+ if (is_contiguous())
+ {
+ auto src = contiguous_begin_unsafe();
+ while (src != contiguous_end_unsafe())
+ {
+ *dst = fn(*src);
+ ++src;
+ ++dst;
+ }
+ }
+ else
+ {
+ auto src = begin();
+ while (src != end())
+ {
+ *dst = fn(*src);
+ ++src;
+ ++dst;
+ }
+ }
+ return result;
+ }
+
+ template <typename Fn>
+ KFR_MEM_INTRINSIC const tensor& unary_inplace(Fn&& fn) const
+ {
+ if (is_contiguous())
+ {
+ auto it = contiguous_begin_unsafe();
+ while (it != contiguous_end_unsafe())
+ {
+ *it = fn(*it);
+ ++it;
+ }
+ }
+ else
+ {
+ auto it = begin();
+ while (it != end())
+ {
+ *it = fn(*it);
+ ++it;
+ }
+ }
+ return *this;
+ }
+
+ template <typename Fn>
+ KFR_MEM_INTRINSIC T reduce(Fn&& fn, T initial = T{}) const
+ {
+ T result = initial;
+ if (is_contiguous())
+ {
+ auto src = contiguous_begin_unsafe();
+ while (src != contiguous_end_unsafe())
+ {
+ result = fn(*src, result);
+ ++src;
+ }
+ }
+ else
+ {
+ auto src = begin();
+ while (src != end())
+ {
+ result = fn(*src, result);
+ ++src;
+ }
+ }
+ return result;
+ }
+
+ template <typename Fn, typename U, typename Tout = std::invoke_result_t<Fn, T, U>>
+ KFR_MEM_INTRINSIC tensor<Tout, dims> binary(const tensor<U, dims>& rhs, Fn&& fn) const
+ {
+ tensor<Tout, dims> result(m_shape);
+ if (is_contiguous() && rhs.is_contiguous())
+ {
+ auto src1 = contiguous_begin_unsafe();
+ auto src2 = rhs.contiguous_begin_unsafe();
+ auto dst = result.contiguous_begin_unsafe();
+ while (src1 != contiguous_end_unsafe())
+ {
+ *dst = fn(*src1, *src2);
+ ++src1;
+ ++src2;
+ ++dst;
+ }
+ }
+ else
+ {
+ auto src1 = begin();
+ auto src2 = rhs.begin();
+ auto dst = result.contiguous_begin();
+ while (src1 != end())
+ {
+ *dst = fn(*src1, *src2);
+ ++src1;
+ ++src2;
+ ++dst;
+ }
+ }
+ return result;
+ }
+
+ template <typename Fn, typename U>
+ KFR_MEM_INTRINSIC const tensor& binary_inplace(const tensor<U, dims>& rhs, Fn&& fn) const
+ {
+ if (is_contiguous() && rhs.is_contiguous())
+ {
+ auto it = contiguous_begin_unsafe();
+ auto src2 = rhs.contiguous_begin_unsafe();
+ while (it != contiguous_end_unsafe())
+ {
+ *it = fn(*it, *src2);
+ ++it;
+ ++src2;
+ }
+ }
+ else
+ {
+ auto it = begin();
+ auto src2 = rhs.begin();
+ while (it != end())
+ {
+ *it = fn(*it, *src2);
+ ++it;
+ ++src2;
+ }
+ }
+ return *this;
+ }
+
+ template <typename U>
+ KFR_MEM_INTRINSIC tensor<U, dims> astype() const
+ {
+ return unary([](T value) { return static_cast<U>(value); });
+ }
+
+ template <size_t Nout>
+ KFR_MEM_INTRINSIC std::array<T, Nout> to_array() const
+ {
+ if (m_size != Nout)
+ throw std::range_error("Nout != m_size");
+ std::array<T, Nout> result;
+ if (is_contiguous())
+ std::copy(contiguous_begin(), contiguous_end(), result.begin());
+ else
+ std::copy(begin(), end(), result.begin());
+ return result;
+ }
+ struct nested_iterator_t
+ {
+ using iterator_category = std::forward_iterator_tag;
+ using difference_type = std::ptrdiff_t;
+ using value_type = tensor<T, dims - 1>;
+ using pointer = const value_type*;
+ using reference = const value_type&;
+
+ const tensor* src;
+ size_t index;
+
+ KFR_MEM_INTRINSIC value_type operator*() { return src->operator()(index); }
+ KFR_MEM_INTRINSIC pointer operator->() { return &operator*(); }
+
+ // prefix
+ KFR_MEM_INTRINSIC nested_iterator_t& operator++()
+ {
+ ++index;
+ return *this;
+ }
+
+ // postfix
+ KFR_MEM_INTRINSIC nested_iterator_t operator++(int)
+ {
+ nested_iterator_t temp = *this;
+ ++*this;
+ return temp;
+ }
+
+ KFR_MEM_INTRINSIC bool operator==(const nested_iterator_t& it) const
+ {
+ return src == it.src && index == it.index;
+ }
+ KFR_MEM_INTRINSIC bool operator!=(const nested_iterator_t& it) const { return !operator==(it); }
+ };
+
+ using nested_iterator = std::conditional_t<dims == 1, tensor_iterator, nested_iterator_t>;
+
+ KFR_MEM_INTRINSIC nested_iterator nested_begin() const
+ {
+ if constexpr (dims == 1)
+ return begin();
+ else
+ return { this, 0 };
+ }
+ KFR_MEM_INTRINSIC nested_iterator nested_end() const
+ {
+ if constexpr (dims == 1)
+ return end();
+ else
+ return { this, m_shape[0] };
+ }
+
+ KFR_MEM_INTRINSIC memory_finalizer finalizer() const { return m_finalizer; }
+
+ template <typename Input, index_t Dims = expression_traits<Input>::dims>
+ KFR_MEM_INTRINSIC const tensor& operator=(Input&& input) const&
+ {
+ tprocess(*this, input);
+ return *this;
+ }
+ template <typename Input, index_t Dims = expression_traits<Input>::dims>
+ KFR_MEM_INTRINSIC tensor& operator=(Input&& input) &&
+ {
+ tprocess(*this, input);
+ return *this;
+ }
+
+ // template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>)>
+ // tensor(Input&& input) : tensor(make_vector<index_t>(input.size()))
+ // {
+ // this->assign_expr(std::forward<Input>(input));
+ // }
+
+ template <size_t N, bool only_last = false>
+ KFR_MEM_INTRINSIC shape<N> offsets(shape<dims> indices) const
+ {
+ kfr::shape<N> result;
+ if constexpr (only_last)
+ {
+ index_t base = calc_index(indices);
+ result = base + enumerate(vec_shape<index_t, N>{}, m_shape.back());
+ }
+ else
+ {
+ for (index_t i = 0; i < N; ++i)
+ {
+ result[i] = calc_index(indices);
+ internal_generic::increment_indices(indices, kfr::shape<dims>(0), m_shape);
+ }
+ }
+ return result;
+ }
+
+ template <size_t N, bool only_last = false>
+ KFR_MEM_INTRINSIC shape<N> offsets(size_t flat_index) const
+ {
+ kfr::shape<dims> indices;
+ for (index_t i = 0; i < dims; ++i)
+ {
+ indices[dims - 1 - i] = flat_index % m_shape[dims - 1 - i];
+ flat_index /= m_shape[dims - 1 - i];
+ }
+ return offsets<N, only_last>(indices);
+ }
+
+ bool operator==(const tensor& other) const
+ {
+ return shape() == other.shape() && std::equal(begin(), end(), other.begin());
+ }
+ bool operator!=(const tensor& other) const { return !operator==(other); }
+
+ KFR_MEM_INTRINSIC const shape_type& shape() const { return m_shape; }
+ KFR_MEM_INTRINSIC const shape_type& strides() const { return m_strides; }
+
+ KFR_MEM_INTRINSIC bool is_contiguous() const { return m_is_contiguous; }
+
+ KFR_MEM_INTRINSIC bool is_last_contiguous() const { return m_strides.back() == 1; }
+
+private:
+ template <typename Input>
+ KFR_MEM_INTRINSIC void assign_expr(Input&& input) const
+ {
+ process(*this, std::forward<Input>(input));
+ }
+
+ T* m_data;
+ index_t m_size;
+ bool m_is_contiguous;
+ shape_type m_shape;
+ shape_type m_strides;
+ memory_finalizer m_finalizer;
+};
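A hypothetical slicing sketch (not part of the patch): tensor_range arguments keep an axis as a view, plain integer indices collapse it, and the resulting tensors share the original storage through the finalizer.

    #include <kfr/base/tensor.hpp>

    void slicing_example()
    {
        using namespace kfr;
        tensor<float, 2> t(shape{ 4, 8 }, 0.f);          // 4 x 8, zero-filled
        tensor<float, 2> rows = t(trange(1, 3), tall()); // rows 1 and 2, every column; shares memory with t
        tensor<float, 1> row  = t(2, tall());            // an integer index collapses that axis
    }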
+
+template <typename T>
+struct tensor<T, 0>
+{
+};
+
+template <typename Container, CMT_ENABLE_IF(kfr::has_data_size<Container>),
+ typename T = typename Container::value_type>
+KFR_INTRINSIC tensor<T, 1> tensor_from_container(Container vector)
+{
+ struct vector_finalizer
+ {
+ mutable std::optional<Container> vector;
+ void operator()(void*) const { vector.reset(); }
+ };
+
+ vector_finalizer finalizer{ std::move(vector) };
+
+ memory_finalizer mem = std::shared_ptr<void>(nullptr, std::move(finalizer));
+ vector_finalizer* fin = std::get_deleter<vector_finalizer>(mem);
+
+ return tensor<T, 1>(fin->vector->data(), make_vector<index_t>(fin->vector->size()), std::move(mem));
+}
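A hypothetical usage sketch (not part of the patch): the shared finalizer keeps the moved-in container alive for as long as any tensor refers to its memory.

    #include <vector>
    #include <kfr/base/tensor.hpp>

    inline kfr::tensor<float, 1> from_vector()
    {
        std::vector<float> v{ 1.f, 2.f, 3.f, 4.f };
        return kfr::tensor_from_container(std::move(v)); // 1-D tensor of size 4 viewing the vector's data
    }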
+
+template <typename T, index_t Dims>
+struct expression_traits<tensor<T, Dims>> : expression_traits_defaults
+{
+ using value_type = T;
+ constexpr static size_t dims = Dims;
+
+ KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const tensor<T, Dims>& self)
+ {
+ return self.shape();
+ }
+ KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof() { return { 0 }; }
+};
+
+inline namespace CMT_ARCH_NAME
+{
+
+template <typename T, index_t NDims, typename U, size_t N>
+KFR_INTRINSIC vec<U, N> get_elements(const tensor<T, NDims>& self, const shape<NDims>& index, vec_shape<U, N>)
+{
+ const T* data = self.data() + self.calc_index(index);
+ if (self.is_last_contiguous())
+ {
+ return static_cast<vec<U, N>>(read<N>(data));
+ }
+ else
+ {
+ return static_cast<vec<U, N>>(gather_stride<N>(data, self.strides().back()));
+ }
+}
+
+template <typename T, index_t NDims, typename U, size_t N>
+KFR_INTRINSIC void set_elements(const tensor<T, NDims>& self, const shape<NDims>& index,
+ const vec<U, N>& value)
+{
+ T* data = self.data() + self.calc_index(index);
+ if (self.is_last_contiguous())
+ {
+ write(data, vec<T, N>(value));
+ }
+ else
+ {
+ scatter_stride(data, vec<T, N>(value), self.strides().back());
+ }
+}
+
+template <typename T, index_t dims1, index_t dims2, typename Fn, index_t outdims = const_max(dims1, dims2)>
+tensor<T, outdims> tapply(const tensor<T, dims1>& x, const tensor<T, dims2>& y, Fn&& fn)
+{
+ shape<outdims> xyshape = internal_generic::common_shape(x.shape(), y.shape());
+
+ tensor<T, outdims> result(xyshape);
+
+ shape<outdims> xshape = padlow<outdims - dims1>(*x.shape(), 1);
+ shape<outdims> yshape = padlow<outdims - dims2>(*y.shape(), 1);
+
+ tensor<T, outdims> xx = x.reshape(xshape);
+ tensor<T, outdims> yy = y.reshape(yshape);
+
+ result.iterate([&](T& val, const shape<outdims>& index)
+ { val = fn(xx.access(xshape.adapt(index)), yy.access(yshape.adapt(index))); });
+
+ return result;
+}
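A broadcasting sketch for tapply (illustrative, not part of the patch): a scalar binary functor is applied elementwise, with the lower-rank operand broadcast against the higher-rank one.

    #include <functional>
    #include <kfr/base/tensor.hpp>

    void tapply_example()
    {
        using namespace kfr;
        tensor<float, 1> row(shape{ 4 }, 1.f);                  // { 1, 1, 1, 1 }
        tensor<float, 2> img(shape{ 2, 4 }, 2.f);               // 2 x 4, all elements 2
        tensor<float, 2> sum = tapply(img, row, std::plus<>{}); // every element becomes 3
    }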
+
+} // namespace CMT_ARCH_NAME
+
+} // namespace kfr
+
+namespace cometa
+{
+template <size_t dims>
+struct representation<kfr::shape<dims>>
+{
+ using type = std::string;
+ static std::string get(const kfr::shape<dims>& value)
+ {
+ if constexpr (dims == 0)
+ {
+ return "()";
+ }
+ else
+ {
+ std::string s;
+ for (size_t i = 0; i < dims; ++i)
+ {
+ if (CMT_LIKELY(i > 0))
+ s += ", ";
+ s += as_string(value[i]);
+ }
+ return s;
+ }
+ }
+};
+} // namespace cometa
+
+CMT_PRAGMA_MSVC(warning(pop))
+\ No newline at end of file