28 template <
typename Range>
29 std::ostream&
LogRange(std::ostream& os, Range&& range, std::string delim)
43 template <
typename T,
typename Range>
44 std::ostream&
LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
55 if constexpr(std::is_same_v<RangeType, ck::f8_t> || std::is_same_v<RangeType, ck::bf8_t> ||
56 std::is_same_v<RangeType, ck::bhalf_t>)
58 os << ck::type_convert<float>(v);
60 else if constexpr(std::is_same_v<RangeType, ck::pk_i4_t> ||
61 std::is_same_v<RangeType, ck::f4x2_pk_t>)
63 const auto packed_floats = ck::type_convert<ck::float2_t>(v);
65 os << vector_of_floats.template AsType<float>()[
ck::Number<0>{}] << delim
66 << vector_of_floats.template AsType<float>()[
ck::Number<1>{}];
70 os << static_cast<T>(v);
76 template <
typename F,
typename T, std::size_t... Is>
79 return f(std::get<Is>(args)...);
82 template <
typename F,
typename T>
85 constexpr std::size_t N = std::tuple_size<T>{};
90 template <
typename F,
typename T, std::size_t... Is>
93 return F(std::get<Is>(args)...);
96 template <
typename F,
typename T>
99 constexpr std::size_t N = std::tuple_size<T>{};
101 return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
187 template <
typename Layout>
189 std::vector<std::size_t> strides,
191 : mLens(std::move(lens)), mStrides(std::move(strides))
197 std::cout <<
"Original Lens: [";
198 LogRange(std::cout, mLens,
", ") <<
"] and Strides: [";
199 LogRange(std::cout, mStrides,
", ") <<
"]" << std::endl;
200 std::cout <<
"Layout: " <<
layout <<
" --> " << new_layout << std::endl;
215 template <
typename F,
typename OrigLayout>
223 default: f(orig);
break;
227 template <
typename Layout>
230 if constexpr(!std::is_same_v<Layout, DefaultLayout>)
242 const auto rank = mLens.size();
263 if(mStrides.size() == 2)
283 template <
typename Layout>
286 if constexpr(std::is_same_v<Layout, ck::tensor_layout::BypassLayoutVerification>)
290 auto strides_int = AsInt(mStrides);
293 if(mStrides.empty() || std::all_of(strides_int.begin(), strides_int.end(), [](
int stride) {
298 if constexpr(!(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
299 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>))
301 std::cerr <<
"Only RowMajor and ColumnMajor layouts are supported for empty "
303 <<
layout <<
". Will calculate strides as RowMajor." << std::endl;
307 mStrides.resize(mLens.size(), 0);
312 std::partial_sum(mLens.rbegin(),
314 mStrides.rbegin() + 1,
315 std::multiplies<std::size_t>());
317 if constexpr(std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
320 if(mStrides.size() >= 2)
321 std::swap(mStrides[mStrides.size() - 1], mStrides[mStrides.size() - 2]);
328 else if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
329 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
331 auto rank = mStrides.size();
332 if(mLens.size() >= 2 &&
rank >= 2)
334 const auto inner_idx =
335 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ?
rank - 1 :
rank - 2;
336 const auto outer_idx = inner_idx ==
rank - 1 ?
rank - 2 :
rank - 1;
337 if(mStrides[inner_idx] <= 0)
339 mStrides[inner_idx] = 1;
341 if(mStrides[outer_idx] <= 0)
343 mStrides[outer_idx] = mLens[inner_idx] * mStrides[inner_idx];
349 template <
typename Layout>
352 if constexpr(std::is_same_v<ck::tensor_layout::BypassLayoutVerification, Layout>)
359 throw std::runtime_error(
360 "HostTensorDescriptor::ValidateStrides: empty tensor dimensions is not allowed.");
363 const int rank = mLens.size();
369 if constexpr(std::is_same_v<ck::tensor_layout::BaseTensorLayout, Layout>)
373 throw std::runtime_error(
"HostTensorDescriptor::ValidateStrides: Abstract tensor "
374 "layout BaseTensorLayout can't be verified. Pls "
375 "pass specific tensor layout to HostTensorDescriptor (or "
376 "ck::tensor_layout::BypassLayoutVerification)");
380 if constexpr(std::is_base_of_v<ck::tensor_layout::gemm::BaseGemmLayout, Layout>)
382 if(mLens.size() != mStrides.size())
384 std::ostringstream oss;
385 oss <<
"HostTensorDescriptor::ValidateStrides: mismatch between tensor rank and "
388 throw std::runtime_error(oss.str());
393 auto strides_int = AsInt(mStrides);
395 strides_int.begin(), strides_int.end(), [](
int stride) { return stride <= 0; }))
397 std::ostringstream oss;
398 oss <<
"Stride values must be positive or all-zeros (auto-derived from tensor "
399 "dimensions). Instead got ";
401 strides_int.begin(), strides_int.end(), std::ostream_iterator<int>(oss,
" "));
402 throw std::runtime_error(oss.str());
405 if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
406 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
410 const auto inner_idx =
411 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ?
rank - 1 :
rank - 2;
412 const auto outer_idx = inner_idx ==
rank - 1 ?
rank - 2 :
rank - 1;
414 if(mStrides[outer_idx] < mLens[inner_idx] * mStrides[inner_idx])
416 std::ostringstream oss;
417 oss <<
"Invalid strides for " <<
layout <<
": " << *
this;
418 throw std::runtime_error(oss.str());
422 for(
int i = 1; i <
rank - 2; ++i)
424 if(mStrides[i - 1] < mStrides[i] * mLens[i])
426 std::ostringstream oss;
427 oss <<
"Invalid strides for higher dimensions in " <<
layout <<
": "
429 throw std::runtime_error(oss.str());
435 std::ostringstream oss;
436 oss <<
"Error: Unsupported GEMM layout: " <<
layout;
437 throw std::runtime_error(oss.str());
446 std::cerr <<
"Warning: Tensor layout verification for ck::tensor_layout::convolution "
447 "layouts is not supported yet. Skipping..."
453 std::ostringstream oss;
454 oss <<
"Error: Tensor layout verification for " <<
layout <<
" is not supported yet.";
455 throw std::runtime_error(oss.str());
459 template <
typename X,
461 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
462 std::is_convertible_v<Layout, BaseTensorLayout>>>
467 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
471 typename = std::enable_if_t<std::is_convertible_v<Layout, BaseTensorLayout>>>
477 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
480 template <
typename Lengths,
483 (std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> ||
484 std::is_convertible_v<ck::ranges::range_value_t<Lengths>,
ck::long_index_t>) &&
485 std::is_convertible_v<Layout, BaseTensorLayout>>>
490 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
493 template <
typename X,
495 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
496 std::is_convertible_v<Y, std::size_t>>,
499 const std::initializer_list<Y>& strides,
502 std::vector<std::size_t>(strides.begin(), strides.end()),
506 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
510 template <
typename Layout = DefaultLayout>
512 const std::initializer_list<ck::long_index_t>& strides,
515 std::vector<std::size_t>(strides.begin(), strides.end()),
519 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
523 template <
typename Strides,
typename Layout = DefaultLayout>
525 const Strides& strides,
528 std::vector<std::size_t>(strides.begin(), strides.end()),
532 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
535 template <
typename Lengths,
539 ((std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> &&
540 std::is_convertible_v<ck::ranges::range_value_t<Strides>, std::size_t>) ||
543 std::is_convertible_v<Layout, BaseTensorLayout>>>
545 const Strides& strides,
548 std::vector<std::size_t>(strides.begin(), strides.end()),
552 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
562 template <
typename... Is>
566 std::initializer_list<std::size_t> iss{
static_cast<std::size_t
>(is)...};
579 std::vector<std::size_t> mLens;
580 std::vector<std::size_t> mStrides;
581 static constexpr
bool dbg =
false;
589 std::vector<int> AsInt(
const std::vector<size_t>& vec)
const
591 std::vector<int> strides_int(vec.size());
592 std::transform(vec.begin(), vec.end(), strides_int.begin(), [](std::size_t stride) {
593 return static_cast<int>(stride);
599 template <
typename New2Old,
typename NewLayout = HostTensorDescriptor::BaseTensorLayout>
602 const New2Old& new2old,
603 const NewLayout& new_layout = NewLayout())
605 std::vector<std::size_t> new_lengths(
a.GetNumOfDimension());
606 std::vector<std::size_t> new_strides(
a.GetNumOfDimension());
608 for(std::size_t i = 0; i <
a.GetNumOfDimension(); i++)
610 new_lengths[i] =
a.GetLengths()[new2old[i]];
611 new_strides[i] =
a.GetStrides()[new2old[i]];
619 template <
typename... Xs>
634 template <
typename F,
typename... Xs>
638 static constexpr std::size_t
NDIM =
sizeof...(Xs);
639 std::array<std::size_t, NDIM>
mLens;
646 std::partial_sum(
mLens.rbegin(),
649 std::multiplies<std::size_t>());
655 std::array<std::size_t, NDIM> indices;
657 for(std::size_t idim = 0; idim <
NDIM; ++idim)
660 i -= indices[idim] *
mStrides[idim];
668 std::size_t work_per_thread = (
mN1d + num_thread - 1) / num_thread;
670 std::vector<joinable_thread> threads(num_thread);
672 for(std::size_t it = 0; it < num_thread; ++it)
674 std::size_t iw_begin = it * work_per_thread;
675 std::size_t iw_end =
std::min((it + 1) * work_per_thread,
mN1d);
677 auto f = [=, *
this] {
678 for(std::size_t iw = iw_begin; iw < iw_end; ++iw)
688 template <
typename F,
typename... Xs>
694 template <
typename T>
700 template <
typename X>
705 template <
typename X,
typename Y>
706 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides)
711 template <
typename Lengths>
716 template <
typename Lengths,
typename Strides>
717 Tensor(
const Lengths& lens,
const Strides& strides)
722 template <
typename X,
typename... Rest,
std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
723 Tensor(std::initializer_list<X> lens, Rest&&... rest)
728 template <
typename X,
732 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides, Rest&&... rest)
737 template <
typename Lengths,
typename... Rest,
std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
738 Tensor(
const Lengths& lens, Rest&&... rest)
743 template <
typename Lengths,
747 Tensor(
const Lengths& lens,
const Strides& strides, Rest&&... rest)
754 template <
typename OutT>
760 mData, ret.
mData.begin(), [](
auto value) { return ck::type_convert<OutT>(value); });
774 template <
typename FromT>
778 void savetxt(std::string file_name, std::string dtype =
"float")
780 std::ofstream file(file_name);
784 for(
auto& itm :
mData)
787 file << ck::type_convert<float>(itm) << std::endl;
788 else if(dtype ==
"int")
789 file << ck::type_convert<int>(itm) << std::endl;
793 file << ck::type_convert<float>(itm) << std::endl;
801 throw std::runtime_error(std::string(
"unable to open file:") + file_name);
828 template <
typename F>
844 template <
typename F>
851 template <
typename F>
867 template <
typename F>
874 template <
typename G>
880 auto f = [&](
auto i) { (*this)(i) = g(i); };
885 auto f = [&](
auto i0,
auto i1) { (*this)(i0, i1) = g(i0, i1); };
890 auto f = [&](
auto i0,
auto i1,
auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
896 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3) {
897 (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
907 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4) {
908 (*this)(i0, i1, i2, i3, i4) = g(i0, i1, i2, i3, i4);
919 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4,
auto i5) {
920 (*this)(i0, i1, i2, i3, i4, i5) = g(i0, i1, i2, i3, i4, i5);
932 auto f = [&](
auto i0,
944 (*this)(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11) =
945 g(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11);
962 default:
throw std::runtime_error(
"unspported dimension");
968 template <
typename Distribution = std::uniform_real_distribution<
float>,
969 typename Mapping = ck::
identity,
970 typename Generator = std::minstd_rand>
973 const Generator g = Generator(0),
974 std::size_t num_thread = -1)
978 if(num_thread == -1ULL)
982 constexpr std::size_t BLOCK_BYTES = 64;
983 constexpr std::size_t BLOCK_SIZE = BLOCK_BYTES /
sizeof(T);
988 std::vector<std::thread> threads;
989 threads.reserve(num_thread - 1);
990 const auto dst =
const_cast<T*
>(this->
mData.data());
992 for(
int it = num_thread - 1; it >= 0; --it)
994 std::size_t ib_begin = it * blocks_per_thread;
995 std::size_t ib_end =
min(ib_begin + blocks_per_thread, num_blocks);
1000 g_.discard(ib_begin * BLOCK_SIZE * ck::packed_size_v<T>);
1007 if constexpr(ck::is_same_v<T, ck::f8_t> || ck::is_same_v<T, ck::bf8_t>)
1008 return ck::type_convert<T>(
static_cast<float>(fn(dis_(g_))));
1009 else if constexpr(ck::packed_size_v<T> == 1)
1012 return
ck::f4x2_pk_t{ck::type_convert<ck::f4x2_t>(
1014 ck::type_convert<float>(fn(dis_(g_)))})};
1015 else if constexpr(ck::is_same_v<T, ck::f6x32_pk_t> ||
1016 ck::is_same_v<T, ck::bf6x32_pk_t>)
1018 return ck::type_convert<T>(
1020 ck::type_convert<float>(fn(dis_(g_))),
1021 ck::type_convert<float>(fn(dis_(g_))),
1022 ck::type_convert<float>(fn(dis_(g_))),
1023 ck::type_convert<float>(fn(dis_(g_))),
1024 ck::type_convert<float>(fn(dis_(g_))),
1025 ck::type_convert<float>(fn(dis_(g_))),
1026 ck::type_convert<float>(fn(dis_(g_))),
1027 ck::type_convert<float>(fn(dis_(g_))),
1028 ck::type_convert<float>(fn(dis_(g_))),
1029 ck::type_convert<float>(fn(dis_(g_))),
1030 ck::type_convert<float>(fn(dis_(g_))),
1031 ck::type_convert<float>(fn(dis_(g_))),
1032 ck::type_convert<float>(fn(dis_(g_))),
1033 ck::type_convert<float>(fn(dis_(g_))),
1034 ck::type_convert<float>(fn(dis_(g_))),
1035 ck::type_convert<float>(fn(dis_(g_))),
1036 ck::type_convert<float>(fn(dis_(g_))),
1037 ck::type_convert<float>(fn(dis_(g_))),
1038 ck::type_convert<float>(fn(dis_(g_))),
1039 ck::type_convert<float>(fn(dis_(g_))),
1040 ck::type_convert<float>(fn(dis_(g_))),
1041 ck::type_convert<float>(fn(dis_(g_))),
1042 ck::type_convert<float>(fn(dis_(g_))),
1043 ck::type_convert<float>(fn(dis_(g_))),
1044 ck::type_convert<float>(fn(dis_(g_))),
1045 ck::type_convert<float>(fn(dis_(g_))),
1046 ck::type_convert<float>(fn(dis_(g_))),
1047 ck::type_convert<float>(fn(dis_(g_))),
1048 ck::type_convert<float>(fn(dis_(g_))),
1049 ck::type_convert<float>(fn(dis_(g_))),
1050 ck::type_convert<float>(fn(dis_(g_)))});
1052 else if constexpr(ck::is_same_v<T, ck::f6x16_pk_t> ||
1053 ck::is_same_v<T, ck::bf6x16_pk_t>)
1055 return ck::type_convert<T>(
1057 ck::type_convert<float>(fn(dis_(g_))),
1058 ck::type_convert<float>(fn(dis_(g_))),
1059 ck::type_convert<float>(fn(dis_(g_))),
1060 ck::type_convert<float>(fn(dis_(g_))),
1061 ck::type_convert<float>(fn(dis_(g_))),
1062 ck::type_convert<float>(fn(dis_(g_))),
1063 ck::type_convert<float>(fn(dis_(g_))),
1064 ck::type_convert<float>(fn(dis_(g_))),
1065 ck::type_convert<float>(fn(dis_(g_))),
1066 ck::type_convert<float>(fn(dis_(g_))),
1067 ck::type_convert<float>(fn(dis_(g_))),
1068 ck::type_convert<float>(fn(dis_(g_))),
1069 ck::type_convert<float>(fn(dis_(g_))),
1070 ck::type_convert<float>(fn(dis_(g_))),
1071 ck::type_convert<float>(fn(dis_(g_)))});
1074 static_assert(
false,
"Unsupported packed size for T");
1077 std::size_t ib = ib_begin;
1078 for(; ib < ib_end - 1; ++ib)
1080 constexpr
size_t iw = iw_.value;
1081 dst[ib * BLOCK_SIZE + iw] = t_fn();
1083 for(std::size_t iw = 0; iw < BLOCK_SIZE; ++iw)
1084 if(ib * BLOCK_SIZE + iw < element_space_size)
1085 dst[ib * BLOCK_SIZE + iw] = t_fn();
1089 threads.emplace_back(std::move(job));
1093 for(
auto& t : threads)
1097 template <
typename... Is>
1103 template <
typename... Is>
1107 ck::packed_size_v<ck::remove_cvref_t<T>>];
1110 template <
typename... Is>
1114 ck::packed_size_v<ck::remove_cvref_t<T>>];
1133 typename Data::const_iterator
begin()
const {
return mData.begin(); }
1135 typename Data::const_iterator
end()
const {
return mData.end(); }
1137 typename Data::const_pointer
data()
const {
return mData.data(); }
1139 typename Data::size_type
size()
const {
return mData.size(); }
1141 template <
typename U = T>
1144 constexpr std::size_t FromSize =
sizeof(T);
1145 constexpr std::size_t ToSize =
sizeof(U);
1147 using Element = std::add_const_t<std::remove_reference_t<U>>;
1151 template <
typename U = T>
1154 constexpr std::size_t FromSize =
sizeof(T);
1155 constexpr std::size_t ToSize =
sizeof(U);
1157 using Element = std::remove_reference_t<U>;
__host__ constexpr __device__ auto rank([[maybe_unused]] const Layout< Shape, UnrolledDescriptorType > &layout)
Get layout rank (num elements in shape).
Definition: layout_utils.hpp:310
__host__ constexpr __device__ auto integer_divide_ceil(X x, Y y)
Definition: math.hpp:72
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition: algorithm.hpp:36
auto copy(InputRange &&range, OutputIterator iter) -> decltype(std::copy(std::begin(std::forward< InputRange >(range)), std::end(std::forward< InputRange >(range)), iter))
Definition: algorithm.hpp:14
iter_value_t< ranges::iterator_t< R > > range_value_t
Definition: ranges.hpp:28
typename vector_type< float, 16 >::type float16_t
Definition: dtype_vector.hpp:2149
unsigned int get_available_cpu_cores()
Definition: thread.hpp:11
HostTensorDescriptor transpose_host_tensor_descriptor_given_new2old(const HostTensorDescriptor &a, const New2Old &new2old, const NewLayout &new_layout=NewLayout())
Definition: host_tensor.hpp:601
std::ostream & LogRange(std::ostream &os, Range &&range, std::string delim)
Definition: host_tensor.hpp:29
auto call_f_unpack_args_impl(F f, T args, std::index_sequence< Is... >)
Definition: host_tensor.hpp:77
int64_t long_index_t
Definition: ck.hpp:302
typename vector_type< float, 2 >::type float2_t
Definition: dtype_vector.hpp:2146
auto construct_f_unpack_args_impl(T args, std::index_sequence< Is... >)
Definition: host_tensor.hpp:91
__host__ constexpr __device__ Y type_convert(X x)
Definition: type_convert.hpp:98
constexpr bool is_base_of_v
Definition: type.hpp:286
auto construct_f_unpack_args(F, T args)
Definition: host_tensor.hpp:97
constexpr bool is_same_v
Definition: type.hpp:283
auto make_ParallelTensorFunctor(F f, Xs... xs)
Definition: host_tensor.hpp:689
constexpr bool is_packed_type_v
Definition: data_type.hpp:414
remove_cv_t< remove_reference_t< T > > remove_cvref_t
Definition: type.hpp:297
typename std::enable_if< B, T >::type enable_if_t
Definition: enable_if.hpp:27
auto call_f_unpack_args(F f, T args)
Definition: host_tensor.hpp:83
__device__ void inner_product(const TA &a, const TB &b, TC &c)
typename vector_type< float, 32 >::type float32_t
Definition: dtype_vector.hpp:2150
std::ostream & LogRangeAsType(std::ostream &os, Range &&range, std::string delim)
Definition: host_tensor.hpp:44
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1697
const GenericPointer< typename T::ValueType > & pointer
Definition: pointer.h:1514
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1517
Layout wrapper that performs the tensor descriptor logic.
Definition: layout.hpp:24
A descriptor class for host tensors that manages tensor dimensions, strides, and layout.
Definition: host_tensor.hpp:173
const std::vector< std::size_t > & GetStrides() const
HostTensorDescriptor()
Definition: host_tensor.hpp:210
ChosenLayout HandleDefaultLayout(const Layout &)
Definition: host_tensor.hpp:228
void DispatchChosenLayout(ChosenLayout tag, const OrigLayout &orig, F &&f) const
Definition: host_tensor.hpp:216
HostTensorDescriptor(const std::initializer_list< X > &lens, const Layout &layout=Layout{})
Definition: host_tensor.hpp:463
std::size_t GetElementSize() const
HostTensorDescriptor(const Lengths &lens, const Layout &layout=Layout{})
Definition: host_tensor.hpp:486
HostTensorDescriptor(const Lengths &lens, const Strides &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:544
std::size_t GetElementSpaceSize() const
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const Layout &layout=Layout{})
Definition: host_tensor.hpp:472
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition: host_tensor.hpp:563
void CalculateStrides(const Layout &layout)
Definition: host_tensor.hpp:284
ck::tensor_layout::BaseTensorLayout BaseTensorLayout
Definition: host_tensor.hpp:174
void ValidateStrides(const Layout &layout) const
Definition: host_tensor.hpp:350
HostTensorDescriptor(const std::initializer_list< X > &lens, const std::initializer_list< Y > &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:498
std::size_t GetOffsetFromMultiIndex(const std::vector< std::size_t > &iss) const
Definition: host_tensor.hpp:570
HostTensorDescriptor(const std::initializer_list< std::size_t > &lens, const Strides &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:524
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const std::initializer_list< ck::long_index_t > &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:511
friend std::ostream & operator<<(std::ostream &os, ChosenLayout tag)
std::size_t GetNumOfDimension() const
HostTensorDescriptor(std::vector< std::size_t > lens, std::vector< std::size_t > strides, const Layout &layout=DefaultLayout())
Definition: host_tensor.hpp:188
const std::vector< std::size_t > & GetLengths() const
ChosenLayout
Definition: host_tensor.hpp:180
BaseTensorLayout DefaultLayout
Definition: host_tensor.hpp:175
friend std::ostream & operator<<(std::ostream &os, const HostTensorDescriptor &desc)
Definition: host_tensor.hpp:636
std::array< std::size_t, NDIM > GetNdIndices(std::size_t i) const
Definition: host_tensor.hpp:653
std::size_t mN1d
Definition: host_tensor.hpp:641
std::array< std::size_t, NDIM > mStrides
Definition: host_tensor.hpp:640
static constexpr std::size_t NDIM
Definition: host_tensor.hpp:638
F mF
Definition: host_tensor.hpp:637
std::array< std::size_t, NDIM > mLens
Definition: host_tensor.hpp:639
ParallelTensorFunctor(F f, Xs... xs)
Definition: host_tensor.hpp:643
void operator()(std::size_t num_thread=1) const
Definition: host_tensor.hpp:666
Definition: host_tensor.hpp:696
void GenerateTensorDistr(Distribution dis={0.f, 1.f}, Mapping fn={}, const Generator g=Generator(0), std::size_t num_thread=-1)
Definition: host_tensor.hpp:971
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition: host_tensor.hpp:1098
void ForEach(const F &&f) const
Definition: host_tensor.hpp:868
Tensor(const Lengths &lens, const Strides &strides, Rest &&... rest)
Definition: host_tensor.hpp:747
decltype(auto) GetStrides() const
Definition: host_tensor.hpp:806
std::size_t GetElementSpaceSize() const
Definition: host_tensor.hpp:812
Tensor(std::initializer_list< X > lens)
Definition: host_tensor.hpp:701
Descriptor mDesc
Definition: host_tensor.hpp:1161
auto AsSpan() const
Definition: host_tensor.hpp:1142
void SetZero()
Definition: host_tensor.hpp:826
void ForEach_impl(F &&f, std::vector< size_t > &idx, size_t rank)
Definition: host_tensor.hpp:829
const T & operator()(const std::vector< std::size_t > &idx) const
Definition: host_tensor.hpp:1122
Data::const_pointer data() const
Definition: host_tensor.hpp:1137
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides)
Definition: host_tensor.hpp:706
Data mData
Definition: host_tensor.hpp:1162
std::vector< T > Data
Definition: host_tensor.hpp:698
Tensor(const Descriptor &desc)
Definition: host_tensor.hpp:752
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides, Rest &&... rest)
Definition: host_tensor.hpp:732
T & operator()(Is... is)
Definition: host_tensor.hpp:1104
Tensor & operator=(const Tensor &)=default
Data::pointer data()
Definition: host_tensor.hpp:1131
std::size_t GetElementSpaceSizeInBytes() const
Definition: host_tensor.hpp:824
Tensor(const Lengths &lens, const Strides &strides)
Definition: host_tensor.hpp:717
auto AsSpan()
Definition: host_tensor.hpp:1152
Tensor(std::initializer_list< X > lens, Rest &&... rest)
Definition: host_tensor.hpp:723
decltype(auto) GetLengths() const
Definition: host_tensor.hpp:804
Tensor & operator=(Tensor &&)=default
Tensor< OutT > CopyAsType() const
Definition: host_tensor.hpp:755
std::size_t GetNumOfDimension() const
Definition: host_tensor.hpp:808
void ForEach_impl(const F &&f, std::vector< size_t > &idx, size_t rank) const
Definition: host_tensor.hpp:852
Data::size_type size() const
Definition: host_tensor.hpp:1139
void ForEach(F &&f)
Definition: host_tensor.hpp:845
Tensor(const Lengths &lens)
Definition: host_tensor.hpp:712
T & operator()(const std::vector< std::size_t > &idx)
Definition: host_tensor.hpp:1117
std::size_t GetElementSize() const
Definition: host_tensor.hpp:810
const T & operator()(Is... is) const
Definition: host_tensor.hpp:1111
Tensor(const Tensor< FromT > &other)
Definition: host_tensor.hpp:775
Data::const_iterator end() const
Definition: host_tensor.hpp:1135
void savetxt(std::string file_name, std::string dtype="float")
Definition: host_tensor.hpp:778
Data::iterator end()
Definition: host_tensor.hpp:1129
Data::iterator begin()
Definition: host_tensor.hpp:1127
Tensor(const Lengths &lens, Rest &&... rest)
Definition: host_tensor.hpp:738
Tensor(const Tensor &)=default
void GenerateTensorValue(G g, std::size_t num_thread=1)
Definition: host_tensor.hpp:875
Tensor(Tensor &&)=default
Data::const_iterator begin() const
Definition: host_tensor.hpp:1133
Definition: integral_constant.hpp:20
Definition: host_tensor.hpp:618
joinable_thread & operator=(joinable_thread &&)=default
~joinable_thread()
Definition: host_tensor.hpp:627
joinable_thread(Xs &&... xs)
Definition: host_tensor.hpp:620
joinable_thread(joinable_thread &&)=default
Definition: functional2.hpp:33
Definition: tensor_layout.hpp:10
Definition: tensor_layout.hpp:45
Definition: tensor_layout.hpp:31
Definition: tensor_layout.hpp:26
Definition: dtype_vector.hpp:11
__host__ constexpr __device__ const auto & layout(const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Get Tensor Layout.
Definition: tensor_utils.hpp:162