22 template <
typename Range>
26 int precision = std::cout.precision(),
36 os << std::setw(width) << std::setprecision(precision) << v;
41 template <
typename T,
typename Range>
45 int precision = std::cout.precision(),
55 os << std::setw(width) << std::setprecision(precision) << static_cast<T>(v);
60 template <
typename F,
typename T, std::size_t... Is>
63 return f(std::get<Is>(args)...);
66 template <
typename F,
typename T>
69 constexpr std::size_t N = std::tuple_size<T>{};
74 template <
typename F,
typename T, std::size_t... Is>
77 return F(std::get<Is>(args)...);
80 template <
typename F,
typename T>
83 constexpr std::size_t N = std::tuple_size<T>{};
85 return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
95 mStrides.resize(mLens.size(), 0);
100 std::partial_sum(mLens.rbegin(),
102 mStrides.rbegin() + 1,
103 std::multiplies<std::size_t>());
106 template <
typename X,
typename = std::enable_if_t<std::is_convertible_v<X, std::
size_t>>>
112 template <
typename Lengths,
114 std::is_convertible_v<ck_tile::ranges::range_value_t<Lengths>, std::size_t>>>
120 template <
typename X,
122 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
123 std::is_convertible_v<Y, std::size_t>>>
125 const std::initializer_list<Y>& strides)
126 : mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
130 template <
typename Lengths,
133 std::is_convertible_v<ck_tile::ranges::range_value_t<Lengths>, std::size_t> &&
134 std::is_convertible_v<ck_tile::ranges::range_value_t<Strides>, std::size_t>>>
136 : mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
143 assert(mLens.size() == mStrides.size());
144 return std::accumulate(
145 mLens.begin(), mLens.end(), std::size_t{1}, std::multiplies<std::size_t>());
149 std::size_t space = 1;
150 for(std::size_t i = 0; i < mLens.size(); ++i)
155 space += (mLens[i] - 1) * mStrides[i];
160 std::size_t
get_length(std::size_t dim)
const {
return mLens[dim]; }
162 const std::vector<std::size_t>&
get_lengths()
const {
return mLens; }
164 std::size_t
get_stride(std::size_t dim)
const {
return mStrides[dim]; }
166 const std::vector<std::size_t>&
get_strides()
const {
return mStrides; }
168 template <
typename... Is>
172 std::initializer_list<std::size_t> iss{
static_cast<std::size_t
>(is)...};
197 std::vector<std::size_t> mLens;
198 std::vector<std::size_t> mStrides;
201 template <
typename New2Old>
217 template <
typename F,
typename... Xs>
221 static constexpr std::size_t
NDIM =
sizeof...(Xs);
222 std::array<std::size_t, NDIM>
mLens;
229 std::partial_sum(
mLens.rbegin(),
232 std::multiplies<std::size_t>());
238 std::array<std::size_t, NDIM> indices;
240 for(std::size_t idim = 0; idim <
NDIM; ++idim)
243 i -= indices[idim] *
mStrides[idim];
251 std::size_t work_per_thread = (
mN1d + num_thread - 1) / num_thread;
253 std::vector<joinable_thread> threads(num_thread);
255 for(std::size_t it = 0; it < num_thread; ++it)
257 std::size_t iw_begin = it * work_per_thread;
258 std::size_t iw_end =
std::min((it + 1) * work_per_thread,
mN1d);
260 auto f = [
this, iw_begin, iw_end] {
261 for(std::size_t iw = iw_begin; iw < iw_end; ++iw)
271 template <
typename F,
typename... Xs>
277 template <
typename T>
283 template <
typename X>
288 template <
typename X,
typename Y>
289 HostTensor(std::initializer_list<X> lens, std::initializer_list<Y> strides)
294 template <
typename Lengths>
299 template <
typename Lengths,
typename Str
ides>
307 template <
typename OutT>
312 return ck_tile::type_convert<OutT>(value);
326 template <
typename FromT>
353 template <
typename F>
369 template <
typename F>
376 template <
typename F>
392 template <
typename F>
399 template <
typename G>
405 auto f = [&](
auto i) { (*this)(i) = g(i); };
410 auto f = [&](
auto i0,
auto i1) { (*this)(i0, i1) = g(i0, i1); };
416 auto f = [&](
auto i0,
auto i1,
auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
424 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3) {
425 (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
435 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4) {
436 (*this)(i0, i1, i2, i3, i4) = g(i0, i1, i2, i3, i4);
447 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4,
auto i5) {
448 (*this)(i0, i1, i2, i3, i4, i5) = g(i0, i1, i2, i3, i4, i5);
459 default:
throw std::runtime_error(
"unspported dimension");
463 template <
typename... Is>
469 template <
typename... Is>
475 template <
typename... Is>
496 std::iota(axes.rbegin(), axes.rend(), 0);
500 throw std::runtime_error(
501 "HostTensor::transpose(): size of axes must match tensor dimension");
503 std::vector<size_t> tlengths, tstrides;
504 for(
const auto& axis : axes)
509 HostTensor<T> ret(*
this);
521 typename Data::iterator
end() {
return mData.end(); }
525 typename Data::const_iterator
begin()
const {
return mData.begin(); }
527 typename Data::const_iterator
end()
const {
return mData.end(); }
529 typename Data::const_pointer
data()
const {
return mData.data(); }
531 typename Data::size_type
size()
const {
return mData.size(); }
535 auto slice(std::vector<size_t> s_begin, std::vector<size_t> s_end)
const
537 assert(s_begin.size() == s_end.size());
540 std::vector<size_t> s_len(s_begin.size());
542 s_end.begin(), s_end.end(), s_begin.begin(), s_len.begin(), std::minus<size_t>{});
545 sliced_tensor.
ForEach([&](
auto&
self,
auto idx) {
546 std::vector<size_t> src_idx(idx.size());
548 idx.begin(), idx.end(), s_begin.begin(), src_idx.begin(), std::plus<size_t>{});
549 self(idx) =
operator()(src_idx);
552 return sliced_tensor;
555 template <
typename U = T>
558 constexpr std::size_t FromSize =
sizeof(T);
559 constexpr std::size_t ToSize =
sizeof(U);
561 using Element = std::add_const_t<std::remove_reference_t<U>>;
563 size() * FromSize / ToSize};
566 template <
typename U = T>
569 constexpr std::size_t FromSize =
sizeof(T);
570 constexpr std::size_t ToSize =
sizeof(U);
572 using Element = std::remove_reference_t<U>;
574 size() * FromSize / ToSize};
581 for(
typename Data::size_type idx = 0; idx < t.
mData.size(); ++idx)
587 if constexpr(std::is_same_v<T, bf16_t> || std::is_same_v<T, fp16_t>)
589 os << type_convert<float>(t.
mData[idx]) <<
" #### ";
607 void loadtxt(std::string file_name, std::string dtype =
"float")
609 std::ifstream file(file_name);
616 while(std::getline(file, line))
620 throw std::runtime_error(std::string(
"data read from file:") + file_name +
626 mData[cnt] = type_convert<T>(std::stof(line));
628 else if(dtype ==
"int" || dtype ==
"int32")
630 mData[cnt] = type_convert<T>(std::stoi(line));
637 std::cerr <<
"Warning! reading from file:" << file_name
638 <<
", does not match the size of this tensor" << std::endl;
645 throw std::runtime_error(std::string(
"unable to open file:") + file_name);
651 void savetxt(std::string file_name, std::string dtype =
"float")
653 std::ofstream file(file_name);
657 for(
auto& itm :
mData)
660 file << type_convert<float>(itm) << std::endl;
661 else if(dtype ==
"int")
662 file << type_convert<int>(itm) << std::endl;
666 file << type_convert<float>(itm) << std::endl;
674 throw std::runtime_error(std::string(
"unable to open file:") + file_name);
682 template <
bool is_row_major>
690 if constexpr(is_row_major)
699 template <
bool is_row_major>
707 if constexpr(is_row_major)
#define CK_TILE_HOST
Definition: config.hpp:39
__host__ constexpr __device__ auto rank([[maybe_unused]] const Layout< Shape, UnrolledDescriptorType > &layout)
Get layout rank (num elements in shape).
Definition: layout_utils.hpp:310
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition: algorithm.hpp:25
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition: algorithm.hpp:36
Definition: literals.hpp:9
Definition: cluster_descriptor.hpp:13
CK_TILE_HOST auto make_ParallelTensorFunctor(F f, Xs... xs)
Definition: host_tensor.hpp:272
CK_TILE_HOST auto call_f_unpack_args(F f, T args)
Definition: host_tensor.hpp:67
CK_TILE_HOST HostTensorDescriptor transpose_host_tensor_descriptor_given_new2old(const HostTensorDescriptor &a, const New2Old &new2old)
Definition: host_tensor.hpp:202
CK_TILE_HOST auto call_f_unpack_args_impl(F f, T args, std::index_sequence< Is... >)
Definition: host_tensor.hpp:61
auto host_tensor_descriptor(std::size_t row, std::size_t col, std::size_t stride, bool_constant< is_row_major >)
Definition: host_tensor.hpp:683
CK_TILE_HOST std::ostream & LogRangeAsType(std::ostream &os, Range &&range, std::string delim, int precision=std::cout.precision(), int width=0)
Definition: host_tensor.hpp:42
int32_t index_t
Definition: integer.hpp:9
CK_TILE_HOST std::ostream & LogRange(std::ostream &os, Range &&range, std::string delim, int precision=std::cout.precision(), int width=0)
Definition: host_tensor.hpp:23
CK_TILE_HOST auto construct_f_unpack_args(F, T args)
Definition: host_tensor.hpp:81
CK_TILE_HOST auto construct_f_unpack_args_impl(T args, std::index_sequence< Is... >)
Definition: host_tensor.hpp:75
auto get_default_stride(std::size_t row, std::size_t col, std::size_t stride, bool_constant< is_row_major >)
Definition: host_tensor.hpp:700
typename std::enable_if< B, T >::type enable_if_t
Definition: enable_if.hpp:13
__device__ void inner_product(const TA &a, const TB &b, TC &c)
Definition: host_tensor.hpp:97
Definition: host_tensor.hpp:89
std::size_t get_stride(std::size_t dim) const
Definition: host_tensor.hpp:164
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition: host_tensor.hpp:169
std::size_t get_element_size() const
Definition: host_tensor.hpp:141
void CalculateStrides()
Definition: host_tensor.hpp:92
std::size_t get_num_of_dimension() const
Definition: host_tensor.hpp:140
std::size_t GetOffsetFromMultiIndex(std::vector< std::size_t > iss) const
Definition: host_tensor.hpp:176
HostTensorDescriptor(const std::initializer_list< X > &lens, const std::initializer_list< Y > &strides)
Definition: host_tensor.hpp:124
std::size_t get_element_space_size() const
Definition: host_tensor.hpp:147
const std::vector< std::size_t > & get_strides() const
Definition: host_tensor.hpp:166
const std::vector< std::size_t > & get_lengths() const
Definition: host_tensor.hpp:162
std::size_t get_length(std::size_t dim) const
Definition: host_tensor.hpp:160
HostTensorDescriptor(const Lengths &lens, const Strides &strides)
Definition: host_tensor.hpp:135
HostTensorDescriptor()=default
HostTensorDescriptor(const std::initializer_list< X > &lens)
Definition: host_tensor.hpp:107
HostTensorDescriptor(const Lengths &lens)
Definition: host_tensor.hpp:115
friend std::ostream & operator<<(std::ostream &os, const HostTensorDescriptor &desc)
Definition: host_tensor.hpp:181
Definition: host_tensor.hpp:279
void ForEach(F &&f)
Definition: host_tensor.hpp:370
std::size_t get_stride(std::size_t dim) const
Definition: host_tensor.hpp:335
void ForEach(const F &&f) const
Definition: host_tensor.hpp:393
HostTensor(HostTensor &&)=default
Data::size_type size() const
Definition: host_tensor.hpp:531
decltype(auto) get_lengths() const
Definition: host_tensor.hpp:333
HostTensor(std::initializer_list< X > lens, std::initializer_list< Y > strides)
Definition: host_tensor.hpp:289
HostTensor & operator=(HostTensor &&)=default
friend std::ostream & operator<<(std::ostream &os, const HostTensor< T > &t)
Definition: host_tensor.hpp:577
HostTensor(std::initializer_list< X > lens)
Definition: host_tensor.hpp:284
HostTensor & operator=(const HostTensor &)=default
std::size_t get_element_space_size_in_bytes() const
Definition: host_tensor.hpp:345
decltype(auto) get_strides() const
Definition: host_tensor.hpp:337
HostTensor(const HostTensor &)=default
Data::iterator end()
Definition: host_tensor.hpp:521
void GenerateTensorValue(G g, std::size_t num_thread=1)
Definition: host_tensor.hpp:400
void SetZero()
Definition: host_tensor.hpp:351
Descriptor mDesc
Definition: host_tensor.hpp:678
const T & operator()(Is... is) const
Definition: host_tensor.hpp:476
HostTensor(const Lengths &lens)
Definition: host_tensor.hpp:295
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition: host_tensor.hpp:464
Data::pointer data()
Definition: host_tensor.hpp:523
T & operator()(Is... is)
Definition: host_tensor.hpp:470
HostTensor< OutT > CopyAsType() const
Definition: host_tensor.hpp:308
auto AsSpan() const
Definition: host_tensor.hpp:556
auto slice(std::vector< size_t > s_begin, std::vector< size_t > s_end) const
Definition: host_tensor.hpp:535
std::vector< T > Data
Definition: host_tensor.hpp:281
auto AsSpan()
Definition: host_tensor.hpp:567
Data::const_iterator begin() const
Definition: host_tensor.hpp:525
std::size_t get_num_of_dimension() const
Definition: host_tensor.hpp:339
std::size_t get_element_space_size() const
Definition: host_tensor.hpp:343
HostTensor(const Lengths &lens, const Strides &strides)
Definition: host_tensor.hpp:300
void loadtxt(std::string file_name, std::string dtype="float")
Definition: host_tensor.hpp:607
Data::const_pointer data() const
Definition: host_tensor.hpp:529
T & operator()(std::vector< std::size_t > idx)
Definition: host_tensor.hpp:481
void ForEach_impl(const F &&f, std::vector< size_t > &idx, size_t rank) const
Definition: host_tensor.hpp:377
HostTensor(const Descriptor &desc)
Definition: host_tensor.hpp:305
const T & operator()(std::vector< std::size_t > idx) const
Definition: host_tensor.hpp:486
HostTensor< T > transpose(std::vector< size_t > axes={})
Definition: host_tensor.hpp:514
Data::iterator begin()
Definition: host_tensor.hpp:519
void savetxt(std::string file_name, std::string dtype="float")
Definition: host_tensor.hpp:651
HostTensor(const HostTensor< FromT > &other)
Definition: host_tensor.hpp:327
HostTensor< T > transpose(std::vector< size_t > axes={}) const
Definition: host_tensor.hpp:491
std::size_t get_length(std::size_t dim) const
Definition: host_tensor.hpp:331
std::size_t get_element_size() const
Definition: host_tensor.hpp:341
void ForEach_impl(F &&f, std::vector< size_t > &idx, size_t rank)
Definition: host_tensor.hpp:354
Data::const_iterator end() const
Definition: host_tensor.hpp:527
Data mData
Definition: host_tensor.hpp:679
Definition: host_tensor.hpp:219
void operator()(std::size_t num_thread=1) const
Definition: host_tensor.hpp:249
ParallelTensorFunctor(F f, Xs... xs)
Definition: host_tensor.hpp:226
std::size_t mN1d
Definition: host_tensor.hpp:224
std::array< std::size_t, NDIM > mLens
Definition: host_tensor.hpp:222
std::array< std::size_t, NDIM > mStrides
Definition: host_tensor.hpp:223
static constexpr std::size_t NDIM
Definition: host_tensor.hpp:221
F mF
Definition: host_tensor.hpp:220
std::array< std::size_t, NDIM > GetNdIndices(std::size_t i) const
Definition: host_tensor.hpp:236
Definition: integral_constant.hpp:13
Definition: joinable_thread.hpp:12