/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/fill.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/fill.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/fill.hpp Source File
fill.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <iterator>
9 #include <optional>
10 #include <random>
11 #include <type_traits>
12 #include <utility>
13 #include <unordered_set>
14 
15 #include "ck_tile/core.hpp"
17 
18 namespace ck_tile {
19 
37 template <typename T>
39 {
40  float a_{-5.f};
41  float b_{5.f};
42  std::optional<uint32_t> seed_{11939};
43  // ATTENTION: Whether to use multi-threading (note: not guaranteed to be perfectly distributed
44  // across threads).
45  bool threaded = false;
46 
47  template <typename ForwardIter>
48  void operator()(ForwardIter first, ForwardIter last) const
49  {
50  if(threaded)
51  {
52  uint32_t num_thread = std::thread::hardware_concurrency();
53  auto total = static_cast<std::size_t>(std::distance(first, last));
54  auto work_per_thread = static_cast<std::size_t>((total + num_thread - 1) / num_thread);
55 
56  std::vector<joinable_thread> threads(num_thread);
57  for(std::size_t it = 0; it < num_thread; ++it)
58  {
59  std::size_t iw_begin = it * work_per_thread;
60  std::size_t iw_end = std::min((it + 1) * work_per_thread, total);
61  auto thread_f = [this, total, iw_begin, iw_end, &first] {
62  if(iw_begin > total || iw_end > total)
63  return;
64  // need to make each thread unique, add an offset to current seed
65  std::mt19937 gen(seed_.has_value() ? (*seed_ + iw_begin)
66  : std::random_device{}());
67  std::uniform_real_distribution<float> dis(a_, b_);
68  std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
69  return ck_tile::type_convert<T>(dis(gen));
70  });
71  };
72  threads[it] = joinable_thread(thread_f);
73  }
74  }
75  else
76  {
77  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
78  std::uniform_real_distribution<float> dis(a_, b_);
79  std::generate(
80  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
81  }
82  }
83 
84  template <typename ForwardRange>
85  auto operator()(ForwardRange&& range) const
86  -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
87  std::begin(std::forward<ForwardRange>(range)),
88  std::end(std::forward<ForwardRange>(range))))>
89  {
90  (*this)(std::begin(std::forward<ForwardRange>(range)),
91  std::end(std::forward<ForwardRange>(range)));
92  }
93 };
94 
95 namespace impl {
96 
97 // clang-format off
98 template<index_t bytes> struct RawIntegerType_ {};
99 template<> struct RawIntegerType_<1> { using type = uint8_t;};
100 template<> struct RawIntegerType_<2> { using type = uint16_t;};
101 template<> struct RawIntegerType_<4> { using type = uint32_t;};
102 template<> struct RawIntegerType_<8> { using type = uint64_t;};
103 // clang-format on
104 
105 template <typename T>
106 using RawIntegerType = typename RawIntegerType_<sizeof(T)>::type;
107 } // namespace impl
108 
109 // Note: this struct will have no const-ness will generate random
110 template <typename T>
112 {
113  float a_{-5.f};
114  float b_{5.f};
115  std::optional<uint32_t> seed_{11939};
116 
117  std::mt19937 gen_{};
118  std::unordered_set<impl::RawIntegerType<T>> set_{};
119 
121  float b = 5.f,
122  std::optional<uint32_t> seed = {11939})
123  : a_(a),
124  b_(b),
125  seed_(seed),
126  gen_{seed_.has_value() ? *seed_ : std::random_device{}()},
127  set_{}
128  {
129  }
130 
131  template <typename ForwardIter>
132  void operator()(ForwardIter first, ForwardIter last)
133  {
134  std::mt19937& gen = gen_;
135  std::uniform_real_distribution<float> dis(a_, b_);
136  auto& set = set_;
137  std::generate(first, last, [&dis, &gen, &set]() {
138  T v = static_cast<T>(0);
139  do
140  {
141  v = ck_tile::type_convert<T>(dis(gen));
142  } while(set.count(bit_cast<impl::RawIntegerType<T>>(v)) == 1);
144 
145  return v;
146  });
147  }
148 
149  template <typename ForwardRange>
150  auto operator()(ForwardRange&& range)
151  -> std::void_t<decltype(std::declval<FillUniformDistribution_Unique&>()(
152  std::begin(std::forward<ForwardRange>(range)),
153  std::end(std::forward<ForwardRange>(range))))>
154  {
155  (*this)(std::begin(std::forward<ForwardRange>(range)),
156  std::end(std::forward<ForwardRange>(range)));
157  }
158 
159  void clear() { set_.clear(); }
160 };
161 
162 template <typename T>
164 {
165  float mean_{0.f};
166  float variance_{1.f};
167  std::optional<uint32_t> seed_{11939};
168  // ATTENTION: threaded does not guarantee the distribution between thread
169  bool threaded = false;
170 
171  template <typename ForwardIter>
172  void operator()(ForwardIter first, ForwardIter last) const
173  {
174  if(threaded)
175  {
176  uint32_t num_thread = std::thread::hardware_concurrency();
177  auto total = static_cast<std::size_t>(std::distance(first, last));
178  auto work_per_thread = static_cast<std::size_t>((total + num_thread - 1) / num_thread);
179 
180  std::vector<joinable_thread> threads(num_thread);
181  for(std::size_t it = 0; it < num_thread; ++it)
182  {
183  std::size_t iw_begin = it * work_per_thread;
184  std::size_t iw_end = std::min((it + 1) * work_per_thread, total);
185  auto thread_f = [this, total, iw_begin, iw_end, &first] {
186  if(iw_begin > total || iw_end > total)
187  return;
188  // need to make each thread unique, add an offset to current seed
189  std::mt19937 gen(seed_.has_value() ? (*seed_ + iw_begin)
190  : std::random_device{}());
191  std::normal_distribution<float> dis(mean_, std::sqrt(variance_));
192  std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
193  return ck_tile::type_convert<T>(dis(gen));
194  });
195  };
196  threads[it] = joinable_thread(thread_f);
197  }
198  }
199  else
200  {
201  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
202  std::normal_distribution<float> dis(mean_, std::sqrt(variance_));
203  std::generate(
204  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
205  }
206  }
207 
208  template <typename ForwardRange>
209  auto operator()(ForwardRange&& range) const
210  -> std::void_t<decltype(std::declval<const FillNormalDistribution&>()(
211  std::begin(std::forward<ForwardRange>(range)),
212  std::end(std::forward<ForwardRange>(range))))>
213  {
214  (*this)(std::begin(std::forward<ForwardRange>(range)),
215  std::end(std::forward<ForwardRange>(range)));
216  }
217 };
218 
219 // Normally FillUniformDistributionIntegerValue should use std::uniform_int_distribution as below.
220 // However this produces segfaults in std::mt19937 which look like an infinite loop.
221 // template <typename T>
222 // struct FillUniformDistributionIntegerValue
223 // {
224 // int a_{-5};
225 // int b_{5};
226 //
227 // template <typename ForwardIter>
228 // void operator()(ForwardIter first, ForwardIter last) const
229 // {
230 // std::mt19937 gen(11939);
231 // std::uniform_int_distribution<int> dis(a_, b_);
232 // std::generate(
233 // first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
234 // }
235 // };
236 
237 // Workaround for uniform_int_distribution not working as expected. See note above.<
238 template <typename T>
240 {
241  float a_{-5.f};
242  float b_{5.f};
243  std::optional<uint32_t> seed_{11939};
244 
245  template <typename ForwardIter>
246  void operator()(ForwardIter first, ForwardIter last) const
247  {
248  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
249  std::uniform_real_distribution<float> dis(a_, b_);
250  std::generate(
251  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(std::round(dis(gen))); });
252  }
253 
254  template <typename ForwardRange>
255  auto operator()(ForwardRange&& range) const
256  -> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
257  std::begin(std::forward<ForwardRange>(range)),
258  std::end(std::forward<ForwardRange>(range))))>
259  {
260  (*this)(std::begin(std::forward<ForwardRange>(range)),
261  std::end(std::forward<ForwardRange>(range)));
262  }
263 };
264 
265 template <typename T>
267 {
268  float mean_{0.f};
269  float variance_{1.f};
270  std::optional<uint32_t> seed_{11939};
271 
272  template <typename ForwardIter>
273  void operator()(ForwardIter first, ForwardIter last) const
274  {
275  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
276  std::normal_distribution<float> dis(mean_, std::sqrt(variance_));
277  std::generate(
278  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(std::round(dis(gen))); });
279  }
280 
281  template <typename ForwardRange>
282  auto operator()(ForwardRange&& range) const
283  -> std::void_t<decltype(std::declval<const FillNormalDistributionIntegerValue&>()(
284  std::begin(std::forward<ForwardRange>(range)),
285  std::end(std::forward<ForwardRange>(range))))>
286  {
287  (*this)(std::begin(std::forward<ForwardRange>(range)),
288  std::end(std::forward<ForwardRange>(range)));
289  }
290 };
291 
292 template <typename T>
294 {
296  T step_{1};
297 
298  template <typename ForwardIter>
299  void operator()(ForwardIter first, ForwardIter last) const
300  {
301  std::generate(first, last, [=, *this, n = init_value_]() mutable {
302  auto tmp = n;
303  if constexpr(std::is_same_v<decltype(tmp), pk_int4_t>)
304  {
305  n.data += step_.data;
306  }
307  else
308  {
309  n += step_;
310  }
311  return tmp;
312  });
313  }
314 
315  template <typename ForwardRange>
316  auto operator()(ForwardRange&& range) const
317  -> std::void_t<decltype(std::declval<const FillMonotonicSeq&>()(
318  std::begin(std::forward<ForwardRange>(range)),
319  std::end(std::forward<ForwardRange>(range))))>
320  {
321  (*this)(std::begin(std::forward<ForwardRange>(range)),
322  std::end(std::forward<ForwardRange>(range)));
323  }
324 };
325 
326 template <typename T, bool IsAscending = true>
328 {
329  float start_value_{0};
330  float end_value_{3};
331  float step_{1};
332 
333  template <typename ForwardIter>
334  void operator()(ForwardIter first, ForwardIter last) const
335  {
336  std::generate(first, last, [=, *this, n = start_value_]() mutable {
337  auto tmp = n;
338  n += step_;
339  if constexpr(IsAscending)
340  {
341  if(n > end_value_)
342  n = start_value_;
343  }
344  else
345  {
346  if(n < end_value_)
347  n = start_value_;
348  }
349 
350  return type_convert<T>(tmp);
351  });
352  }
353 
354  template <typename ForwardRange>
355  auto operator()(ForwardRange&& range) const -> std::void_t<
356  decltype(std::declval<const FillStepRange&>()(std::begin(std::forward<ForwardRange>(range)),
357  std::end(std::forward<ForwardRange>(range))))>
358  {
359  (*this)(std::begin(std::forward<ForwardRange>(range)),
360  std::end(std::forward<ForwardRange>(range)));
361  }
362 };
363 
/// Fills a range with a single constant value.
///
/// @tparam T Type of the stored constant.
template <typename T>
struct FillConstant
{
    T value_{0}; ///< Value assigned to every element.

    /// Assigns `value_` to every element of [first, last).
    ///
    /// @param first Iterator to the first element to overwrite.
    /// @param last  Iterator one past the last element to overwrite.
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        std::fill(first, last, value_);
    }

    /// Range convenience overload: forwards `std::begin(range)` / `std::end(range)` to the
    /// iterator overload. Only participates in overload resolution when that call is valid.
    template <typename ForwardRange>
    auto operator()(ForwardRange&& range) const -> std::void_t<
        decltype(std::declval<const FillConstant&>()(std::begin(std::forward<ForwardRange>(range)),
                                                     std::end(std::forward<ForwardRange>(range))))>
    {
        (*this)(std::begin(std::forward<ForwardRange>(range)),
                std::end(std::forward<ForwardRange>(range)));
    }
};
384 
385 //----------------------------------------------------------------------------------------------
388 template <typename T>
390 {
391  size_t start{0};
392  // masks represent all valid 2:4 structured sparsity permutations
393  // clang-format off
394  static constexpr int32_t masks[] = {0, 0, 1, 1,
395  0, 1, 0, 1,
396  0, 1, 1, 0,
397  1, 0, 0, 1,
398  1, 0, 1, 0,
399  1, 1, 0, 0,
400  0, 0, 0, 1,
401  0, 0, 1, 0,
402  0, 1, 0, 0,
403  1, 0, 0, 0};
404  // clang-format on
405 
406  template <typename ForwardIter>
407  void operator()(ForwardIter first, ForwardIter last) const
408  {
409  std::transform(first, last, first, [=, *this, index = start](T val) mutable {
410  auto tmp = val * masks[index % (sizeof(masks) / sizeof(int32_t))];
411  index += 1;
412 
413  return type_convert<T>(tmp);
414  });
415  }
416 
417  template <typename ForwardRange>
418  auto operator()(ForwardRange&& range) const
419  -> std::void_t<decltype(std::declval<const AdjustToStructuredSparsity&>()(
420  std::begin(std::forward<ForwardRange>(range)),
421  std::end(std::forward<ForwardRange>(range))))>
422  {
423  (*this)(std::begin(std::forward<ForwardRange>(range)),
424  std::end(std::forward<ForwardRange>(range)));
425  }
426 };
427 
// Fills a range with trigonometric values of a running integer index, converted to the
// element type with ck_tile::type_convert.
// NOTE(review): the declaration lines of this struct, of the nested generator below, and of
// the `gen` object passed to std::generate are absent from this listing; the struct name
// FillTrigValue is taken from the declval expression in the range overload - confirm the
// remaining names against the original header.
428 template <typename T, bool UseCos = true, bool UseAbs = false>
430 {
// Nested generator: each call yields the value for the current index `i`, then advances it.
431  template <typename T_, bool UseCos_ = true, bool UseAbs_ = false>
433  {
434  int i{0};
435  auto operator()()
436  {
437  float v = 0;
// UseCos_ selects cos(i), otherwise sin(i); the argument is the integer counter itself.
438  if constexpr(UseCos_)
439  {
440  v = cos(i);
441  }
442  else
443  {
444  v = sin(i);
445  }
// UseAbs_ additionally takes the absolute value of the result.
446  if constexpr(UseAbs_)
447  v = abs(v);
448  i++;
449  return ck_tile::type_convert<T_>(v);
450  }
451  };
// Fills [first, last) by invoking the generator once per element.
452  template <typename ForwardIter>
453  void operator()(ForwardIter first, ForwardIter last) const
454  {
456  std::generate(first, last, gen);
457  }
458 
// Range convenience overload: forwards std::begin(range) / std::end(range) to the iterator
// overload; only participates in overload resolution when that call is valid.
459  template <typename ForwardRange>
460  auto operator()(ForwardRange&& range) const -> std::void_t<
461  decltype(std::declval<const FillTrigValue&>()(std::begin(std::forward<ForwardRange>(range)),
462  std::end(std::forward<ForwardRange>(range))))>
463  {
464  (*this)(std::begin(std::forward<ForwardRange>(range)),
465  std::end(std::forward<ForwardRange>(range)));
466  }
467 };
468 
469 } // namespace ck_tile
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition: algorithm.hpp:25
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition: algorithm.hpp:36
typename RawIntegerType_< sizeof(T)>::type RawIntegerType
Definition: fill.hpp:106
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE Y bit_cast(const X &x)
Definition: bit_cast.hpp:11
CK_TILE_HOST T cos(T x)
Definition: math.hpp:752
CK_TILE_HOST T sin(T x)
Definition: math.hpp:698
int32_t int32_t
Definition: integer.hpp:10
CK_TILE_HOST_DEVICE bfloat16_t abs(const bfloat16_t &x)
Definition: bfloat16.hpp:393
constexpr bool is_same_v
Definition: type.hpp:283
Transforms given input to fit 2:4 structured sparsity pattern so every subgroup of 4 elements contain...
Definition: fill.hpp:390
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const AdjustToStructuredSparsity & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:418
size_t start
Definition: fill.hpp:391
static constexpr int32_t masks[]
Definition: fill.hpp:394
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:407
Definition: fill.hpp:366
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillConstant & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:376
T value_
Definition: fill.hpp:367
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:370
Definition: fill.hpp:294
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillMonotonicSeq & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:316
T init_value_
Definition: fill.hpp:295
T step_
Definition: fill.hpp:296
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:299
Definition: fill.hpp:164
std::optional< uint32_t > seed_
Definition: fill.hpp:167
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:172
float variance_
Definition: fill.hpp:166
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistribution & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:209
bool threaded
Definition: fill.hpp:169
float mean_
Definition: fill.hpp:165
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:273
float mean_
Definition: fill.hpp:268
float variance_
Definition: fill.hpp:269
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistributionIntegerValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:282
std::optional< uint32_t > seed_
Definition: fill.hpp:270
Definition: fill.hpp:328
float end_value_
Definition: fill.hpp:330
float start_value_
Definition: fill.hpp:329
float step_
Definition: fill.hpp:331
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:334
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillStepRange & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:355
int i
Definition: fill.hpp:434
auto operator()()
Definition: fill.hpp:435
Definition: fill.hpp:430
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:453
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillTrigValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:460
void operator()(ForwardIter first, ForwardIter last)
Definition: fill.hpp:132
std::optional< uint32_t > seed_
Definition: fill.hpp:115
float a_
Definition: fill.hpp:113
FillUniformDistribution_Unique(float a=-5.f, float b=5.f, std::optional< uint32_t > seed={11939})
Definition: fill.hpp:120
auto operator()(ForwardRange &&range) -> std::void_t< decltype(std::declval< FillUniformDistribution_Unique & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:150
std::mt19937 gen_
Definition: fill.hpp:117
void clear()
Definition: fill.hpp:159
float b_
Definition: fill.hpp:114
std::unordered_set< impl::RawIntegerType< T > > set_
Definition: fill.hpp:118
Definition: fill.hpp:39
float b_
Definition: fill.hpp:41
bool threaded
Definition: fill.hpp:45
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:48
float a_
Definition: fill.hpp:40
std::optional< uint32_t > seed_
Definition: fill.hpp:42
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillUniformDistribution & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:85
std::optional< uint32_t > seed_
Definition: fill.hpp:243
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillUniformDistributionIntegerValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:255
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:246
float b_
Definition: fill.hpp:242
float a_
Definition: fill.hpp:241
uint8_t type
Definition: fill.hpp:99
uint16_t type
Definition: fill.hpp:100
uint32_t type
Definition: fill.hpp:101
uint64_t type
Definition: fill.hpp:102
Definition: fill.hpp:98
Definition: joinable_thread.hpp:12
Definition: pk_int4.hpp:21