11 #include <type_traits>
13 #include <unordered_set>
// Fragment of the FillUniformDistribution functor (name taken from the declval expression in the
// range overload below). The struct header and the a_/b_ members are not part of this excerpt.
// Optional RNG seed, defaulting to 11939. When it holds no value the generators below are seeded
// from std::random_device instead.
42 std::optional<uint32_t>
seed_{11939};
// Iterator overload: fills the range with values drawn from
// std::uniform_real_distribution<float>(a_, b_) and converted to T via ck_tile::type_convert.
// Two code paths are visible: a chunked multi-threaded one and a single-generator one. The
// condition that selects between them is elided from this excerpt.
47 template <
typename ForwardIter>
48 void operator()(ForwardIter first, ForwardIter last)
const
// Chunked path: one contiguous chunk per hardware thread.
// NOTE(review): std::thread::hardware_concurrency() is permitted to return 0. num_thread is the
// divisor in the work_per_thread computation below, so a zero result would divide by zero unless
// an elided line guards against it - confirm.
52 uint32_t num_thread = std::thread::hardware_concurrency();
53 auto total =
static_cast<std::size_t
>(std::distance(first, last));
// Ceiling division: every chunk has work_per_thread elements except possibly the last ones.
54 auto work_per_thread =
static_cast<std::size_t
>((total + num_thread - 1) / num_thread);
56 std::vector<joinable_thread> threads(num_thread);
57 for(std::size_t it = 0; it < num_thread; ++it)
// Half-open element index range [iw_begin, iw_end) for chunk `it`; iw_end is clamped to total.
59 std::size_t iw_begin = it * work_per_thread;
60 std::size_t iw_end =
std::min((it + 1) * work_per_thread, total);
// `first` is captured by reference while the bounds are captured by value.
// NOTE(review): this relies on `first` outliving the worker; presumably the joinable_thread
// objects are joined before operator() returns - confirm.
61 auto thread_f = [
this, total, iw_begin, iw_end, &first] {
// Out-of-range guard (its body is elided). Because iw_end is already clamped to total above,
// only the iw_begin comparison can actually be true here.
62 if(iw_begin > total || iw_end > total)
// Per-chunk engine. With a seed present, the chunk's starting index is added to it so chunks get
// different seeds. Since iw_begin depends on num_thread, the values produced for a given seed
// depend on the machine's reported thread count.
65 std::mt19937 gen(
seed_.has_value() ? (*
seed_ + iw_begin)
66 : std::random_device{}());
67 std::uniform_real_distribution<float> dis(
a_,
b_);
// `first + iw_begin` requires an iterator that supports operator+ (random access), despite the
// ForwardIter template parameter name.
68 std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
69 return ck_tile::type_convert<T>(dis(gen));
// Single-generator path: one engine seeded with *seed_ (or std::random_device) feeds the whole
// [first, last) range. The name of the algorithm being called is on an elided line.
77 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
78 std::uniform_real_distribution<float> dis(
a_,
b_);
80 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(dis(gen)); });
// Range overload: participates in overload resolution only when the iterator overload is callable
// with std::begin(range)/std::end(range) (std::void_t over that call expression), and forwards to
// it.
84 template <
typename ForwardRange>
86 -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
87 std::begin(std::forward<ForwardRange>(range)),
88 std::end(std::forward<ForwardRange>(range))))>
90 (*this)(std::begin(std::forward<ForwardRange>(range)),
91 std::end(std::forward<ForwardRange>(range)));
// Template header at listing line 105; the declaration it introduces is elided.
// NOTE(review): presumably this belongs to impl::RawIntegerType, which is used below - confirm.
105 template <
typename T>
// Fragment of the FillUniformDistribution_Unique functor (name taken from the declval expression
// in the range overload below).
110 template <
typename T>
// Optional RNG seed, defaulting to 11939.
115 std::optional<uint32_t>
seed_{11939};
// Set keyed by the raw integer representation of T.
// NOTE(review): presumably records values already produced so repeats can be rejected; the code
// that inserts into / queries it is elided - confirm.
118 std::unordered_set<impl::RawIntegerType<T>>
set_{};
// Constructor parameter: the seed argument also defaults to 11939.
122 std::optional<uint32_t> seed = {11939})
// Iterator overload.
131 template <
typename ForwardIter>
// Binds the member engine gen_ by reference rather than constructing a fresh engine, so the
// engine state advances across calls.
134 std::mt19937& gen =
gen_;
135 std::uniform_real_distribution<float> dis(
a_,
b_);
// The lambda also captures a local named `set` by reference; its declaration is on an elided line.
137 std::generate(first, last, [&dis, &gen, &
set]() {
138 T v =
static_cast<T
>(0);
// Draw a uniform float and convert it to T.
// NOTE(review): the surrounding control flow (likely a retry-until-unique loop) is elided.
141 v = ck_tile::type_convert<T>(dis(gen));
// Range overload: forwards std::begin/std::end of the range to the iterator overload. Unlike the
// sibling fill functors, the declval here is a non-const reference.
149 template <
typename ForwardRange>
151 -> std::void_t<decltype(std::declval<FillUniformDistribution_Unique&>()(
152 std::begin(std::forward<ForwardRange>(range)),
153 std::end(std::forward<ForwardRange>(range))))>
155 (*this)(std::begin(std::forward<ForwardRange>(range)),
156 std::end(std::forward<ForwardRange>(range)));
// Fragment of the FillNormalDistribution functor (name taken from the declval expression in the
// range overload below). The declarations of `dis` used by the lambdas are on elided lines.
162 template <
typename T>
// Optional RNG seed, defaulting to 11939; std::random_device is used when it is empty.
167 std::optional<uint32_t>
seed_{11939};
// Iterator overload with two visible paths: a chunked multi-threaded one and a single-generator
// one. The condition that selects between them is elided.
171 template <
typename ForwardIter>
// NOTE(review): std::thread::hardware_concurrency() is permitted to return 0. num_thread is the
// divisor in the work_per_thread computation below, so a zero result would divide by zero unless
// an elided line guards against it - confirm.
176 uint32_t num_thread = std::thread::hardware_concurrency();
177 auto total =
static_cast<std::size_t
>(std::distance(first, last));
// Ceiling division of the element count by the thread count.
178 auto work_per_thread =
static_cast<std::size_t
>((total + num_thread - 1) / num_thread);
180 std::vector<joinable_thread> threads(num_thread);
181 for(std::size_t it = 0; it < num_thread; ++it)
// Half-open element index range [iw_begin, iw_end) for chunk `it`; iw_end is clamped to total.
183 std::size_t iw_begin = it * work_per_thread;
184 std::size_t iw_end =
std::min((it + 1) * work_per_thread, total);
// `first` is captured by reference; the chunk bounds are captured by value.
185 auto thread_f = [
this, total, iw_begin, iw_end, &first] {
// Out-of-range guard (body elided). iw_end is already clamped, so only iw_begin > total can hold.
186 if(iw_begin > total || iw_end > total)
// Per-chunk engine: a present seed is offset by the chunk's starting index, so the generated
// values for a given seed depend on how the range was chunked (i.e. on num_thread).
189 std::mt19937 gen(
seed_.has_value() ? (*
seed_ + iw_begin)
190 : std::random_device{}());
// `first + iw_begin` requires an iterator supporting operator+ (random access).
192 std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
193 return ck_tile::type_convert<T>(dis(gen));
// Single-generator path: one engine covers the whole [first, last) range.
201 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
204 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(dis(gen)); });
// Range overload: enabled only when the iterator overload is callable with
// std::begin(range)/std::end(range); forwards to it.
208 template <
typename ForwardRange>
210 -> std::void_t<decltype(std::declval<const FillNormalDistribution&>()(
211 std::begin(std::forward<ForwardRange>(range)),
212 std::end(std::forward<ForwardRange>(range))))>
214 (*this)(std::begin(std::forward<ForwardRange>(range)),
215 std::end(std::forward<ForwardRange>(range)));
// Fragment of the FillUniformDistributionIntegerValue functor (name taken from the declval
// expression in the range overload below).
238 template <
typename T>
// Optional RNG seed, defaulting to 11939; std::random_device is used when it is empty.
243 std::optional<uint32_t>
seed_{11939};
// Iterator overload: a single engine draws floats from uniform_real_distribution<float>(a_, b_).
245 template <
typename ForwardIter>
248 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
249 std::uniform_real_distribution<float> dis(
a_,
b_);
// Each sample is rounded with std::round before conversion to T, so only whole-number values are
// stored. The algorithm name for this call is on an elided line.
251 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(std::round(dis(gen))); });
// Range overload: enabled only when the iterator overload is callable with
// std::begin(range)/std::end(range); forwards to it.
254 template <
typename ForwardRange>
256 -> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
257 std::begin(std::forward<ForwardRange>(range)),
258 std::end(std::forward<ForwardRange>(range))))>
260 (*this)(std::begin(std::forward<ForwardRange>(range)),
261 std::end(std::forward<ForwardRange>(range)));
// Fragment of the FillNormalDistributionIntegerValue functor (name taken from the declval
// expression in the range overload below). The declaration of `dis` is on an elided line.
265 template <
typename T>
// Optional RNG seed, defaulting to 11939; std::random_device is used when it is empty.
270 std::optional<uint32_t>
seed_{11939};
// Iterator overload: a single engine feeds the whole range.
272 template <
typename ForwardIter>
275 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
// Each sample is rounded with std::round before conversion to T.
278 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(std::round(dis(gen))); });
// Range overload: enabled only when the iterator overload is callable with
// std::begin(range)/std::end(range); forwards to it.
281 template <
typename ForwardRange>
283 -> std::void_t<decltype(std::declval<const FillNormalDistributionIntegerValue&>()(
284 std::begin(std::forward<ForwardRange>(range)),
285 std::end(std::forward<ForwardRange>(range))))>
287 (*this)(std::begin(std::forward<ForwardRange>(range)),
288 std::end(std::forward<ForwardRange>(range)));
// Fragment of the FillMonotonicSeq functor (name taken from the declval expression in the range
// overload below).
292 template <
typename T>
// Iterator overload.
298 template <
typename ForwardIter>
// The generator lambda copies the functor (*this) and keeps its own running value `n`, which
// starts at init_value_; `mutable` lets it update `n` between calls.
301 std::generate(first, last, [=, *
this, n =
init_value_]()
mutable {
// Advances the running value by the step through the `.data` member of both operands.
// NOTE(review): presumably a branch for a T that wraps its storage in `.data`; the enclosing
// condition and the other branch are elided - confirm.
305 n.data +=
step_.data;
// Range overload: enabled only when the iterator overload is callable with
// std::begin(range)/std::end(range); forwards to it.
315 template <
typename ForwardRange>
317 -> std::void_t<decltype(std::declval<const FillMonotonicSeq&>()(
318 std::begin(std::forward<ForwardRange>(range)),
319 std::end(std::forward<ForwardRange>(range))))>
321 (*this)(std::begin(std::forward<ForwardRange>(range)),
322 std::end(std::forward<ForwardRange>(range)));
// Fragment of the FillStepRange functor (name taken from the declval expression in the range
// overload below). IsAscending is a compile-time flag that defaults to true.
326 template <
typename T,
bool IsAscending = true>
// Iterator overload.
333 template <
typename ForwardIter>
// The generator lambda copies the functor (*this) and keeps its own running value `n`, which
// starts at start_value_; `mutable` lets it update `n` between calls.
336 std::generate(first, last, [=, *
this, n =
start_value_]()
mutable {
// Compile-time selection on the direction flag; both branch bodies are elided.
339 if constexpr(IsAscending)
// The value held in `tmp` (declared on an elided line) is converted to T for storage.
350 return type_convert<T>(tmp);
// Range overload: forwards std::begin/std::end of the range to the iterator overload. The line
// that opens the trailing return type is elided; only the decltype expression is visible.
354 template <
typename ForwardRange>
356 decltype(std::declval<const FillStepRange&>()(std::begin(std::forward<ForwardRange>(range)),
357 std::end(std::forward<ForwardRange>(range))))>
359 (*this)(std::begin(std::forward<ForwardRange>(range)),
360 std::end(std::forward<ForwardRange>(range)));
// Fragment of the FillConstant functor (name taken from the declval expression in the range
// overload below). The members and the iterator overload's body are elided.
364 template <
typename T>
// Iterator overload (body elided).
369 template <
typename ForwardIter>
// Range overload: forwards std::begin/std::end of the range to the iterator overload. The line
// that opens the trailing return type is elided; only the decltype expression is visible.
375 template <
typename ForwardRange>
377 decltype(std::declval<const FillConstant&>()(std::begin(std::forward<ForwardRange>(range)),
378 std::end(std::forward<ForwardRange>(range))))>
380 (*this)(std::begin(std::forward<ForwardRange>(range)),
381 std::end(std::forward<ForwardRange>(range)));
// Fragment of the AdjustToStructuredSparsity functor (name taken from the declval expression in
// the range overload below). Nearly all of its body is elided from this excerpt.
388 template <
typename T>
// Iterator overload.
406 template <
typename ForwardIter>
// The value held in `tmp` (computed on elided lines) is converted to T for storage.
413 return type_convert<T>(tmp);
// Range overload: enabled only when the iterator overload is callable with
// std::begin(range)/std::end(range); forwards to it.
417 template <
typename ForwardRange>
419 -> std::void_t<decltype(std::declval<const AdjustToStructuredSparsity&>()(
420 std::begin(std::forward<ForwardRange>(range)),
421 std::end(std::forward<ForwardRange>(range))))>
423 (*this)(std::begin(std::forward<ForwardRange>(range)),
424 std::end(std::forward<ForwardRange>(range)));
// Fragment of the FillTrigValue functor (name taken from the declval expression in the range
// overload below). Compile-time flags: UseCos defaults to true, UseAbs defaults to false.
428 template <
typename T,
bool UseCos = true,
bool UseAbs = false>
// Second template header carrying the same three parameters under underscore-suffixed names.
// NOTE(review): presumably a nested generator type whose call operator produces each value; its
// declaration is elided - confirm.
431 template <
typename T_,
bool UseCos_ = true,
bool UseAbs_ = false>
// Compile-time branch on the cosine flag (both branch bodies are elided).
438 if constexpr(UseCos_)
// Compile-time branch on the absolute-value flag (body elided).
446 if constexpr(UseAbs_)
// The computed value `v` is converted to the element type T_.
449 return ck_tile::type_convert<T_>(v);
// Iterator overload: fills [first, last) by repeatedly invoking the generator object `gen`, which
// is declared on an elided line.
452 template <
typename ForwardIter>
456 std::generate(first, last, gen);
// Range overload: forwards std::begin/std::end of the range to the iterator overload. The line
// that opens the trailing return type is elided; only the decltype expression is visible.
459 template <
typename ForwardRange>
461 decltype(std::declval<const FillTrigValue&>()(std::begin(std::forward<ForwardRange>(range)),
462 std::end(std::forward<ForwardRange>(range))))>
464 (*this)(std::begin(std::forward<ForwardRange>(range)),
465 std::end(std::forward<ForwardRange>(range)));
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition: algorithm.hpp:25
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition: algorithm.hpp:36
typename RawIntegerType_< sizeof(T)>::type RawIntegerType
Definition: fill.hpp:106
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE Y bit_cast(const X &x)
Definition: bit_cast.hpp:11
CK_TILE_HOST T cos(T x)
Definition: math.hpp:752
CK_TILE_HOST T sin(T x)
Definition: math.hpp:698
int32_t int32_t
Definition: integer.hpp:10
CK_TILE_HOST_DEVICE bfloat16_t abs(const bfloat16_t &x)
Definition: bfloat16.hpp:393
constexpr bool is_same_v
Definition: type.hpp:283
Transforms given input to fit 2:4 structured sparsity pattern so every subgroup of 4 elements contain...
Definition: fill.hpp:390
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const AdjustToStructuredSparsity & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:418
size_t start
Definition: fill.hpp:391
static constexpr int32_t masks[]
Definition: fill.hpp:394
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:407
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillConstant & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:376
T value_
Definition: fill.hpp:367
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:370
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillMonotonicSeq & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:316
T init_value_
Definition: fill.hpp:295
T step_
Definition: fill.hpp:296
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:299
std::optional< uint32_t > seed_
Definition: fill.hpp:167
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:172
float variance_
Definition: fill.hpp:166
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistribution & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:209
bool threaded
Definition: fill.hpp:169
float mean_
Definition: fill.hpp:165
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:273
float mean_
Definition: fill.hpp:268
float variance_
Definition: fill.hpp:269
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistributionIntegerValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:282
std::optional< uint32_t > seed_
Definition: fill.hpp:270
float end_value_
Definition: fill.hpp:330
float start_value_
Definition: fill.hpp:329
float step_
Definition: fill.hpp:331
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:334
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillStepRange & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:355
int i
Definition: fill.hpp:434
auto operator()()
Definition: fill.hpp:435
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:453
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillTrigValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:460
uint8_t type
Definition: fill.hpp:99
uint16_t type
Definition: fill.hpp:100
uint32_t type
Definition: fill.hpp:101
uint64_t type
Definition: fill.hpp:102
Definition: joinable_thread.hpp:12
Definition: pk_int4.hpp:21