147 static_assert(XPerTile % VecSize == 0,
"XPerTile must be a multiple of VecSize!");
150 static constexpr
index_t LargestVec = (XPerTile * YPerTile) / (num_warps * warp_size);
151 static constexpr
index_t X1 = VecSize > LargestVec ? LargestVec : VecSize;
156 static_assert(X0 * Y1 == warp_size,
"X0 * Y1 must cover whole wavefront!");
158 static constexpr
index_t Y0 = num_warps / NumWaveGroups;
161 static constexpr
index_t Y2 = YPerTile / (Y1 * Y0);
// Compile-time check that the product X0 * Y1 * Y0 * NumWaveGroups equals
// BlockSize. The diagnostic text names exactly the factors that appear in
// the condition so a failing instantiation points at the right quantities.
163 static_assert(X0 * Y1 * Y0 * NumWaveGroups == BlockSize,
164 "X0 * Y1 * Y0 * NumWaveGroups must cover whole workgroup!");
165 static_assert(Y0 * Y1 * Y2 == YPerTile,
"Y0, Y1, Y2 must cover whole YPerTile");
169 if constexpr(NumWaveGroups != 1)
193 if constexpr(NumWaveGroups != 1)
230 static_assert(XPerTile % VecSize == 0,
"XPerTile must be a multiple of VecSize!");
233 static constexpr
index_t LargestVec = (XPerTile * YPerTile) / (num_warps * warp_size);
234 static constexpr
index_t X1 = VecSize > LargestVec ? LargestVec : VecSize;
238 static_assert(X0 * Y2 == warp_size,
"X0 * Y2 must cover whole wavefront!");
// Compile-time check that the product X0 * Y2 * Y0 equals BlockSize.
// The diagnostic text names the same factors as the condition (Y0, not Y1).
241 static_assert(X0 * Y2 * Y0 == BlockSize,
"X0 * Y2 * Y0 must cover whole workgroup!");
243 static constexpr
index_t Y1 = YPerTile / (Y2 * Y0);
244 static_assert(Y0 * Y1 * Y2 == YPerTile,
"Y0, Y1, Y2 must cover whole YPerTile");
284 static_assert(XPerTile % VecSize == 0,
"XPerTile must be a multiple of VecSize!");
287 static constexpr
index_t LargestVec = (XPerTile * YPerTile) / (num_warps * warp_size);
288 static constexpr
index_t X1 = VecSize > LargestVec ? LargestVec : VecSize;
291 static_assert(X0 * Y2 == warp_size,
"X0 * Y2 must cover whole wavefront!");
293 static_assert(X0 * Y2 * Y1 == BlockSize,
"X0 * Y2 * Y1 must cover whole workgroup!");
294 static constexpr
index_t Y0 = YPerTile / (Y2 * Y1);
295 static_assert(Y0 * Y1 * Y2 == YPerTile,
"Y0, Y1, Y2 must cover whole YPerTile");
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE index_t get_warp_size()
Definition: arch.hpp:51
int32_t index_t
Definition: integer.hpp:9
tile_distribution_pattern
Enumeration describing static tile distribution patterns.
Definition: static_encoding_pattern.hpp:88
@ block_raked
Block raked pattern - aka linear.
@ thread_raked
Thread raked pattern.
@ warp_raked
Warp raked pattern.
constexpr CK_TILE_HOST_DEVICE auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition: tile_distribution.hpp:498
static constexpr CK_TILE_HOST_DEVICE auto Make2DStaticTileDistribution()
Definition: static_encoding_pattern.hpp:246
static constexpr CK_TILE_HOST_DEVICE auto MakeShuffled2DStaticTileDistribution()
Definition: static_encoding_pattern.hpp:257
static constexpr CK_TILE_HOST_DEVICE auto Make2DStaticTileDistribution()
Definition: static_encoding_pattern.hpp:167
static constexpr CK_TILE_HOST_DEVICE auto MakeShuffled2DStaticTileDistribution()
Definition: static_encoding_pattern.hpp:191
static constexpr CK_TILE_HOST_DEVICE auto MakeShuffled2DStaticTileDistribution()
Definition: static_encoding_pattern.hpp:308
static constexpr CK_TILE_HOST_DEVICE auto Make2DStaticTileDistribution()
Definition: static_encoding_pattern.hpp:297
Class creating 2D static tile distribution with different load/store patterns.
Definition: static_encoding_pattern.hpp:129
Definition: static_encoding_pattern.hpp:107
Definition: sequence.hpp:52
Definition: tile_distribution_encoding.hpp:26
Definition: tuple.hpp:192