/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp Source File#
block_to_ctile_map.hpp
Go to the documentation of this file.
259 struct BlockToCTileMap_M00_N0_M01Adapt : BlockToCTileMap_M00_N0_M01Adapt<MPerBlock, NPerBlock, void>
547 __host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n,
__host__ constexpr __device__ auto integer_divide_ceil(X x, Y y)
Definition: math.hpp:72
Definition: ck.hpp:264
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
__host__ constexpr __device__ auto make_merge_transform(const LowLengths &low_lengths)
Definition: multi_index_transform_helper.hpp:55
__host__ constexpr __device__ auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition: tensor_adaptor.hpp:429
__host__ __device__ bool DefaultValidCTileIndex(const CTileIdx &c_tile_idx, const CTileDim &c_tile_dim)
Definition: block_to_ctile_map.hpp:833
__host__ constexpr __device__ auto make_pass_through_transform(const LowLength &low_length)
Definition: multi_index_transform_helper.hpp:12
__host__ constexpr __device__ auto make_insert_transform(const UpperIndex &up_idx)
Definition: multi_index_transform_helper.hpp:104
__host__ constexpr __device__ auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation >=integral_constant< bool, false >{})
Definition: multi_index_transform_helper.hpp:90
__host__ constexpr __device__ auto chain_tensor_adaptors(const TensorAdaptor0 &adaptor0, const TensorAdaptor1 &adaptor1)
Definition: tensor_adaptor.hpp:245
Simple tile mapping which creates 3D grid of block of threads.
Definition: block_to_ctile_map.hpp:974
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &) const
Definition: block_to_ctile_map.hpp:1001
__host__ __device__ bool ValidCTileIndex(const CTileIdx &, const CTileDim &) const
Definition: block_to_ctile_map.hpp:994
constexpr __device__ auto CalculateBottomIndex(const TopIdx &) const
Definition: block_to_ctile_map.hpp:988
__host__ constexpr __device__ auto CalculateGridSize(index_t M, index_t N, index_t k_split) const
Definition: block_to_ctile_map.hpp:979
__host__ __device__ BlockToCTileMap_3DGrid_KSplit()=default
Definition: block_to_ctile_map.hpp:1417
__host__ __device__ uint32_t get_sk_tiles() const
Definition: block_to_ctile_map.hpp:1538
MDiv k_iters_per_tile
Definition: block_to_ctile_map.hpp:1433
__host__ __device__ uint32_t get_workspace_size(uint32_t acc_element_bytes) const
Definition: block_to_ctile_map.hpp:1646
__host__ __device__ uint32_t get_tile_intersections(uint32_t tiles_, const MDiv &equiv_tiles_) const
Definition: block_to_ctile_map.hpp:1651
MDiv equiv_tiles_little
Definition: block_to_ctile_map.hpp:1435
__device__ uint32_t get_acc_buffer_offset_from_block(uint32_t block_idx_) const
Definition: block_to_ctile_map.hpp:1709
uint32_t dp_start_block_idx
Definition: block_to_ctile_map.hpp:1429
static constexpr uint32_t KPerBlock
Definition: block_to_ctile_map.hpp:1421
static constexpr uint32_t NPerBlock
Definition: block_to_ctile_map.hpp:1420
static constexpr uint32_t min_k_iters_per_sk_block
Definition: block_to_ctile_map.hpp:1418
__host__ __device__ uint32_t get_sk_total_iters() const
Definition: block_to_ctile_map.hpp:1531
__host__ __device__ uint32_t get_total_acc_buffers() const
Definition: block_to_ctile_map.hpp:1668
__host__ __device__ index_t get_grid_dims() const
Definition: block_to_ctile_map.hpp:1545
__device__ uint32_t get_tile_idx(uint32_t iter) const
Definition: block_to_ctile_map.hpp:1596
__host__ __device__ uint32_t get_workspace_size_for_semaphore() const
Definition: block_to_ctile_map.hpp:1641
__device__ void get_block_itr(uint32_t block_idx, uint32_t &iter_start, uint32_t &iter_end) const
Definition: block_to_ctile_map.hpp:1563
uint32_t k_iters_per_big_block
Definition: block_to_ctile_map.hpp:1431
uint32_t sk_num_big_blocks
Definition: block_to_ctile_map.hpp:1428
MDiv equiv_tiles_big
Definition: block_to_ctile_map.hpp:1434
__device__ uint32_t get_acc_buffer_offset_from_tile(uint32_t tile_idx_) const
Definition: block_to_ctile_map.hpp:1683
__device__ auto tile_to_spatial(uint32_t tile_idx, uint32_t m, uint32_t n) const
Definition: block_to_ctile_map.hpp:1604
__host__ static constexpr __device__ index_t CalculateGridSize(index_t M, index_t N)
Definition: block_to_ctile_map.hpp:1524
__host__ __device__ uint32_t get_workspace_size_for_acc(uint32_t acc_element_bytes) const
Definition: block_to_ctile_map.hpp:1633
__device__ uint32_t get_current_iter_length(uint32_t iter_start, uint32_t iter_end, uint32_t total_iter_length) const
Definition: block_to_ctile_map.hpp:1585
static constexpr uint32_t tile_swizzle_sub_m
Definition: block_to_ctile_map.hpp:1423
static constexpr StreamKReductionStrategy ReductionStrategy
Definition: block_to_ctile_map.hpp:1422
uint32_t reduction_start_block_idx
Definition: block_to_ctile_map.hpp:1430
__host__ __device__ uint32_t get_tiles_cover_sk_block(uint32_t num_sk_blocks_, uint32_t iters_per_sk_block_) const
Definition: block_to_ctile_map.hpp:1661
__host__ __device__ BlockToCTileMap_GemmStreamK_v2(uint32_t m, uint32_t n, uint32_t k, uint32_t grid_size=1, uint32_t streamk_sel=1)
Definition: block_to_ctile_map.hpp:1438
uint32_t sk_num_blocks
Definition: block_to_ctile_map.hpp:1427
__device__ uint32_t get_block_idx() const
Definition: block_to_ctile_map.hpp:1556
__device__ void get_tile_idx_with_offset(uint32_t iter, uint32_t &tile_idx, uint32_t &iter_offset) const
Definition: block_to_ctile_map.hpp:1599
static constexpr uint32_t MPerBlock
Definition: block_to_ctile_map.hpp:1419
Definition: block_to_ctile_map.hpp:1019
uint32_t k_iters_per_big_block
Definition: block_to_ctile_map.hpp:1033
__host__ __device__ uint32_t get_workspace_size(uint32_t acc_element_bytes) const
Definition: block_to_ctile_map.hpp:1324
__device__ uint32_t get_acc_buffer_offset_from_block(uint32_t block_idx_) const
Definition: block_to_ctile_map.hpp:1387
__host__ __device__ uint32_t get_sk_total_iters() const
Definition: block_to_ctile_map.hpp:1210
__host__ __device__ uint32_t get_tiles_cover_sk_block(uint32_t num_sk_blocks_, uint32_t iters_per_sk_block_) const
Definition: block_to_ctile_map.hpp:1339
static constexpr uint32_t MPerBlock
Definition: block_to_ctile_map.hpp:1021
uint32_t dp_start_block_idx
Definition: block_to_ctile_map.hpp:1031
__host__ __device__ uint32_t get_sk_tiles() const
Definition: block_to_ctile_map.hpp:1217
static constexpr uint32_t KPerBlock
Definition: block_to_ctile_map.hpp:1023
__host__ __device__ uint32_t get_total_acc_buffers() const
Definition: block_to_ctile_map.hpp:1346
__device__ uint32_t get_current_iter_length(uint32_t iter_start, uint32_t iter_end, uint32_t total_iter_length) const
Definition: block_to_ctile_map.hpp:1263
static constexpr uint32_t NPerBlock
Definition: block_to_ctile_map.hpp:1022
__device__ uint32_t get_acc_buffer_offset_from_tile(uint32_t tile_idx_) const
Definition: block_to_ctile_map.hpp:1361
uint32_t reduction_start_block_idx
Definition: block_to_ctile_map.hpp:1032
__host__ __device__ uint32_t get_workspace_size_for_acc(uint32_t acc_element_bytes) const
Definition: block_to_ctile_map.hpp:1311
MDiv k_iters_per_tile
Definition: block_to_ctile_map.hpp:1035
__device__ void get_tile_idx_with_offset(uint32_t iter, uint32_t &tile_idx, uint32_t &iter_offset) const
Definition: block_to_ctile_map.hpp:1277
static constexpr uint32_t tile_swizzle_sub_m
Definition: block_to_ctile_map.hpp:1025
BlockToCTileMap_GemmStreamK(uint32_t m, uint32_t n, uint32_t k, uint32_t num_cu, uint32_t occupancy, uint32_t sk_blocks=0xffffffff)
Definition: block_to_ctile_map.hpp:1043
static constexpr StreamKReductionStrategy ReductionStrategy
Definition: block_to_ctile_map.hpp:1024
__device__ auto tile_to_spatial(uint32_t tile_idx, uint32_t m, uint32_t n) const
Definition: block_to_ctile_map.hpp:1282
__device__ uint32_t get_tile_idx(uint32_t iter) const
Definition: block_to_ctile_map.hpp:1274
__host__ __device__ uint32_t get_tile_intersections(uint32_t tiles_, const MDiv &eqav_tiles_) const
Definition: block_to_ctile_map.hpp:1329
__device__ uint32_t get_block_idx() const
Definition: block_to_ctile_map.hpp:1234
__device__ void get_block_itr(uint32_t block_idx, uint32_t &iter_start, uint32_t &iter_end) const
Definition: block_to_ctile_map.hpp:1241
MDiv eqav_tiles_little
Definition: block_to_ctile_map.hpp:1037
uint32_t sk_num_blocks
Definition: block_to_ctile_map.hpp:1029
MDiv eqav_tiles_big
Definition: block_to_ctile_map.hpp:1036
static constexpr uint32_t min_k_iters_per_sk_block
Definition: block_to_ctile_map.hpp:1020
uint32_t sk_num_big_blocks
Definition: block_to_ctile_map.hpp:1030
__host__ __device__ dim3 get_grid_dims() const
Definition: block_to_ctile_map.hpp:1224
__host__ __device__ uint32_t get_workspace_size_for_semaphore() const
Definition: block_to_ctile_map.hpp:1319
Definition: block_to_ctile_map.hpp:270
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:296
__host__ static constexpr __device__ index_t CalculateGridSize(index_t M, index_t N)
Definition: block_to_ctile_map.hpp:281
static constexpr auto I1
Definition: block_to_ctile_map.hpp:272
__host__ bool CheckValidity(const CGridDesc_M_N &) const
Definition: block_to_ctile_map.hpp:290
static constexpr auto I0
Definition: block_to_ctile_map.hpp:271
__host__ __device__ bool ValidCTileIndex(const CTileIdx &, const CTileDim &) const
Definition: block_to_ctile_map.hpp:382
__host__ __device__ BlockToCTileMap_Grouped_M00_N0_M01Adapt(index_t M, index_t N, index_t M01=8)
Definition: block_to_ctile_map.hpp:274
Definition: block_to_ctile_map.hpp:718
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:753
static constexpr auto I2
Definition: block_to_ctile_map.hpp:721
static constexpr auto I0
Definition: block_to_ctile_map.hpp:719
__host__ BlockToCTileMap_KSplit_M00_N00_M01_N01(const CGridDesc_M_N &c_grid_desc_m_n, index_t M01=1, index_t N01=1, index_t KSplit=1)
Definition: block_to_ctile_map.hpp:726
__host__ BlockToCTileMap_KSplit_M00_N00_M01_N01()=default
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:771
static constexpr auto I3
Definition: block_to_ctile_map.hpp:722
static constexpr auto I1
Definition: block_to_ctile_map.hpp:720
__host__ __device__ bool ValidCTileIndex(const CTileIdx &c_tile_idx, const CTileDim &c_tile_dim) const
Definition: block_to_ctile_map.hpp:762
__host__ constexpr __device__ index_t CalculateGridSize(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:739
Definition: block_to_ctile_map.hpp:539
__host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt(const CGridDesc_M_N &c_grid_desc_m_n, index_t M01=8, index_t KSplit=1)
Definition: block_to_ctile_map.hpp:547
__host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt()=default
static constexpr auto I0
Definition: block_to_ctile_map.hpp:540
static constexpr auto I1
Definition: block_to_ctile_map.hpp:541
__host__ __device__ bool ValidCTileIndex(const CTileIdx &, const CTileDim &) const
Definition: block_to_ctile_map.hpp:592
constexpr __host__ index_t CalculateGridSize(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:554
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:565
static constexpr auto I2
Definition: block_to_ctile_map.hpp:542
static constexpr auto I3
Definition: block_to_ctile_map.hpp:543
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &) const
Definition: block_to_ctile_map.hpp:598
Definition: block_to_ctile_map.hpp:615
__host__ __device__ BlockToCTileMap_M00_N00_M01_N01()=default
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:659
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:644
__host__ __device__ BlockToCTileMap_M00_N00_M01_N01(const CGridDesc_M_N &c_grid_desc_m_n, index_t M01=1, index_t N01=1)
Definition: block_to_ctile_map.hpp:623
constexpr __host__ index_t CalculateGridSize(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:630
static constexpr auto I0
Definition: block_to_ctile_map.hpp:616
static constexpr auto I3
Definition: block_to_ctile_map.hpp:619
static constexpr auto I1
Definition: block_to_ctile_map.hpp:617
__host__ __device__ bool ValidCTileIndex(const CTileIdx &c_tile_idx, const CTileDim &c_tile_dim) const
Definition: block_to_ctile_map.hpp:650
static constexpr auto I2
Definition: block_to_ctile_map.hpp:618
Definition: block_to_ctile_map.hpp:122
__host__ constexpr __device__ bool ValidCTileIndex(const CTileIdx &, const CTileDim &) const
Definition: block_to_ctile_map.hpp:245
__host__ static constexpr __device__ index_t CalculateGridSize(index_t M, index_t N)
Definition: block_to_ctile_map.hpp:157
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01Adapt()=default
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01Adapt(BlockToCTileMap_M00_N0_M01Adapt &&)=default
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01Adapt & operator=(BlockToCTileMap_M00_N0_M01Adapt &&)=default
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01Adapt(const BlockToCTileMap_M00_N0_M01Adapt &)=default
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01Adapt & operator=(const BlockToCTileMap_M00_N0_M01Adapt &)=default
static constexpr __host__ index_t CalculateGridSize(const CGridDesc_M_N &c_grid_desc_m_n)
Definition: block_to_ctile_map.hpp:166
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01Adapt(const CGridDesc_M_N &c_grid_desc_m_n, index_t M01=8)
Definition: block_to_ctile_map.hpp:150
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01Adapt(index_t M, index_t N, index_t M01=8)
Definition: block_to_ctile_map.hpp:138
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:178
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &) const
Definition: block_to_ctile_map.hpp:172
Definition: block_to_ctile_map.hpp:260
Definition: block_to_ctile_map.hpp:24
constexpr __host__ index_t CalculateGridSize(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:38
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:66
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01(const CGridDesc_M_N &c_grid_desc_m_n, index_t M01=1)
Definition: block_to_ctile_map.hpp:32
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:51
__host__ constexpr __device__ BlockToCTileMap_M00_N0_M01()=default
__host__ constexpr __device__ bool ValidCTileIndex(const CTileIdx &c_tile_idx, const CTileDim &c_tile_dim) const
Definition: block_to_ctile_map.hpp:57
__host__ bool CheckValidity(const CGridDesc_M_N &) const
Definition: block_to_ctile_map.hpp:449
__host__ __device__ BlockToCTileMap_N00_M0_N01Adapt()=default
__host__ __device__ BlockToCTileMap_N00_M0_N01Adapt(const CGridDesc_M_N &c_grid_desc_m_n, index_t N01=8)
Definition: block_to_ctile_map.hpp:427
static constexpr __host__ index_t CalculateGridSize(const CGridDesc_M_N &c_grid_desc_m_n)
Definition: block_to_ctile_map.hpp:443
__host__ __device__ BlockToCTileMap_N00_M0_N01Adapt & operator=(const BlockToCTileMap_N00_M0_N01Adapt &)=default
__host__ __device__ bool ValidCTileIndex(const CTileIdx &, const CTileDim &) const
Definition: block_to_ctile_map.hpp:523
__host__ __device__ BlockToCTileMap_N00_M0_N01Adapt & operator=(BlockToCTileMap_N00_M0_N01Adapt &&)=default
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:455
__host__ __device__ BlockToCTileMap_N00_M0_N01Adapt(const BlockToCTileMap_N00_M0_N01Adapt &)=default
__host__ __device__ BlockToCTileMap_N00_M0_N01Adapt(BlockToCTileMap_N00_M0_N01Adapt &&)=default
__host__ __device__ BlockToCTileMap_N00_M0_N01Adapt(index_t M, index_t N, index_t N01=8)
Definition: block_to_ctile_map.hpp:416
__host__ static constexpr __device__ index_t CalculateGridSize(index_t M, index_t N)
Definition: block_to_ctile_map.hpp:434
Definition: block_to_ctile_map.hpp:397
Definition: magic_division.hpp:207
__host__ __device__ void divmod(uint32_t dividend_, uint32_t divisor_, uint32_t "ient_, uint32_t &remainder_) const
Definition: magic_division.hpp:229
Definition: magic_division.hpp:165
__host__ __device__ void divmod(uint32_t dividend_, uint32_t "ient_, uint32_t &remainder_) const
Definition: magic_division.hpp:197
__host__ __device__ uint32_t div(uint32_t dividend_) const
Definition: magic_division.hpp:191
Definition: block_to_ctile_map.hpp:917
__host__ __device__ bool ValidCTileIndex(const CTileIdx &c_tile_idx, const CTileDim &c_tile_dim) const
Definition: block_to_ctile_map.hpp:937
index_t tile_offset_
Definition: block_to_ctile_map.hpp:957
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:944
UnderlyingBlockToCTileMap block_to_ctile_map_
Definition: block_to_ctile_map.hpp:955
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:930
__host__ __device__ OffsettedBlockToCTileMap2(UnderlyingBlockToCTileMap block_to_ctile_map, index_t group_offset, index_t tile_offset)
Definition: block_to_ctile_map.hpp:920
UnderlyingBlockToCTileMap underlying_type
Definition: block_to_ctile_map.hpp:918
index_t group_offset_
Definition: block_to_ctile_map.hpp:956
__device__ void UpdateTileOffset(index_t offset)
Definition: block_to_ctile_map.hpp:954
__host__ constexpr __device__ index_t CalculateGridSize(index_t M, index_t N) const
Definition: block_to_ctile_map.hpp:949
Definition: block_to_ctile_map.hpp:870
__host__ __device__ bool ValidCTileIndex(const CTileIdx &c_tile_idx, const CTileDim &c_tile_dim) const
Definition: block_to_ctile_map.hpp:888
__host__ constexpr __device__ index_t CalculateGridSize(index_t M, index_t N) const
Definition: block_to_ctile_map.hpp:906
constexpr __host__ bool CheckValidity(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:895
constexpr __host__ index_t CalculateGridSize(const CGridDesc_M_N &c_grid_desc_m_n) const
Definition: block_to_ctile_map.hpp:901
index_t block_start_
Definition: block_to_ctile_map.hpp:912
__host__ constexpr __device__ auto CalculateBottomIndex(const TopIdx &idx_top) const
Definition: block_to_ctile_map.hpp:881
__host__ __device__ OffsettedBlockToCTileMap(UnderlyingBlockToCTileMap block_to_ctile_map, index_t block_start)
Definition: block_to_ctile_map.hpp:873
UnderlyingBlockToCTileMap underlying_type
Definition: block_to_ctile_map.hpp:871
UnderlyingBlockToCTileMap block_to_ctile_map_
Definition: block_to_ctile_map.hpp:911
Definition: sequence.hpp:43
Definition: integral_constant.hpp:10