/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/reduce/kernel/multi_reduce2d_tile_partitioner.hpp Source File
multi_reduce2d_tile_partitioner.hpp
Go to the documentation of this file.
Definition: cluster_descriptor.hpp:13
__device__ uint32_t amd_wave_read_first_lane(uint16_t v)
Definition: amd_buffer_addressing.hpp:36
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:360
TilePartitioner for 2D reduction operations.
Definition: multi_reduce2d_tile_partitioner.hpp:13
CK_TILE_DEVICE auto GetInputTileOffsets(const index_t block_global_idx, const index_t block_group_size, const index_t num_iterations) const -> tuple< index_t, index_t >
Compute the input tile offsets for the given global block index, block group size, and number of iterations.
Definition: multi_reduce2d_tile_partitioner.hpp:101
CK_TILE_HOST_DEVICE auto GetBlockGroupParams() const noexcept -> tuple< index_t, index_t >
Calculate the number of iterations and the number of blocks required to perform the reduction.
Definition: multi_reduce2d_tile_partitioner.hpp:53
CK_TILE_DEVICE auto GetOutputTileIndexMultiBlock(index_t block_global_idx, index_t block_group_size) const noexcept -> tuple< index_t, index_t >
Get output tile index and block local ID for multi-block reduction.
Definition: multi_reduce2d_tile_partitioner.hpp:42
CK_TILE_DEVICE auto GetOutputTileIndex(index_t block_idx) const noexcept -> index_t
Get output tile index for threadwise reduction.
Definition: multi_reduce2d_tile_partitioner.hpp:32
static constexpr index_t NPerBlock
Definition: multi_reduce2d_tile_partitioner.hpp:19
static constexpr index_t MPerBlock
Definition: multi_reduce2d_tile_partitioner.hpp:18
remove_cvref_t< BlockShape_ > BlockShape
Definition: multi_reduce2d_tile_partitioner.hpp:14
static constexpr bool ForceMultiBlock
Definition: multi_reduce2d_tile_partitioner.hpp:16
CK_TILE_DEVICE index_t GetOutputTileOffset(const index_t block_group_id) const
Compute the output tile offset for the given operation and block group.
Definition: multi_reduce2d_tile_partitioner.hpp:117
CK_TILE_HOST_DEVICE Reduce2dTilePartitioner() noexcept=delete
Definition: tuple.hpp:192