/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.2/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp Source File#
reduction_functions_blockwise.hpp
Go to the documentation of this file.
14 // 1) work_buffer is a buffer (typically LDS) allocated externally as workspace; it does not contain any in/out data
87 // 1) work_buffer is a buffer (typically LDS) allocated externally as workspace; it does not contain any in/out data
159 // 1) work_val_buffer/work_idx_buffer are buffers (typically LDS) allocated externally as workspace; they do not contain any in/out data
160 // 2) work_val_buffer/work_idx_buffer have AccDataType/IndexDataType elements respectively, and their space size is no less than BlockSize
Definition: ck.hpp:264
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
__host__ constexpr __device__ auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition: cluster_descriptor.hpp:13
Definition: reduction_functions_blockwise.hpp:101
static constexpr auto BufferLength_M
Definition: reduction_functions_blockwise.hpp:105
static constexpr auto thread_cluster_desc
Definition: reduction_functions_blockwise.hpp:113
static constexpr auto block_buf_desc_m_k
Definition: reduction_functions_blockwise.hpp:110
static __device__ void Reduce(BufferType &work_buffer, AccDataType &in_out_value)
Definition: reduction_functions_blockwise.hpp:116
static constexpr auto BufferLength_K
Definition: reduction_functions_blockwise.hpp:106
Definition: reduction_functions_blockwise.hpp:28
static constexpr auto BufferLength_K
Definition: reduction_functions_blockwise.hpp:33
static constexpr auto block_buf_desc_m_k
Definition: reduction_functions_blockwise.hpp:37
static constexpr auto thread_cluster_desc
Definition: reduction_functions_blockwise.hpp:40
static __device__ void Reduce(BufferType &work_buffer, AccDataType &in_out_value)
Definition: reduction_functions_blockwise.hpp:44
static constexpr auto BufferLength_M
Definition: reduction_functions_blockwise.hpp:32
Definition: reduction_functions_blockwise.hpp:175
static constexpr auto BufferLength_K
Definition: reduction_functions_blockwise.hpp:180
static constexpr auto block_buf_desc_m_k
Definition: reduction_functions_blockwise.hpp:184
static constexpr auto thread_cluster_desc
Definition: reduction_functions_blockwise.hpp:187
static __device__ void Reduce(BufferType &work_val_buffer, IdxBufferType &work_idx_buffer, AccDataType &in_out_value, IndexDataType &in_out_index)
Definition: reduction_functions_blockwise.hpp:192
static constexpr auto BufferLength_M
Definition: reduction_functions_blockwise.hpp:179
Definition: integral_constant.hpp:10
Definition: type.hpp:177
Definition: functional2.hpp:31