PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation > Struct Template Reference

PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation > Struct Template Reference

Composable Kernel: ck::PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation > Struct Template Reference

#include <reduction_functions_blockwise.hpp>

Static Public Member Functions
template<typename BufferType >
static __device__ void	Reduce (BufferType &work_buffer, AccDataType &in_out_value)

Static Public Attributes
static constexpr auto	BufferLength_M = ThreadClusterLengths_M_K::At(0)

static constexpr auto	BufferLength_K = ThreadClusterLengths_M_K::At(1)

static constexpr auto	block_buf_desc_m_k

static constexpr auto	thread_cluster_desc

Member Function Documentation

◆ Reduce()

template<typename AccDataType , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , typename OpReduce , bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>

template<typename BufferType >

static __device__ void ck::PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation >::Reduce	(	BufferType &	work_buffer,
		AccDataType &	in_out_value
	)

inline static

Member Data Documentation

◆ block_buf_desc_m_k

template<typename AccDataType , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , typename OpReduce , bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>

constexpr auto ck::PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation >::block_buf_desc_m_k

static constexpr

Initial value:

= make_naive_tensor_descriptor_packed(make_tuple(Number<BufferLength_M>{}, Number<BufferLength_K>{}))

ck::make_naive_tensor_descriptor_packed

__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)

Definition: tensor_descriptor_helper.hpp:101

ck::make_tuple

__host__ constexpr __device__ auto make_tuple(Xs &&... xs)

Definition: tuple.hpp:211

◆ BufferLength_K

template<typename AccDataType , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , typename OpReduce , bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>

constexpr auto ck::PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation >::BufferLength_K = ThreadClusterLengths_M_K::At(1)

static constexpr

◆ BufferLength_M

template<typename AccDataType , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , typename OpReduce , bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>

constexpr auto ck::PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation >::BufferLength_M = ThreadClusterLengths_M_K::At(0)

static constexpr

◆ thread_cluster_desc

template<typename AccDataType , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , typename OpReduce , bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>

constexpr auto ck::PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation >::thread_cluster_desc

static constexpr

Initial value:

= make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})

ck::make_cluster_descriptor

__host__ constexpr __device__ auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})

Definition: cluster_descriptor.hpp:13

The documentation for this struct was generated from the following file:

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp

PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation > Struct Template Reference

PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation > Struct Template Reference

Static Public Member Functions

Static Public Attributes

Member Function Documentation

◆ Reduce()

Member Data Documentation

◆ block_buf_desc_m_k

◆ BufferLength_K

◆ BufferLength_M

◆ thread_cluster_desc

PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, OpReduce, PropagateNan, Accumulation > Struct Template Reference