BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference
ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference
#include <blockwise_welford.hpp>
Static Public Member Functions | |
template<typename CountDataType > | |
static __device__ void | Merge (T &mean_a, T &var_a, CountDataType &count_a, T mean_b, T var_b, CountDataType count_b) |
template<typename CountDataType > | |
static __device__ void | Run (T &mean_value, T &var_value, CountDataType &count) |
Static Public Attributes | |
static constexpr auto | BufferLength_M = ThreadClusterLengths_M_K::At(0) |
static constexpr auto | BufferLength_K = ThreadClusterLengths_M_K::At(1) |
static constexpr auto | block_buf_desc_m_k |
static constexpr auto | thread_cluster_desc |
Member Function Documentation
◆ Merge()
template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
template<typename CountDataType >
|
inline static |
◆ Run()
template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
template<typename CountDataType >
|
inline static |
Member Data Documentation
◆ block_buf_desc_m_k
template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
|
static constexpr |
Initial value:
= make_naive_tensor_descriptor_packed(
make_tuple(Number<BufferLength_M>{}, Number<BufferLength_K>{}))
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
◆ BufferLength_K
template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
|
static constexpr |
◆ BufferLength_M
template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
|
static constexpr |
◆ thread_cluster_desc
template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
|
static constexpr |
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ constexpr __device__ auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition: cluster_descriptor.hpp:13
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp