BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference

BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference

Composable Kernel: ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference
ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference

#include <blockwise_welford.hpp>

Static Public Member Functions

template<typename CountDataType >
static __device__ void Merge (T &mean_a, T &var_a, CountDataType &count_a, T mean_b, T var_b, CountDataType count_b)
 
template<typename CountDataType >
static __device__ void Run (T &mean_value, T &var_value, CountDataType &count)
 

Static Public Attributes

static constexpr auto BufferLength_M = ThreadClusterLengths_M_K::At(0)
 
static constexpr auto BufferLength_K = ThreadClusterLengths_M_K::At(1)
 
static constexpr auto block_buf_desc_m_k
 
static constexpr auto thread_cluster_desc
 

Member Function Documentation

◆ Merge()

template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
template<typename CountDataType >
static __device__ void ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::Merge ( T &  mean_a,
T &  var_a,
CountDataType &  count_a,
T  mean_b,
T  var_b,
CountDataType  count_b 
)
inline static

◆ Run()

template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
template<typename CountDataType >
static __device__ void ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::Run ( T &  mean_value,
T &  var_value,
CountDataType &  count 
)
inline static

Member Data Documentation

◆ block_buf_desc_m_k

template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
constexpr auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::block_buf_desc_m_k
static constexpr
Initial value:
= make_naive_tensor_descriptor_packed(
make_tuple(Number<BufferLength_M>{}, Number<BufferLength_K>{}))
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
__host__ constexpr __device__ auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:211

◆ BufferLength_K

template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
constexpr auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::BufferLength_K = ThreadClusterLengths_M_K::At(1)
static constexpr

◆ BufferLength_M

template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
constexpr auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::BufferLength_M = ThreadClusterLengths_M_K::At(0)
static constexpr

◆ thread_cluster_desc

template<typename T , index_t BlockSize, typename ThreadClusterLengths_M_K , typename ThreadClusterArrangeOrder , bool GetActualVariance = true>
constexpr auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::thread_cluster_desc
static constexpr
Initial value:
=
make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ constexpr __device__ auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition: cluster_descriptor.hpp:13

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp