GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference

GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference

Composable Kernel: ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference
ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference

#include <gridwise_sparse_embeddings_forward_layernorm.hpp>

Public Types

using ThreadwiseWolfordDesc2D = decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< DimSubBlocks *DimThreadSize >{}, Number< RowSubBlocks *RowVectorSize >{})))
 
using ThreadwiseWolfordDescReduce = decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< DimSubBlocks *DimThreadSize >{})))
 
using ThreadwiseWelford = ThreadwiseWelford< AccDataType, ThreadwiseWolfordDesc2D, ThreadwiseWolfordDescReduce >
 
using ThreadClusterLength = Sequence< DimClusterSize, RowClusterSize >
 
using BlockwiseWelford = BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLength, Sequence< 0, 1 > >
 

Static Public Member Functions

static __device__ void Run (OutType *p_out, const ck::Array< EmbType *, NumEmbeddings > p_embs, const ck::Array< IndexType *, NumEmbeddings > p_indexes, const GammaDataType *p_gamma, const BetaDataType *p_beta, const OutGridDesc, const AccDataType epsilon, const EmbElementwiseOperation emb_elementwise_op)
 

Static Public Attributes

static constexpr auto I0 = Number<0>{}
 
static constexpr auto I1 = Number<1>{}
 
static constexpr auto I2 = Number<2>{}
 
static constexpr auto I3 = Number<3>{}
 
static constexpr index_t WaveSize = 64
 
static constexpr auto DimSubBlocks = DimPerBlock / (DimClusterSize * DimThreadSize)
 
static constexpr auto RowSubBlocks = RowPerBlock / (RowClusterSize * RowVectorSize)
 
static constexpr auto DimPerSubBlock = DimPerBlock / DimSubBlocks
 
static constexpr auto RowPerSubBlock = RowPerBlock / RowSubBlocks
 

Member Typedef Documentation

◆ BlockwiseWelford

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::BlockwiseWelford = BlockwiseWelford<AccDataType, BlockSize, ThreadClusterLength, Sequence<0, 1> >

◆ ThreadClusterLength

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadClusterLength = Sequence<DimClusterSize, RowClusterSize>

◆ ThreadwiseWelford

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWelford = ThreadwiseWelford<AccDataType, ThreadwiseWolfordDesc2D, ThreadwiseWolfordDescReduce>

◆ ThreadwiseWolfordDesc2D

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDesc2D = decltype(make_naive_tensor_descriptor_packed(make_tuple( Number<DimSubBlocks * DimThreadSize>{}, Number<RowSubBlocks * RowVectorSize>{})))

◆ ThreadwiseWolfordDescReduce

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDescReduce = decltype(make_naive_tensor_descriptor_packed( make_tuple(Number<DimSubBlocks * DimThreadSize>{})))

Member Function Documentation

◆ Run()

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
static __device__ void ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::Run ( OutType *  p_out,
const ck::Array< EmbType *, NumEmbeddings >  p_embs,
const ck::Array< IndexType *, NumEmbeddings >  p_indexes,
const GammaDataType *  p_gamma,
const BetaDataType *  p_beta,
const  OutGridDesc,
const AccDataType  epsilon,
const EmbElementwiseOperation  emb_elementwise_op 
)
inline static

Member Data Documentation

◆ DimPerSubBlock

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::DimPerSubBlock = DimPerBlock / DimSubBlocks
static constexpr

◆ DimSubBlocks

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::DimSubBlocks = DimPerBlock / (DimClusterSize * DimThreadSize)
static constexpr

◆ I0

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I1 = Number<1>{}
static constexpr

◆ I2

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I2 = Number<2>{}
static constexpr

◆ I3

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I3 = Number<3>{}
static constexpr

◆ RowPerSubBlock

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::RowPerSubBlock = RowPerBlock / RowSubBlocks
static constexpr

◆ RowSubBlocks

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::RowSubBlocks = RowPerBlock / (RowClusterSize * RowVectorSize)
static constexpr

◆ WaveSize

template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr index_t ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::WaveSize = 64
static constexpr

The documentation for this struct was generated from the following file: