#include <gridwise_sparse_embeddings_forward_layernorm.hpp>
|
| static __device__ void | Run (OutType *p_out, const ck::Array< EmbType *, NumEmbeddings > p_embs, const ck::Array< IndexType *, NumEmbeddings > p_indexes, const GammaDataType *p_gamma, const BetaDataType *p_beta, const OutGridDesc, const AccDataType epsilon, const EmbElementwiseOperation emb_elementwise_op) |
| |
◆ BlockwiseWelford
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::BlockwiseWelford = BlockwiseWelford<AccDataType, BlockSize, ThreadClusterLength, Sequence<0, 1> > |
◆ ThreadClusterLength
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadClusterLength = Sequence<DimClusterSize, RowClusterSize> |
◆ ThreadwiseWelford
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWelford = ThreadwiseWelford<AccDataType, ThreadwiseWolfordDesc2D, ThreadwiseWolfordDescReduce> |
◆ ThreadwiseWolfordDesc2D
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDesc2D = decltype(make_naive_tensor_descriptor_packed(make_tuple( Number<DimSubBlocks * DimThreadSize>{}, Number<RowSubBlocks * RowVectorSize>{}))) |
◆ ThreadwiseWolfordDescReduce
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDescReduce = decltype(make_naive_tensor_descriptor_packed( make_tuple(Number<DimSubBlocks * DimThreadSize>{}))) |
◆ Run()
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| static __device__ void ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::Run (OutType *p_out, const ck::Array< EmbType *, NumEmbeddings > p_embs, const ck::Array< IndexType *, NumEmbeddings > p_indexes, const GammaDataType *p_gamma, const BetaDataType *p_beta, const OutGridDesc, const AccDataType epsilon, const EmbElementwiseOperation emb_elementwise_op) |
| |
|
inline static
◆ DimPerSubBlock
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::DimPerSubBlock = DimPerBlock / DimSubBlocks |
|
static constexpr
◆ DimSubBlocks
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::DimSubBlocks = DimPerBlock / (DimClusterSize * DimThreadSize) |
|
static constexpr
◆ I0
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I0 = Number<0>{} |
|
static constexpr
◆ I1
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I1 = Number<1>{} |
|
static constexpr
◆ I2
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I2 = Number<2>{} |
|
static constexpr
◆ I3
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I3 = Number<3>{} |
|
static constexpr
◆ RowPerSubBlock
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::RowPerSubBlock = RowPerBlock / RowSubBlocks |
|
static constexpr
◆ RowSubBlocks
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::RowSubBlocks = RowPerBlock / (RowClusterSize * RowVectorSize) |
|
static constexpr
◆ WaveSize
template<typename EmbType , typename IndexType , typename GammaDataType , typename BetaDataType , typename AccDataType , typename OutType , typename OutGridDesc , typename EmbElementwiseOperation , ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
| constexpr index_t ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::WaveSize = 64 |
|
static constexpr
The documentation for this struct was generated from the following file: