DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference
ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

#include <device_normalization_bwd_gamma_beta_impl.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >:
ck::tensor_operation::device::DeviceNormalizationBwdGammaBeta< DYDataType, XDataType, MeanInvStdDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim > ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
 
struct  Invoker
 

Public Types

using GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1))
 
using GridDesc_M = decltype(MakeDst1dDescriptor({1}, {1}))
 
using GridwiseNormalizationBwdGammaBeta = GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >
 

Public Member Functions

template<index_t SrcVectorDim, index_t SrcVectorSize>
bool IsSrcVectorDimSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides)
 
template<index_t DstVectorSize>
bool IsDstVectorSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides)
 
bool IsSupportedArgument (const BaseArgument *p_arg) override
 
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::vector< index_t > inLengths, const std::vector< index_t > dyStrides, const std::vector< index_t > xStrides, const std::vector< index_t > meanStrides, const std::vector< index_t > invStdStrides, const std::vector< index_t > outLengths, const std::vector< index_t > dgammaStrides, const std::vector< index_t > dbetaStrides, const std::vector< index_t > reduceDims, const void *p_dy, const void *p_x, const void *p_mean, const void *p_invStd, void *p_dgamma, void *p_dbeta) override
 
virtual std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
 
std::string GetTypeString () const override
 
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 
 BaseOperator (const BaseOperator &)=default
 
BaseOperator & operator= (const BaseOperator &)=default
 
virtual std::string GetTypeIdName () const
 
virtual std::optional< std::string > GetObjectName () const
 
virtual std::optional< std::string > GetTemplateInfo () const
 
virtual std::string GetTypeIdHashCode () const
 
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
 
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
 
virtual ~BaseOperator ()
 

Static Public Member Functions

static auto MakeSrc2dDescriptor (const std::vector< index_t > &inLengths, const std::vector< index_t > &inStrides, int numBlockTileIteration)
 
static auto MakeDst1dDescriptor (const std::vector< index_t > &outLengths, const std::vector< index_t > &outStrides)
 

Static Public Attributes

static constexpr index_t DYSrcVectorDim = IsDYFastestDimReduced ? 1 : 0
 
static constexpr index_t XSrcVectorDim = IsXFastestDimReduced ? 1 : 0
 
static constexpr index_t MeanInvStdSrcVectorDim = IsMeanInvStdFastestDimReduced ? 1 : 0
 
static constexpr index_t NumInvariantDim = Rank - NumReduceDim
 
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
 
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
 
static constexpr bool reduceAllDim = (NumInvariantDim == 0)
 

Member Typedef Documentation

◆ GridDesc_M

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridDesc_M = decltype(MakeDst1dDescriptor({1}, {1}))

◆ GridDesc_M_K

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1))

◆ GridwiseNormalizationBwdGammaBeta

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridwiseNormalizationBwdGammaBeta = GridwiseNormalizationBwdGammaBeta_mk_to_k<DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize>

Member Function Documentation

◆ GetTypeString()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
std::string ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GetTypeString ( ) const
inline override virtual

◆ IsDstVectorSizeValid()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
template<index_t DstVectorSize>
bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::IsDstVectorSizeValid ( const std::vector< index_t > &  lengths,
const std::vector< index_t > &  strides 
)
inline

◆ IsSrcVectorDimSizeValid()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
template<index_t SrcVectorDim, index_t SrcVectorSize>
bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::IsSrcVectorDimSizeValid ( const std::vector< index_t > &  lengths,
const std::vector< index_t > &  strides 
)
inline

◆ IsSupportedArgument()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::IsSupportedArgument ( const BaseArgument * p_arg )
inline override virtual

◆ MakeArgumentPointer()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
std::unique_ptr<BaseArgument> ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeArgumentPointer ( const std::vector< index_t > inLengths,
const std::vector< index_t > dyStrides,
const std::vector< index_t > xStrides,
const std::vector< index_t > meanStrides,
const std::vector< index_t > invStdStrides,
const std::vector< index_t > outLengths,
const std::vector< index_t > dgammaStrides,
const std::vector< index_t > dbetaStrides,
const std::vector< index_t > reduceDims,
const void *  p_dy,
const void *  p_x,
const void *  p_mean,
const void *  p_invStd,
void *  p_dgamma,
void *  p_dbeta 
)
inline override virtual

◆ MakeDst1dDescriptor()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
static auto ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeDst1dDescriptor ( const std::vector< index_t > &  outLengths,
const std::vector< index_t > &  outStrides 
)
inline static

◆ MakeInvokerPointer()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
virtual std::unique_ptr<BaseInvoker> ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeInvokerPointer ( )
inline override virtual

◆ MakeSrc2dDescriptor()

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
static auto ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeSrc2dDescriptor ( const std::vector< index_t > &  inLengths,
const std::vector< index_t > &  inStrides,
int  numBlockTileIteration 
)
inline static

Member Data Documentation

◆ DYSrcVectorDim

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::DYSrcVectorDim = IsDYFastestDimReduced ? 1 : 0
static constexpr

◆ K_BlockTileSize

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ MeanInvStdSrcVectorDim

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MeanInvStdSrcVectorDim = IsMeanInvStdFastestDimReduced ? 1 : 0
static constexpr

◆ NumInvariantDim

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::NumInvariantDim = Rank - NumReduceDim
static constexpr

◆ reduceAllDim

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
constexpr bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::reduceAllDim = (NumInvariantDim == 0)
static constexpr

◆ XSrcVectorDim

template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::XSrcVectorDim = IsXFastestDimReduced ? 1 : 0
static constexpr

The documentation for this struct was generated from the following file: