DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference
#include <device_normalization_bwd_gamma_beta_impl.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >:
Classes | |
| struct | Argument |
| struct | Invoker |
Public Types | |
| using | GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1)) |
| using | GridDesc_M = decltype(MakeDst1dDescriptor({1}, {1})) |
| using | GridwiseNormalizationBwdGammaBeta = GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > |
Public Member Functions | |
| template<index_t SrcVectorDim, index_t SrcVectorSize> | |
| bool | IsSrcVectorDimSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides) |
| template<index_t DstVectorSize> | |
| bool | IsDstVectorSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides) |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const std::vector< index_t > inLengths, const std::vector< index_t > dyStrides, const std::vector< index_t > xStrides, const std::vector< index_t > meanStrides, const std::vector< index_t > invStdStrides, const std::vector< index_t > outLengths, const std::vector< index_t > dgammaStrides, const std::vector< index_t > dbetaStrides, const std::vector< index_t > reduceDims, const void *p_dy, const void *p_x, const void *p_mean, const void *p_invStd, void *p_dgamma, void *p_dbeta) override |
| virtual std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static auto | MakeSrc2dDescriptor (const std::vector< index_t > &inLengths, const std::vector< index_t > &inStrides, int numBlockTileIteration) |
| static auto | MakeDst1dDescriptor (const std::vector< index_t > &outLengths, const std::vector< index_t > &outStrides) |
Static Public Attributes | |
| static constexpr index_t | DYSrcVectorDim = IsDYFastestDimReduced ? 1 : 0 |
| static constexpr index_t | XSrcVectorDim = IsXFastestDimReduced ? 1 : 0 |
| static constexpr index_t | MeanInvStdSrcVectorDim = IsMeanInvStdFastestDimReduced ? 1 : 0 |
| static constexpr index_t | NumInvariantDim = Rank - NumReduceDim |
| static constexpr index_t | M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
| static constexpr index_t | K_BlockTileSize = KThreadClusterSize * KThreadSliceSize |
| static constexpr bool | reduceAllDim = (NumInvariantDim == 0) |
Member Typedef Documentation
◆ GridDesc_M
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridDesc_M = decltype(MakeDst1dDescriptor({1}, {1})) |
◆ GridDesc_M_K
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1)) |
◆ GridwiseNormalizationBwdGammaBeta
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridwiseNormalizationBwdGammaBeta = GridwiseNormalizationBwdGammaBeta_mk_to_k<DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize> |
Member Function Documentation
◆ GetTypeString()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| std::string GetTypeString () const |
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsDstVectorSizeValid()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
template<index_t DstVectorSize>
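| bool IsDstVectorSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides) |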
inline |
◆ IsSrcVectorDimSizeValid()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
template<index_t SrcVectorDim, index_t SrcVectorSize>
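| bool IsSrcVectorDimSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides) |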
inline |
◆ IsSupportedArgument()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| bool IsSupportedArgument (const BaseArgument *p_arg) |
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ MakeArgumentPointer()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
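| std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::vector< index_t > inLengths, const std::vector< index_t > dyStrides, const std::vector< index_t > xStrides, const std::vector< index_t > meanStrides, const std::vector< index_t > invStdStrides, const std::vector< index_t > outLengths, const std::vector< index_t > dgammaStrides, const std::vector< index_t > dbetaStrides, const std::vector< index_t > reduceDims, const void *p_dy, const void *p_x, const void *p_mean, const void *p_invStd, void *p_dgamma, void *p_dbeta) |
inline override virtual |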
◆ MakeDst1dDescriptor()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
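| auto MakeDst1dDescriptor (const std::vector< index_t > &outLengths, const std::vector< index_t > &outStrides) |
inline static |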
◆ MakeInvokerPointer()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| std::unique_ptr< BaseInvoker > MakeInvokerPointer () |
inline override virtual |
◆ MakeSrc2dDescriptor()
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
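| auto MakeSrc2dDescriptor (const std::vector< index_t > &inLengths, const std::vector< index_t > &inStrides, int numBlockTileIteration) |
inline static |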
Member Data Documentation
◆ DYSrcVectorDim
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| constexpr index_t DYSrcVectorDim = IsDYFastestDimReduced ? 1 : 0 |
static constexpr |
◆ K_BlockTileSize
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize |
static constexpr |
◆ M_BlockTileSize
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
static constexpr |
◆ MeanInvStdSrcVectorDim
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| constexpr index_t MeanInvStdSrcVectorDim = IsMeanInvStdFastestDimReduced ? 1 : 0 |
static constexpr |
◆ NumInvariantDim
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| constexpr index_t NumInvariantDim = Rank - NumReduceDim |
static constexpr |
◆ reduceAllDim
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| constexpr bool reduceAllDim = (NumInvariantDim == 0) |
static constexpr |
◆ XSrcVectorDim
template<typename DYDataType , typename XDataType , typename MeanInvStdDataType , typename ComputeDataType , typename DGammaDataType , typename DBetaDataType , index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
| constexpr index_t XSrcVectorDim = IsXFastestDimReduced ? 1 : 0 |
static constexpr |
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_gamma_beta_impl.hpp
Public Member Functions inherited from