DeviceGemmWmma_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::tensor_operation::device::DeviceGemmWmma_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer > Struct Template Reference
#include <device_gemm_wmma.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmWmma_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer >:
Classes | |
| struct | Argument |
| struct | Invoker |
Public Types | |
| using | AGridDesc = decltype(MakeAGridDescriptor(1, 1, 1)) |
| using | BGridDesc = decltype(MakeBGridDescriptor(1, 1, 1)) |
| using | CGridDesc_M_N = decltype(MakeCGridDescriptor_M_N(1, 1, 1)) |
| using | GridwiseGemm = GridwiseGemm_Wmma< BlockSize, ADataType, BDataType, AccDataType, CShuffleDataType, CDataType, InMemoryDataOperationEnum::Set, AGridDesc, BGridDesc, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, K1, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, AEnableLds, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, BEnableLds, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumPrefetch, LoopSched, PipelineVer > |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static auto | MakeAGridDescriptor (index_t MRaw, index_t KRaw, index_t StrideA) |
| static auto | MakeBGridDescriptor (index_t KRaw, index_t NRaw, index_t StrideB) |
| static auto | MakeCGridDescriptor_M_N (index_t MRaw, index_t NRaw, index_t StrideC) |
| static constexpr bool | IsValidCompilationParameter () |
| static bool | IsSupportedArgument (const Argument &arg) |
| static auto | MakeArgument (const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) |
| static auto | MakeInvoker () |
Static Public Attributes | |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr auto | I4 = Number<4>{} |
| static constexpr auto | I5 = Number<5>{} |
| static constexpr auto | I6 = Number<6>{} |
| static constexpr auto | K1Number = Number<K1>{} |
| static constexpr auto | MWaves = MPerBlock / (MRepeat * MPerWmma) |
| static constexpr auto | NWaves = NPerBlock / (NRepeat * NPerWmma) |
| static constexpr auto | WmmaK = K1 == 16 ? 32 : 16 |
| static constexpr auto | MaxVectorLoadA = K1 * sizeof(ADataType) == 16 ? true : false |
| static constexpr auto | MaxVectorLoadB = K1 * sizeof(BDataType) == 16 ? true : false |
| static constexpr auto | AEnableLds_auto |
| static constexpr auto | BEnableLds_auto |
| static constexpr auto | AEnableLds_manu = false |
| static constexpr auto | BEnableLds_manu = false |
| static constexpr auto | AEnableLds = AEnableLds_auto || AEnableLds_manu || (NumPrefetch > 1) |
| static constexpr auto | BEnableLds = BEnableLds_auto || BEnableLds_manu || (NumPrefetch > 1) |
| static constexpr auto | matrix_padder |
Member Typedef Documentation
◆ AGridDesc
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| using ck::tensor_operation::device::DeviceGemmWmma_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer >::AGridDesc = decltype(MakeAGridDescriptor(1, 1, 1)) |
◆ BGridDesc
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| using ck::tensor_operation::device::DeviceGemmWmma_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer >::BGridDesc = decltype(MakeBGridDescriptor(1, 1, 1)) |
◆ CGridDesc_M_N
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| using ck::tensor_operation::device::DeviceGemmWmma_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer >::CGridDesc_M_N = decltype(MakeCGridDescriptor_M_N(1, 1, 1)) |
◆ GridwiseGemm
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| using ck::tensor_operation::device::DeviceGemmWmma_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer >::GridwiseGemm = GridwiseGemm_Wmma<BlockSize, ADataType, BDataType, AccDataType, CShuffleDataType, CDataType, InMemoryDataOperationEnum::Set, AGridDesc, BGridDesc, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, K1, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, AEnableLds, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, BEnableLds, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumPrefetch, LoopSched, PipelineVer> |
Member Function Documentation
◆ GetTypeString()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument() [1/2]
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlinestatic |
◆ IsSupportedArgument() [2/2]
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsValidCompilationParameter()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlinestaticconstexpr |
◆ MakeAGridDescriptor()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlinestatic |
◆ MakeArgument()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlinestatic |
◆ MakeArgumentPointer()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlineoverridevirtual |
◆ MakeBGridDescriptor()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlinestatic |
◆ MakeCGridDescriptor_M_N()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlinestatic |
◆ MakeInvoker()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlinestatic |
◆ MakeInvokerPointer()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inlineoverridevirtual |
Member Data Documentation
◆ AEnableLds
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ AEnableLds_auto
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
Initial value:
= (NWaves == 1 && (MaxVectorLoadA || MRepeat == 1) &&
? false
: true
static constexpr bool value
Definition: integral_constant.hpp:21
static constexpr auto MaxVectorLoadA
Definition: device_gemm_wmma.hpp:90
static constexpr auto NWaves
Definition: device_gemm_wmma.hpp:88
◆ AEnableLds_manu
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ BEnableLds
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ BEnableLds_auto
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
Initial value:
=
(MWaves == 1 && (MaxVectorLoadB || NRepeat == 1) &&
? false
: true
static constexpr auto MWaves
Definition: device_gemm_wmma.hpp:87
static constexpr auto MaxVectorLoadB
Definition: device_gemm_wmma.hpp:91
◆ BEnableLds_manu
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ I0
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ I1
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ I2
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ I3
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ I4
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ I5
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ I6
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ K1Number
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ matrix_padder
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
Initial value:
=
MatrixPadder<GemmSpec, index_t, index_t, index_t>{MPerBlock, NPerBlock, KPerBlock}
◆ MaxVectorLoadA
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ MaxVectorLoadB
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ MWaves
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ NWaves
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
◆ WmmaK
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename CShuffleDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t K1, ck::index_t MPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
staticconstexpr |
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.1/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp
Public Member Functions inherited from