DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference
#include <device_grouped_conv_bwd_weight_wmma_cshuffle.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >:
Classes | |
| struct | Argument |
| struct | Invoker |
Public Types | |
| using | DeviceOp = DeviceGroupedConvBwdWeight_Wmma_CShuffle |
| using | ADataType = OutDataType |
| using | BDataType = InDataType |
| using | CDataType = WeiDataType |
| using | AElementwiseOperation = OutElementwiseOperation |
| using | BElementwiseOperation = InElementwiseOperation |
| using | CElementwiseOperation = WeiElementwiseOperation |
| using | ABDataType = InDataType |
| using | ABCGridDescs = decltype(GetABCGridDesc< NDimSpatial >()) |
| using | AGridDesc_K0_M_K1 = remove_cvref_t< decltype(ABCGridDescs{}[I0])> |
| using | BGridDesc_K0_N_K1 = remove_cvref_t< decltype(ABCGridDescs{}[I1])> |
| using | CGridDesc_M_N = remove_cvref_t< decltype(ABCGridDescs{}[I2])> |
| using | CShuffleDataType = AccDataType |
| using | GridwiseGemm = GridwiseGemmMultipleD_Wmma< ADataType, BDataType, AccDataType, CShuffleDataType, Tuple<>, CDataType, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, Tuple<>, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, InMemoryDataOperationEnum::Set, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, K1, MRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, true, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, true, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer > |
| using | DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(Tuple<>{})) |
| using | CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(CGridDesc_M_N{})) |
| using | Block2CTileMap = decltype(GridwiseGemm::MakeDefaultBlock2CTileMap(CGridDesc_M_N{}, I1, I1)) |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const void *p_in_grid, void *p_wei_grid, const void *p_out_grid, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const index_t split_k) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false> | |
| constexpr static auto | make_out_grid_desc (const index_t N, const index_t Do, const index_t Ho, const index_t Wo, const index_t K, const std::array< index_t, NDimSpatial+3 > &output_strides) |
| template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false> | |
| constexpr static auto | make_in_grid_desc (const index_t N, const index_t Di, const index_t Hi, const index_t Wi, const index_t C, const std::array< index_t, NDimSpatial+3 > &input_strides) |
| template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false> | |
| constexpr static auto | make_wei_grid_desc (const index_t K, const index_t Z, const index_t Y, const index_t X, const index_t C, const std::array< index_t, NDimSpatial+3 > &weights_strides) |
| template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false> | |
| static auto | MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N (const index_t N, const index_t K, const index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &input_strides, const std::array< index_t, NDimSpatial+3 > &weights_strides, const std::array< index_t, NDimSpatial+3 > &output_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads) |
| template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false> | |
| static auto | GetABCGridDesc () |
| static constexpr bool | IsValidCompilationParameter () |
| static bool | IsSupportedArgument (const Argument &arg) |
| static auto | MakeArgument (const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const index_t split_k) |
| static auto | MakeInvoker () |
Static Public Attributes | |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr auto | I4 = Number<4>{} |
| static constexpr auto | I5 = Number<5>{} |
| static constexpr auto | GemmK1Number = Number<K1>{} |
| static constexpr index_t | KPerBlock = K0PerBlock * GemmK1Number |
Member Typedef Documentation
◆ ABCGridDescs
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ABCGridDescs = decltype(GetABCGridDesc<NDimSpatial>()) |
◆ ABDataType
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ABDataType = InDataType |
◆ ADataType
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ADataType = OutDataType |
◆ AElementwiseOperation
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::AElementwiseOperation = OutElementwiseOperation |
◆ AGridDesc_K0_M_K1
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::AGridDesc_K0_M_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I0])> |
◆ BDataType
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BDataType = InDataType |
◆ BElementwiseOperation
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BElementwiseOperation = InElementwiseOperation |
◆ BGridDesc_K0_N_K1
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BGridDesc_K0_N_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I1])> |
◆ Block2CTileMap
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::Block2CTileMap = decltype(GridwiseGemm::MakeDefaultBlock2CTileMap( CGridDesc_M_N{}, I1 , I1 )) |
◆ CDataType
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CDataType = WeiDataType |
◆ CElementwiseOperation
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CElementwiseOperation = WeiElementwiseOperation |
◆ CGridDesc_M_N
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CGridDesc_M_N = remove_cvref_t<decltype(ABCGridDescs{}[I2])> |
◆ CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock( CGridDesc_M_N{})) |
◆ CShuffleDataType
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CShuffleDataType = AccDataType |
◆ DeviceOp
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::DeviceOp = DeviceGroupedConvBwdWeight_Wmma_CShuffle |
◆ DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(Tuple<>{})) |
◆ GridwiseGemm
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
| using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GridwiseGemm = GridwiseGemmMultipleD_Wmma< ADataType, BDataType, AccDataType, CShuffleDataType, Tuple<>, CDataType, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, Tuple<>, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, InMemoryDataOperationEnum::Set, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, K1, MRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, true, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, true, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer> |
Member Function Documentation
◆ GetABCGridDesc()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
|
inlinestatic |
◆ GetTypeString()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument() [1/2]
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlinestatic |
◆ IsSupportedArgument() [2/2]
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsValidCompilationParameter()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlinestaticconstexpr |
◆ make_in_grid_desc()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
|
inlinestaticconstexpr |
◆ make_out_grid_desc()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
|
inlinestaticconstexpr |
◆ make_wei_grid_desc()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
|
inlinestaticconstexpr |
◆ MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
|
inlinestatic |
◆ MakeArgument()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlinestatic |
◆ MakeArgumentPointer()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlineoverridevirtual |
◆ MakeInvoker()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlinestatic |
◆ MakeInvokerPointer()
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
inlineoverridevirtual |
Member Data Documentation
◆ GemmK1Number
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
◆ I0
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
◆ I1
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
◆ I2
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
◆ I3
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
◆ I4
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
◆ I5
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
◆ KPerBlock
template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
|
staticconstexpr |
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.1/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp
Public Member Functions inherited from