DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference

DeviceGroupedConvBwdWeight_Wmma_CShuffle&lt; NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type &gt; Struct Template Reference#

Composable Kernel: ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference
ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference

#include <device_grouped_conv_bwd_weight_wmma_cshuffle.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >:
ck::tensor_operation::device::DeviceGroupedConvBwdWeight< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ComputeTypeA, ComputeTypeB > ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
 
struct  Invoker
 

Public Types

using DeviceOp = DeviceGroupedConvBwdWeight_Wmma_CShuffle
 
using ADataType = OutDataType
 
using BDataType = InDataType
 
using CDataType = WeiDataType
 
using AElementwiseOperation = OutElementwiseOperation
 
using BElementwiseOperation = InElementwiseOperation
 
using CElementwiseOperation = WeiElementwiseOperation
 
using ABDataType = InDataType
 
using ABCGridDescs = decltype(GetABCGridDesc< NDimSpatial >())
 
using AGridDesc_K0_M_K1 = remove_cvref_t< decltype(ABCGridDescs{}[I0])>
 
using BGridDesc_K0_N_K1 = remove_cvref_t< decltype(ABCGridDescs{}[I1])>
 
using CGridDesc_M_N = remove_cvref_t< decltype(ABCGridDescs{}[I2])>
 
using CShuffleDataType = AccDataType
 
using GridwiseGemm = GridwiseGemmMultipleD_Wmma< ADataType, BDataType, AccDataType, CShuffleDataType, Tuple<>, CDataType, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, Tuple<>, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, InMemoryDataOperationEnum::Set, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, K1, MRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, true, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, true, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer >
 
using DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(Tuple<>{}))
 
using CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(CGridDesc_M_N{}))
 
using Block2CTileMap = decltype(GridwiseGemm::MakeDefaultBlock2CTileMap(CGridDesc_M_N{}, I1, I1))
 

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
 
std::unique_ptr< BaseArgumentMakeArgumentPointer (const void *p_in_grid, void *p_wei_grid, const void *p_out_grid, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const index_t split_k) override
 
std::unique_ptr< BaseInvokerMakeInvokerPointer () override
 
std::string GetTypeString () const override
 
- Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 
 BaseOperator (const BaseOperator &)=default
 
BaseOperatoroperator= (const BaseOperator &)=default
 
virtual std::string GetTypeIdName () const
 
virtual std::optional< std::string > GetObjectName () const
 
virtual std::optional< std::string > GetTemplateInfo () const
 
virtual std::string GetTypeIdHashCode () const
 
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
 
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
 
virtual ~BaseOperator ()
 

Static Public Member Functions

template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr static auto make_out_grid_desc (const index_t N, const index_t Do, const index_t Ho, const index_t Wo, const index_t K, const std::array< index_t, NDimSpatial+3 > &output_strides)
 
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr static auto make_in_grid_desc (const index_t N, const index_t Di, const index_t Hi, const index_t Wi, const index_t C, const std::array< index_t, NDimSpatial+3 > &input_strides)
 
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr static auto make_wei_grid_desc (const index_t K, const index_t Z, const index_t Y, const index_t X, const index_t C, const std::array< index_t, NDimSpatial+3 > &weights_strides)
 
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static auto MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N (const index_t N, const index_t K, const index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &input_strides, const std::array< index_t, NDimSpatial+3 > &weights_strides, const std::array< index_t, NDimSpatial+3 > &output_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads)
 
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static auto GetABCGridDesc ()
 
static constexpr bool IsValidCompilationParameter ()
 
static bool IsSupportedArgument (const Argument &arg)
 
static auto MakeArgument (const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const index_t split_k)
 
static auto MakeInvoker ()
 

Static Public Attributes

static constexpr auto I0 = Number<0>{}
 
static constexpr auto I1 = Number<1>{}
 
static constexpr auto I2 = Number<2>{}
 
static constexpr auto I3 = Number<3>{}
 
static constexpr auto I4 = Number<4>{}
 
static constexpr auto I5 = Number<5>{}
 
static constexpr auto GemmK1Number = Number<K1>{}
 
static constexpr index_t KPerBlock = K0PerBlock * GemmK1Number
 

Member Typedef Documentation

◆ ABCGridDescs

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ABCGridDescs = decltype(GetABCGridDesc<NDimSpatial>())

◆ ABDataType

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ABDataType = InDataType

◆ ADataType

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ADataType = OutDataType

◆ AElementwiseOperation

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::AElementwiseOperation = OutElementwiseOperation

◆ AGridDesc_K0_M_K1

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::AGridDesc_K0_M_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I0])>

◆ BDataType

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BDataType = InDataType

◆ BElementwiseOperation

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BElementwiseOperation = InElementwiseOperation

◆ BGridDesc_K0_N_K1

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BGridDesc_K0_N_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I1])>

◆ Block2CTileMap

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::Block2CTileMap = decltype(GridwiseGemm::MakeDefaultBlock2CTileMap( CGridDesc_M_N{}, I1 , I1 ))

◆ CDataType

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CDataType = WeiDataType

◆ CElementwiseOperation

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CElementwiseOperation = WeiElementwiseOperation

◆ CGridDesc_M_N

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CGridDesc_M_N = remove_cvref_t<decltype(ABCGridDescs{}[I2])>

◆ CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock( CGridDesc_M_N{}))

◆ CShuffleDataType

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CShuffleDataType = AccDataType

◆ DeviceOp

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::DeviceOp = DeviceGroupedConvBwdWeight_Wmma_CShuffle

◆ DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock = decltype(GridwiseGemm::MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(Tuple<>{}))

◆ GridwiseGemm

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GridwiseGemm = GridwiseGemmMultipleD_Wmma< ADataType, BDataType, AccDataType, CShuffleDataType, Tuple<>, CDataType, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, Tuple<>, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, InMemoryDataOperationEnum::Set, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, K1, MRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, true, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, true, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer>

Member Function Documentation

◆ GetABCGridDesc()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GetABCGridDesc ( )
inlinestatic

◆ GetTypeString()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
std::string ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GetTypeString ( ) const
inlineoverridevirtual

◆ IsSupportedArgument() [1/2]

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
static bool ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::IsSupportedArgument ( const Argument arg)
inlinestatic

◆ IsSupportedArgument() [2/2]

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
bool ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::IsSupportedArgument ( const BaseArgument p_arg)
inlineoverridevirtual

◆ IsValidCompilationParameter()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
static constexpr bool ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::IsValidCompilationParameter ( )
inlinestaticconstexpr

◆ make_in_grid_desc()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr static auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::make_in_grid_desc ( const index_t  N,
const index_t  Di,
const index_t  Hi,
const index_t  Wi,
const index_t  C,
const std::array< index_t, NDimSpatial+3 > &  input_strides 
)
inlinestaticconstexpr

◆ make_out_grid_desc()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr static auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::make_out_grid_desc ( const index_t  N,
const index_t  Do,
const index_t  Ho,
const index_t  Wo,
const index_t  K,
const std::array< index_t, NDimSpatial+3 > &  output_strides 
)
inlinestaticconstexpr

◆ make_wei_grid_desc()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr static auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::make_wei_grid_desc ( const index_t  K,
const index_t  Z,
const index_t  Y,
const index_t  X,
const index_t  C,
const std::array< index_t, NDimSpatial+3 > &  weights_strides 
)
inlinestaticconstexpr

◆ MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N ( const index_t  N,
const index_t  K,
const index_t  C,
const std::array< index_t, NDimSpatial > &  input_spatial_lengths,
const std::array< index_t, NDimSpatial > &  filter_spatial_lengths,
const std::array< index_t, NDimSpatial > &  output_spatial_lengths,
const std::array< index_t, NDimSpatial+3 > &  input_strides,
const std::array< index_t, NDimSpatial+3 > &  weights_strides,
const std::array< index_t, NDimSpatial+3 > &  output_strides,
const std::array< index_t, NDimSpatial > &  conv_filter_strides,
const std::array< index_t, NDimSpatial > &  conv_filter_dilations,
const std::array< index_t, NDimSpatial > &  input_left_pads,
const std::array< index_t, NDimSpatial > &  input_right_pads 
)
inlinestatic

◆ MakeArgument()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
static auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeArgument ( const InDataType *  p_in_grid,
WeiDataType *  p_wei_grid,
const OutDataType *  p_out_grid,
const std::array< index_t, NDimSpatial+3 > &  a_g_n_c_wis_lengths,
const std::array< index_t, NDimSpatial+3 > &  a_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial+3 > &  b_g_k_c_xs_lengths,
const std::array< index_t, NDimSpatial+3 > &  b_g_k_c_xs_strides,
const std::array< index_t, NDimSpatial+3 > &  e_g_n_k_wos_lengths,
const std::array< index_t, NDimSpatial+3 > &  e_g_n_k_wos_strides,
const std::array< index_t, NDimSpatial > &  conv_filter_strides,
const std::array< index_t, NDimSpatial > &  conv_filter_dilations,
const std::array< index_t, NDimSpatial > &  input_left_pads,
const std::array< index_t, NDimSpatial > &  input_right_pads,
InElementwiseOperation  in_element_op,
WeiElementwiseOperation  wei_element_op,
OutElementwiseOperation  out_element_op,
const index_t  split_k 
)
inlinestatic

◆ MakeArgumentPointer()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
std::unique_ptr<BaseArgument> ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeArgumentPointer ( const void *  p_in_grid,
void *  p_wei_grid,
const void *  p_out_grid,
const std::array< index_t, NDimSpatial+3 > &  a_g_n_c_wis_lengths,
const std::array< index_t, NDimSpatial+3 > &  a_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial+3 > &  b_g_k_c_xs_lengths,
const std::array< index_t, NDimSpatial+3 > &  b_g_k_c_xs_strides,
const std::array< index_t, NDimSpatial+3 > &  e_g_n_k_wos_lengths,
const std::array< index_t, NDimSpatial+3 > &  e_g_n_k_wos_strides,
const std::array< index_t, NDimSpatial > &  conv_filter_strides,
const std::array< index_t, NDimSpatial > &  conv_filter_dilations,
const std::array< index_t, NDimSpatial > &  input_left_pads,
const std::array< index_t, NDimSpatial > &  input_right_pads,
InElementwiseOperation  in_element_op,
WeiElementwiseOperation  wei_element_op,
OutElementwiseOperation  out_element_op,
const index_t  split_k 
)
inlineoverridevirtual

◆ MakeInvoker()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
static auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeInvoker ( )
inlinestatic

◆ MakeInvokerPointer()

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
std::unique_ptr<BaseInvoker> ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeInvokerPointer ( )
inlineoverridevirtual

Member Data Documentation

◆ GemmK1Number

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GemmK1Number = Number<K1>{}
staticconstexpr

◆ I0

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I0 = Number<0>{}
staticconstexpr

◆ I1

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I1 = Number<1>{}
staticconstexpr

◆ I2

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I2 = Number<2>{}
staticconstexpr

◆ I3

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I3 = Number<3>{}
staticconstexpr

◆ I4

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I4 = Number<4>{}
staticconstexpr

◆ I5

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I5 = Number<5>{}
staticconstexpr

◆ KPerBlock

template<index_t NDimSpatial, typename InLayout , typename WeiLayout , typename OutLayout , typename InDataType , typename WeiDataType , typename OutDataType , typename AccDataType , typename InElementwiseOperation , typename WeiElementwiseOperation , typename OutElementwiseOperation , ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr index_t ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::KPerBlock = K0PerBlock * GemmK1Number
staticconstexpr

The documentation for this struct was generated from the following file: