Argument Struct Reference

Argument Struct Reference#

Composable Kernel: ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument Struct Reference
ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument Struct Reference

#include <device_gemm_dl.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument:
ck::tensor_operation::device::BaseArgument

Public Member Functions

 Argument (const ADataType *p_a_grid, const BDataType *p_b_grid, CDataType *p_c_grid, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t M01, index_t N01, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op)
 
- Public Member Functions inherited from ck::tensor_operation::device::BaseArgument
 BaseArgument ()=default
 
 BaseArgument (const BaseArgument &)=default
 
BaseArgumentoperator= (const BaseArgument &)=default
 
virtual ~BaseArgument ()
 

Public Attributes

const ADataType * p_a_grid_
 
const BDataType * p_b_grid_
 
CDataType * p_c_grid_
 
AGridDesc_K0_M_K1 a_grid_desc_k0_m_k1_
 
BGridDesc_K0_N_K1 b_grid_desc_k0_n_k1_
 
CGridDesc_M_N c_grid_desc_m_n_
 
AGridDesc_K0_M0_M1_K1 a_grid_desc_k0_m0_m1_k1_
 
BGridDesc_K0_N0_N1_K1 b_grid_desc_k0_n0_n1_k1_
 
CGridDesc_M0_M10_M11_N0_N10_N11 c_grid_desc_m0_m10_m11_n0_n10_n11_
 
DefaultBlock2CTileMap block_2_ctile_map_
 
index_t M01_
 
index_t N01_
 
index_t M_raw_
 
index_t N_raw_
 
index_t K_raw_
 
AElementwiseOperation a_element_op_
 
BElementwiseOperation b_element_op_
 
CElementwiseOperation c_element_op_
 
- Public Attributes inherited from ck::tensor_operation::device::BaseArgument
void * p_workspace_ = nullptr
 

Constructor & Destructor Documentation

◆ Argument()

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::Argument ( const ADataType *  p_a_grid,
const BDataType *  p_b_grid,
CDataType *  p_c_grid,
index_t  M,
index_t  N,
index_t  K,
index_t  StrideA,
index_t  StrideB,
index_t  StrideC,
index_t  M01,
index_t  N01,
AElementwiseOperation  a_element_op,
BElementwiseOperation  b_element_op,
CElementwiseOperation  c_element_op 
)
inline

Member Data Documentation

◆ a_element_op_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
AElementwiseOperation ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::a_element_op_

◆ a_grid_desc_k0_m0_m1_k1_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
AGridDesc_K0_M0_M1_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::a_grid_desc_k0_m0_m1_k1_

◆ a_grid_desc_k0_m_k1_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
AGridDesc_K0_M_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::a_grid_desc_k0_m_k1_

◆ b_element_op_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
BElementwiseOperation ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::b_element_op_

◆ b_grid_desc_k0_n0_n1_k1_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
BGridDesc_K0_N0_N1_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::b_grid_desc_k0_n0_n1_k1_

◆ b_grid_desc_k0_n_k1_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
BGridDesc_K0_N_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::b_grid_desc_k0_n_k1_

◆ block_2_ctile_map_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
DefaultBlock2CTileMap ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::block_2_ctile_map_

◆ c_element_op_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
CElementwiseOperation ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::c_element_op_

◆ c_grid_desc_m0_m10_m11_n0_n10_n11_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
CGridDesc_M0_M10_M11_N0_N10_N11 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::c_grid_desc_m0_m10_m11_n0_n10_n11_

◆ c_grid_desc_m_n_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
CGridDesc_M_N ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::c_grid_desc_m_n_

◆ K_raw_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::K_raw_

◆ M01_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::M01_

◆ M_raw_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::M_raw_

◆ N01_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::N01_

◆ N_raw_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::N_raw_

◆ p_a_grid_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
const ADataType* ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::p_a_grid_

◆ p_b_grid_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
const BDataType* ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::p_b_grid_

◆ p_c_grid_

template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
CDataType* ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::p_c_grid_

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp