Argument Struct Reference#
ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument Struct Reference
#include <device_gemm_dl.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument:
Public Member Functions | |
| Argument (const ADataType *p_a_grid, const BDataType *p_b_grid, CDataType *p_c_grid, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t M01, index_t N01, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) | |
Public Member Functions inherited from ck::tensor_operation::device::BaseArgument | |
| BaseArgument ()=default | |
| BaseArgument (const BaseArgument &)=default | |
| BaseArgument & | operator= (const BaseArgument &)=default |
| virtual | ~BaseArgument () |
Public Attributes | |
| const ADataType * | p_a_grid_ |
| const BDataType * | p_b_grid_ |
| CDataType * | p_c_grid_ |
| AGridDesc_K0_M_K1 | a_grid_desc_k0_m_k1_ |
| BGridDesc_K0_N_K1 | b_grid_desc_k0_n_k1_ |
| CGridDesc_M_N | c_grid_desc_m_n_ |
| AGridDesc_K0_M0_M1_K1 | a_grid_desc_k0_m0_m1_k1_ |
| BGridDesc_K0_N0_N1_K1 | b_grid_desc_k0_n0_n1_k1_ |
| CGridDesc_M0_M10_M11_N0_N10_N11 | c_grid_desc_m0_m10_m11_n0_n10_n11_ |
| DefaultBlock2CTileMap | block_2_ctile_map_ |
| index_t | M01_ |
| index_t | N01_ |
| index_t | M_raw_ |
| index_t | N_raw_ |
| index_t | K_raw_ |
| AElementwiseOperation | a_element_op_ |
| BElementwiseOperation | b_element_op_ |
| CElementwiseOperation | c_element_op_ |
Public Attributes inherited from ck::tensor_operation::device::BaseArgument | |
| void * | p_workspace_ = nullptr |
Constructor & Destructor Documentation
◆ Argument()
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inline |
Member Data Documentation
◆ a_element_op_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| AElementwiseOperation ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::a_element_op_ |
◆ a_grid_desc_k0_m0_m1_k1_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| AGridDesc_K0_M0_M1_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::a_grid_desc_k0_m0_m1_k1_ |
◆ a_grid_desc_k0_m_k1_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| AGridDesc_K0_M_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::a_grid_desc_k0_m_k1_ |
◆ b_element_op_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| BElementwiseOperation ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::b_element_op_ |
◆ b_grid_desc_k0_n0_n1_k1_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| BGridDesc_K0_N0_N1_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::b_grid_desc_k0_n0_n1_k1_ |
◆ b_grid_desc_k0_n_k1_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| BGridDesc_K0_N_K1 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::b_grid_desc_k0_n_k1_ |
◆ block_2_ctile_map_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| DefaultBlock2CTileMap ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::block_2_ctile_map_ |
◆ c_element_op_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| CElementwiseOperation ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::c_element_op_ |
◆ c_grid_desc_m0_m10_m11_n0_n10_n11_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| CGridDesc_M0_M10_M11_N0_N10_N11 ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::c_grid_desc_m0_m10_m11_n0_n10_n11_ |
◆ c_grid_desc_m_n_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| CGridDesc_M_N ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::c_grid_desc_m_n_ |
◆ K_raw_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::K_raw_ |
◆ M01_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::M01_ |
◆ M_raw_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::M_raw_ |
◆ N01_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::N01_ |
◆ N_raw_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| index_t ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::N_raw_ |
◆ p_a_grid_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| const ADataType* ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::p_a_grid_ |
◆ p_b_grid_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| const BDataType* ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::p_b_grid_ |
◆ p_c_grid_
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs , typename M1N1ThreadClusterN1Xs , typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 , typename ABlockTransferSrcVectorTensorContiguousDimOrder , typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 , typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 , typename BBlockTransferSrcVectorTensorContiguousDimOrder , typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 , typename CThreadTransferSrcDstAccessOrder , index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< CElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| CDataType* ck::tensor_operation::device::DeviceGemmDl< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::Argument::p_c_grid_ |
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp
Public Member Functions inherited from