Invoker Struct Reference
ck::tensor_operation::device::DeviceGemmXdlStreamK< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL >::Invoker Struct Reference
#include <device_gemm_xdl_streamk.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmXdlStreamK< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL >::Invoker:
Public Member Functions | |
| void | Print (const Argument &karg) |
| float | Run (const Argument &karg, const StreamConfig &stream_config=StreamConfig{}) |
| float | Run (const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override |
Public Member Functions inherited from ck::tensor_operation::device::BaseInvoker | |
| | BaseInvoker ()=default |
| | BaseInvoker (const BaseInvoker &)=default |
| BaseInvoker & | operator= (const BaseInvoker &)=default |
| virtual | ~BaseInvoker () |
Member Function Documentation
◆ Print()
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, ck::index_t ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, ck::index_t BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CBlockTransferScalarPerVector_NWaveNPerXDL>
| void Print (const Argument &karg) |
inline |
◆ Run() [1/2]
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, ck::index_t ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, ck::index_t BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CBlockTransferScalarPerVector_NWaveNPerXDL>
| float Run (const Argument &karg, const StreamConfig &stream_config=StreamConfig{}) |
inline |
◆ Run() [2/2]
template<typename ADataType , typename BDataType , typename CDataType , typename AccDataType , typename ALayout , typename BLayout , typename CLayout , typename AElementwiseOperation , typename BElementwiseOperation , typename CElementwiseOperation , ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, ck::index_t ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1 , typename BBlockTransferThreadClusterArrangeOrder , typename BBlockTransferSrcAccessOrder , ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, ck::index_t BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CBlockTransferScalarPerVector_NWaveNPerXDL>
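| float Run (const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override |
inline override virtual |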
Reimplemented from ck::tensor_operation::device::BaseInvoker.
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.1/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp
Public Member Functions inherited from ck::tensor_operation::device::BaseInvoker