Invoker Struct Reference
ck::tensor_operation::device::DeviceGemmMultipleD_Xdl_CShuffle_LdsDirectLoad< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer, ComputeDataType >::Invoker Struct Reference
#include <device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmMultipleD_Xdl_CShuffle_LdsDirectLoad< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer, ComputeDataType >::Invoker:
Public Member Functions
| float | Run (const Argument &arg, const StreamConfig &stream_config=StreamConfig{}) |
| float | Run (const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override |
Public Member Functions inherited from ck::tensor_operation::device::BaseInvoker
| | BaseInvoker ()=default |
| | BaseInvoker (const BaseInvoker &)=default |
| BaseInvoker & | operator= (const BaseInvoker &)=default |
| virtual | ~BaseInvoker () |
Member Function Documentation
◆ Run() [1/2]
template<typename ALayout , typename BLayout , typename DsLayout , typename ELayout , typename ADataType , typename BDataType , typename AccDataType , typename CShuffleDataType , typename DsDataType , typename EDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CDEElementwiseOperation , GemmSpecialization GemmSpec, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1 , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferScalarPerVector, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1 , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferScalarPerVector, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::v4, typename ComputeDataType = EDataType>
| float Invoker::Run (const Argument &arg, const StreamConfig &stream_config=StreamConfig{}) | inline |
◆ Run() [2/2]
template<typename ALayout , typename BLayout , typename DsLayout , typename ELayout , typename ADataType , typename BDataType , typename AccDataType , typename CShuffleDataType , typename DsDataType , typename EDataType , typename AElementwiseOperation , typename BElementwiseOperation , typename CDEElementwiseOperation , GemmSpecialization GemmSpec, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1 , typename ABlockTransferSrcAccessOrder , index_t ABlockTransferSrcVectorDim, index_t ABlockTransferScalarPerVector, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1 , typename BBlockTransferSrcAccessOrder , index_t BBlockTransferSrcVectorDim, index_t BBlockTransferScalarPerVector, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::v4, typename ComputeDataType = EDataType>
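| float Invoker::Run (const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) | inline, override, virtual |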
Reimplemented from ck::tensor_operation::device::BaseInvoker.
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.1/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp
Public Member Functions inherited from