/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_streamk_v3.hpp Source File
device_gemm_xdl_cshuffle_streamk_v3.hpp
Go to the documentation of this file.
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:13
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:90
std::string getGemmSpecializationString(const GemmSpecialization &s)
Definition: gemm_specialization.hpp:33
GemmSpecialization
Definition: gemm_specialization.hpp:11
Definition: ck.hpp:264
BlockGemmPipelineVersion
Definition: blockwise_gemm_pipeline_xdlops_ab_scale_selector.hpp:13
@ One
@ Seven
@ Even
@ Odd
@ Four
@ Two
@ Full
@ Three
@ Five
@ Six
__global__ void kernel_gemm_xdl_cshuffle_v3_2lds(typename GridwiseGemm::Argument karg)
Definition: gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:58
@ Intrawave
@ Interwave
__global__ void kernel_gemm_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg)
Definition: gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:37
Definition: stream_config.hpp:10
static constexpr StreamKReductionStrategy ReductionStrategy
Definition: block_to_ctile_map.hpp:1422
Definition: gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:517
Definition: gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:126
static constexpr __host__ TailNumber CalculateKBlockLoopTailNum(index_t K)
Definition: gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:1130
static constexpr __host__ bool CalculateHasMainKBlockLoop(index_t K)
Definition: gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:1123
static constexpr __host__ bool CheckValidity(const Argument &karg)
Definition: gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:940
Definition: device_base.hpp:50
void * p_workspace_
Definition: device_base.hpp:57
Definition: device_base.hpp:61
Definition: device_gemm_streamk_v2.hpp:22
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:131
float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:430
float Run(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:132
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:77
size_t GetWorkSpaceSize(const BaseArgument *pArg) const override
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:437
typename GridwiseGemm::Argument Argument
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:127
static auto MakeArgument(const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t streamk_sel, index_t Grid_size, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation)
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:494
static constexpr bool IsValidCompilationParameter()
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:460
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:743
void SetWorkSpacePointer(BaseArgument *pArg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:451
GridwiseGemm_xdl_cshuffle_streamk_v3< ALayout, BLayout, CLayout, ADataType, BDataType, GemmAccDataType, CShuffleDataType, CDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB > GridwiseGemm
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:125
static bool IsSupportedArgument(const Argument &arg)
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:466
static auto MakeInvoker()
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:711
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:489
std::string GetTypeString() const override
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:749
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t streamk_sel, index_t Grid_size, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation) override
Definition: device_gemm_xdl_cshuffle_streamk_v3.hpp:714
Definition: flush_cache.hpp:137