/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma_cshuffle_v3.hpp Source File
device_gemm_wmma_cshuffle_v3.hpp
Go to the documentation of this file.
#define REGISTER_EXTRA_PRINTING_METHODS
Definition: device_base.hpp:46
std::string getGemmSpecializationString(const GemmSpecialization &s)
Definition: gemm_specialization.hpp:32
GemmSpecialization
Definition: gemm_specialization.hpp:11
Definition: ck.hpp:267
@ Intrawave
@ Interwave
Definition: gridwise_gemm_wmma_cshuffle_v3.hpp:393
static constexpr index_t KPack
Definition: gridwise_gemm_wmma_cshuffle_v3_common.hpp:126
"Universal" GEMM kernel with SplitK support.
Definition: gridwise_gemm_wmma_cshuffle_v3.hpp:230
Definition: sequence.hpp:43
Definition: tuple.hpp:186
Definition: device_base.hpp:51
Helper structure responsible for kernel invocation.
Definition: device_gemm_wmma_cshuffle_v3_common.hpp:56
Definition: device_gemm_wmma_cshuffle_v3_common.hpp:42
static bool IsSupportedArgument(const Argument &arg)
Definition: device_gemm_wmma_cshuffle_v3_common.hpp:257
"Universal" GEMM operation with SplitK support.
Definition: device_gemm_wmma_cshuffle_v3.hpp:179
GridwiseGemm_wmma_cshuffle_v3< ALayout, BLayout, Tuple<>, CLayout, ADataType, BDataType, AccDataType, CShuffleDataType, Tuple<>, CDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, Sequence< CShuffleBlockTransferScalarPerVector_NPerBlock >, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > GridwiseGemm
Definition: device_gemm_wmma_cshuffle_v3.hpp:230
static auto MakeArgument(const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation cde_element_op)
Definition: device_gemm_wmma_cshuffle_v3.hpp:272
std::string GetTypeString() const override
Definition: device_gemm_wmma_cshuffle_v3.hpp:344
typename DeviceGemmCommon::Invoker Invoker
Definition: device_gemm_wmma_cshuffle_v3.hpp:254
static auto MakeInvoker()
Definition: device_gemm_wmma_cshuffle_v3.hpp:303
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition: device_gemm_wmma_cshuffle_v3.hpp:262
bool GetPermuteA() override
Definition: device_gemm_wmma_cshuffle_v3.hpp:269
typename GridwiseGemm::Argument Argument
Definition: device_gemm_wmma_cshuffle_v3.hpp:232
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition: device_gemm_wmma_cshuffle_v3.hpp:338
bool GetPermuteB() override
Definition: device_gemm_wmma_cshuffle_v3.hpp:270
index_t GetKPerBlock() override
Definition: device_gemm_wmma_cshuffle_v3.hpp:267
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override
Definition: device_gemm_wmma_cshuffle_v3.hpp:306
static bool IsSupportedArgument(const Argument &arg)
Definition: device_gemm_wmma_cshuffle_v3.hpp:256
Definition: device_gemm_v2.hpp:22