/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/ops/gemm/kernel/batched_gemm_kernel.hpp Source File
batched_gemm_kernel.hpp
Go to the documentation of this file.
Definition: cluster_descriptor.hpp:13
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition: concat.hpp:41
Definition: batched_gemm_kernel.hpp:13
ck_tile::index_t batch_stride_B
Definition: batched_gemm_kernel.hpp:49
ck_tile::index_t batch_stride_A
Definition: batched_gemm_kernel.hpp:48
ck_tile::index_t batch_stride_E
Definition: batched_gemm_kernel.hpp:50
CK_TILE_HOST BatchedGemmHostArgs(const void *a_ptr_, const void *b_ptr_, void *c_ptr_, ck_tile::index_t k_batch_, ck_tile::index_t M_, ck_tile::index_t N_, ck_tile::index_t K_, ck_tile::index_t stride_A_, ck_tile::index_t stride_B_, ck_tile::index_t stride_C_, ck_tile::index_t batch_stride_A_, ck_tile::index_t batch_stride_B_, ck_tile::index_t batch_stride_C_, ck_tile::index_t batch_count_)
Definition: batched_gemm_kernel.hpp:15
ck_tile::index_t batch_count
Definition: batched_gemm_kernel.hpp:51
CK_TILE_HOST BatchedGemmHostArgs()=default
Definition: batched_gemm_kernel.hpp:85
index_t batch_stride_E
Definition: batched_gemm_kernel.hpp:88
index_t batch_count
Definition: batched_gemm_kernel.hpp:89
index_t batch_stride_A
Definition: batched_gemm_kernel.hpp:86
index_t batch_stride_B
Definition: batched_gemm_kernel.hpp:87
Definition: batched_gemm_kernel.hpp:56
typename Base::BDataType BDataType
Definition: batched_gemm_kernel.hpp:62
typename Base::BLayout BLayout
Definition: batched_gemm_kernel.hpp:69
typename Base::ADataType ADataType
Definition: batched_gemm_kernel.hpp:61
static CK_TILE_HOST const std::string GetName()
Definition: batched_gemm_kernel.hpp:72
typename Base::ELayout CLayout
Definition: batched_gemm_kernel.hpp:70
static constexpr CK_TILE_HOST BatchedGemmKernelArgs MakeKernelArgs(const BatchedGemmHostArgs &hostArgs)
Definition: batched_gemm_kernel.hpp:103
typename Base::TilePartitioner TilePartitioner
Definition: batched_gemm_kernel.hpp:65
typename Base::EDataType CDataType
Definition: batched_gemm_kernel.hpp:63
typename Base::ALayout ALayout
Definition: batched_gemm_kernel.hpp:68
static constexpr __host__ auto GridSize(index_t M, index_t N, index_t KBatch, index_t batch_count)
Definition: batched_gemm_kernel.hpp:95
CK_TILE_DEVICE void operator()(BatchedGemmKernelArgs kargs) const
Definition: batched_gemm_kernel.hpp:128
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemSize()
Definition: batched_gemm_kernel.hpp:123
static constexpr __host__ auto BlockSize()
Definition: batched_gemm_kernel.hpp:100
typename ck_tile::GemmKernelArgs<> GemmKernelArgs
Definition: batched_gemm_kernel.hpp:59
typename Base::GemmPipeline GemmPipeline
Definition: batched_gemm_kernel.hpp:66
typename Base::EpiloguePipeline EpiloguePipeline
Definition: batched_gemm_kernel.hpp:67
Definition: gemm_kernel.hpp:251
index_t b_k_split_offset
Definition: gemm_kernel.hpp:287
index_t a_k_split_offset
Definition: gemm_kernel.hpp:286
const void * a_ptr
The A input tensor's pointer to device memory.
Definition: gemm_kernel.hpp:87
const void * b_ptr
The B input tensor's pointer to device memory.
Definition: gemm_kernel.hpp:89
void * e_ptr
The E output tensor's pointer to device memory.
Definition: gemm_kernel.hpp:93
remove_cvref_t< typename EpiloguePipeline::ODataType > EDataType
Definition: gemm_kernel.hpp:183
remove_cvref_t< typename GemmPipeline::ADataType > ADataType
Definition: gemm_kernel.hpp:180
remove_cvref_t< typename GemmPipeline::CLayout > ELayout
Definition: gemm_kernel.hpp:160
static CK_TILE_DEVICE void RunGemm(const ADataType *a_ptr, const BDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, void *smem_ptr_0, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n)
Runs single GEMM problem cooperatively by whole workgroup.
Definition: gemm_kernel.hpp:794
remove_cvref_t< typename GemmPipeline::ALayout > ALayout
Definition: gemm_kernel.hpp:157
static constexpr index_t KernelBlockSize
Definition: gemm_kernel.hpp:163
remove_cvref_t< typename GemmPipeline::BDataType > BDataType
Definition: gemm_kernel.hpp:181
remove_cvref_t< GemmPipeline_ > GemmPipeline
Definition: gemm_kernel.hpp:155
remove_cvref_t< typename GemmPipeline::BLayout > BLayout
Definition: gemm_kernel.hpp:158
remove_cvref_t< TilePartitioner_ > TilePartitioner
Definition: gemm_kernel.hpp:154
remove_cvref_t< EpiloguePipeline_ > EpiloguePipeline
Definition: gemm_kernel.hpp:156