/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp File Reference
device_grouped_gemm_xdl_fixed_nk.hpp File Reference
#include <iostream>
#include <sstream>
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
Go to the source code of this file.
Namespaces
ck
ck::tensor_operation
ck::tensor_operation::device
Functions
template<typename GridwiseGemm, typename GemmDesc, GemmSpecialization GemmSpec, bool Zeroing, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename DsDataType, typename Block2ETileMap, typename GroupedGemmBlock2ETileMap, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, InMemoryDataOperationEnum EGlobalMemoryDataOperation, bool HasMainKBlockLoop>
__global__ void ck::tensor_operation::device::kernel_grouped_gemm_xdl_fixed_nk(const void CK_CONSTANT_ADDRESS_SPACE *gemm_descs_const, uint32_t *barrier_count, const index_t barrier_size_grp, const index_t group_count, const index_t grid_size_grp, const index_t KBatch, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CDEElementwiseOperation c_element_op)