#include <wmma_gemm.hpp>
|
__host__ constexpr __device__ | WmmaGemm () |
|
template<class FloatA , class FloatB , class FloatC > |
__device__ void | Run (const FloatA &p_a_wave, const FloatB &p_b_wave, FloatC &p_c_thread) const |
|
|
static constexpr auto | I0 = Number<0>{} |
|
static constexpr auto | I1 = Number<1>{} |
|
static constexpr auto | I2 = Number<2>{} |
|
static constexpr auto | I3 = Number<3>{} |
|
static constexpr auto | I4 = Number<4>{} |
|
static constexpr auto | I5 = Number<5>{} |
|
static constexpr auto | wmma |
|
static constexpr auto | wmma_instr = wmma.selected_wmma |
|
◆ CIndex
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
◆ CIndex3D
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
◆ WmmaGemm()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ constexpr __device__ ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::WmmaGemm |
( |
| ) |
|
|
inline constexpr |
◆ CalculateAThreadOriginDataIndex()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CalculateAThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ CalculateBThreadOriginDataIndex()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CalculateBThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ GetBeginOfThreadBlk()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ CIndex ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetBeginOfThreadBlk |
( |
| ) |
|
|
inline static |
◆ GetBeginOfThreadBlk3D()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ CIndex3D ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetBeginOfThreadBlk3D |
( |
| ) |
|
|
inline static |
◆ GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static constexpr __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths |
( |
| ) |
|
|
inline static constexpr |
◆ GetLaneId()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetLaneId |
( |
| ) |
|
|
inline static |
◆ GetLaneIdUnderSubGroup()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetLaneIdUnderSubGroup |
( |
| ) |
|
|
inline static |
◆ GetRegSizePerWmma()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static constexpr __device__ index_t ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetRegSizePerWmma |
( |
| ) |
|
|
inline static constexpr |
◆ GetSubGroupId()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetSubGroupId |
( |
| ) |
|
|
inline static |
◆ GetSwizzledLaneIdLow()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetSwizzledLaneIdLow |
( |
| ) |
|
|
inline static |
◆ GetWaveSize()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static constexpr __device__ index_t ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetWaveSize |
( |
| ) |
|
|
inline static constexpr |
◆ MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA >
__host__ static constexpr __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs |
( |
const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA & |
c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma | ) |
|
|
inline static constexpr |
◆ MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA >
__host__ static constexpr __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs |
( |
const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA & |
c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma | ) |
|
|
inline static constexpr |
◆ Run()
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<class FloatA , class FloatB , class FloatC >
__device__ void ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::Run |
( |
const FloatA & |
p_a_wave, |
|
|
const FloatB & |
p_b_wave, |
|
|
FloatC & |
p_c_thread |
|
) |
| const |
|
inline |
◆ I0
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I0 = Number<0>{} |
|
static constexpr
◆ I1
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I1 = Number<1>{} |
|
static constexpr
◆ I2
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I2 = Number<2>{} |
|
static constexpr
◆ I3
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I3 = Number<3>{} |
|
static constexpr
◆ I4
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I4 = Number<4>{} |
|
static constexpr
◆ I5
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I5 = Number<5>{} |
|
static constexpr
◆ wmma
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::wmma |
|
static constexpr
Initial value:
= WmmaSelector<src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma>{}
◆ wmma_instr
template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::wmma_instr = wmma.selected_wmma |
|
static constexpr
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp