WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference

WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference

Composable Kernel: ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference
ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference

#include <wmma_gemm.hpp>

Public Types

using CIndex = MultiIndex< 2 >
 
using CIndex3D = MultiIndex< 3 >
 

Public Member Functions

__host__ constexpr __device__ WmmaGemm ()
 
template<class FloatA , class FloatB , class FloatC >
__device__ void Run (const FloatA &p_a_wave, const FloatB &p_b_wave, FloatC &p_c_thread) const
 

Static Public Member Functions

template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA >
__host__ static constexpr __device__ auto MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs (const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA &c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
 
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA >
__host__ static constexpr __device__ auto MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs (const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA &c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
 
static constexpr __device__ index_t GetRegSizePerWmma ()
 
static constexpr __device__ index_t GetWaveSize ()
 
static __device__ auto GetLaneId ()
 
static __device__ auto GetSubGroupId ()
 
static __device__ auto GetLaneIdUnderSubGroup ()
 
static __device__ auto GetSwizzledLaneIdLow ()
 
__host__ static __device__ auto CalculateAThreadOriginDataIndex ()
 
__host__ static __device__ auto CalculateBThreadOriginDataIndex ()
 
static __device__ CIndex GetBeginOfThreadBlk ()
 
static __device__ CIndex3D GetBeginOfThreadBlk3D ()
 
__host__ static constexpr __device__ auto GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths ()
 

Static Public Attributes

static constexpr auto I0 = Number<0>{}
 
static constexpr auto I1 = Number<1>{}
 
static constexpr auto I2 = Number<2>{}
 
static constexpr auto I3 = Number<3>{}
 
static constexpr auto I4 = Number<4>{}
 
static constexpr auto I5 = Number<5>{}
 
static constexpr auto wmma
 
static constexpr auto wmma_instr = wmma.selected_wmma
 

Member Typedef Documentation

◆ CIndex

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
using ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CIndex = MultiIndex<2>

◆ CIndex3D

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
using ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CIndex3D = MultiIndex<3>

Constructor & Destructor Documentation

◆ WmmaGemm()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ constexpr __device__ ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::WmmaGemm ( )
inline constexpr

Member Function Documentation

◆ CalculateAThreadOriginDataIndex()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CalculateAThreadOriginDataIndex ( )
inline static

◆ CalculateBThreadOriginDataIndex()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CalculateBThreadOriginDataIndex ( )
inline static

◆ GetBeginOfThreadBlk()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ CIndex ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetBeginOfThreadBlk ( )
inline static

◆ GetBeginOfThreadBlk3D()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ CIndex3D ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetBeginOfThreadBlk3D ( )
inline static

◆ GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static constexpr __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths ( )
inline static constexpr

◆ GetLaneId()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetLaneId ( )
inline static

◆ GetLaneIdUnderSubGroup()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetLaneIdUnderSubGroup ( )
inline static

◆ GetRegSizePerWmma()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static constexpr __device__ index_t ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetRegSizePerWmma ( )
inline static constexpr

◆ GetSubGroupId()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetSubGroupId ( )
inline static

◆ GetSwizzledLaneIdLow()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetSwizzledLaneIdLow ( )
inline static

◆ GetWaveSize()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
static constexpr __device__ index_t ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetWaveSize ( )
inline static constexpr

◆ MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA >
__host__ static constexpr __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs ( const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA &  c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
inline static constexpr

◆ MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA >
__host__ static constexpr __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs ( const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA &  c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
inline static constexpr

◆ Run()

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<class FloatA , class FloatB , class FloatC >
__device__ void ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::Run ( const FloatA & p_a_wave, const FloatB & p_b_wave, FloatC & p_c_thread ) const
inline

Member Data Documentation

◆ I0

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I1 = Number<1>{}
static constexpr

◆ I2

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I2 = Number<2>{}
static constexpr

◆ I3

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I3 = Number<3>{}
static constexpr

◆ I4

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I4 = Number<4>{}
static constexpr

◆ I5

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I5 = Number<5>{}
static constexpr

◆ wmma

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::wmma
static constexpr
Initial value:
= WmmaSelector<src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma>{}

◆ wmma_instr

template<typename src_type_a , typename src_type_b , typename dst_type , index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::wmma_instr = wmma.selected_wmma
static constexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp