SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference

SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference

Composable Kernel: ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference
ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference

#include <smfmac_xdlops_gemm.hpp>

Public Types

using CIndex = MultiIndex< 2 >
 
using CIndex4D = MultiIndex< 4 >
 

Public Member Functions

__host__ constexpr __device__ SparseXdlopsGemm ()
 
template<class FloatA , class FloatB , class Idx , class FloatC >
__device__ void Run (const FloatA &p_a_wave, const FloatB &p_b_wave, const Idx &idx, FloatC &p_c_thread) const
 

Static Public Member Functions

static constexpr __device__ index_t GetNumBlks ()
 
static constexpr __device__ index_t GetNumXdlops ()
 
template<typename CDesc_M0_N0_M1_N1_M2_N2 >
__host__ static constexpr __device__ auto MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CDesc_M0_N0_M1_N1_M2_N2 &c_desc_m0_n0_m1_n1_m2_n2)
 
template<typename CDesc_G_M0_N0_M1_N1_M2_N2 >
__host__ static constexpr __device__ auto MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CDesc_G_M0_N0_M1_N1_M2_N2 &c_desc_g_m0_n0_m1_n1_m2_n2)
 
static constexpr __device__ index_t GetRegSizePerXdlops ()
 
static constexpr __device__ index_t GetWaveSize ()
 
static __device__ auto GetLaneId ()
 
static __device__ auto GetBlkIdx ()
 
__host__ static __device__ auto CalculateAThreadOriginDataIndex ()
 
__host__ static __device__ auto CalculateBThreadOriginDataIndex ()
 
static __device__ CIndex GetBeginOfThreadBlk (index_t xdlops_i, index_t blk_i)
 
static __device__ CIndex4D GetBeginOfThreadBlk4D (index_t, index_t)
 
__host__ static constexpr __device__ auto GetCM0M1M2NThreadBlkLengths ()
 

Static Public Attributes

static constexpr auto I0 = Number<0>{}
 
static constexpr auto I1 = Number<1>{}
 
static constexpr auto I2 = Number<2>{}
 
static constexpr auto I3 = Number<3>{}
 
static constexpr auto I4 = Number<4>{}
 
static constexpr auto I5 = Number<5>{}
 
static constexpr auto smfmac
 
static constexpr auto smfmac_instr = smfmac.selected_smfmac
 
static constexpr auto KPerXdlops = smfmac.GetKPerXdlops()
 
static constexpr auto K1PerXdlops = smfmac.GetK1PerXdlops()
 
static constexpr auto K0PerXdlops = KPerXdlops / K1PerXdlops
 

Member Typedef Documentation

◆ CIndex

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
using ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CIndex = MultiIndex<2>

◆ CIndex4D

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
using ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CIndex4D = MultiIndex<4>

Constructor & Destructor Documentation

◆ SparseXdlopsGemm()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ constexpr __device__ ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::SparseXdlopsGemm ( )
inline, constexpr

Member Function Documentation

◆ CalculateAThreadOriginDataIndex()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CalculateAThreadOriginDataIndex ( )
inline, static

◆ CalculateBThreadOriginDataIndex()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CalculateBThreadOriginDataIndex ( )
inline, static

◆ GetBeginOfThreadBlk()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static __device__ CIndex ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetBeginOfThreadBlk ( index_t  xdlops_i,
index_t  blk_i 
)
inline, static

◆ GetBeginOfThreadBlk4D()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static __device__ CIndex4D ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetBeginOfThreadBlk4D ( index_t  ,
index_t   
)
inline, static

◆ GetBlkIdx()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetBlkIdx ( )
inline, static

◆ GetCM0M1M2NThreadBlkLengths()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ static constexpr __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetCM0M1M2NThreadBlkLengths ( )
inline, static, constexpr

◆ GetLaneId()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetLaneId ( )
inline, static

◆ GetNumBlks()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static constexpr __device__ index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetNumBlks ( )
inline, static, constexpr

◆ GetNumXdlops()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static constexpr __device__ index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetNumXdlops ( )
inline, static, constexpr

◆ GetRegSizePerXdlops()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static constexpr __device__ index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetRegSizePerXdlops ( )
inline, static, constexpr

◆ GetWaveSize()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
static constexpr __device__ index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetWaveSize ( )
inline, static, constexpr

◆ MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
template<typename CDesc_G_M0_N0_M1_N1_M2_N2 >
__host__ static constexpr __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( const CDesc_G_M0_N0_M1_N1_M2_N2 &  c_desc_g_m0_n0_m1_n1_m2_n2)
inline, static, constexpr

◆ MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
template<typename CDesc_M0_N0_M1_N1_M2_N2 >
__host__ static constexpr __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( const CDesc_M0_N0_M1_N1_M2_N2 &  c_desc_m0_n0_m1_n1_m2_n2)
inline, static, constexpr

◆ Run()

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
template<class FloatA , class FloatB , class Idx , class FloatC >
__device__ void ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::Run ( const FloatA &  p_a_wave,
const FloatB &  p_b_wave,
const Idx &  idx,
FloatC &  p_c_thread 
) const
inline

Member Data Documentation

◆ I0

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I0 = Number<0>{}
static, constexpr

◆ I1

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I1 = Number<1>{}
static, constexpr

◆ I2

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I2 = Number<2>{}
static, constexpr

◆ I3

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I3 = Number<3>{}
static, constexpr

◆ I4

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I4 = Number<4>{}
static, constexpr

◆ I5

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I5 = Number<5>{}
static, constexpr

◆ K0PerXdlops

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::K0PerXdlops = KPerXdlops / K1PerXdlops
static, constexpr

◆ K1PerXdlops

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::K1PerXdlops = smfmac.GetK1PerXdlops()
static, constexpr

◆ KPerXdlops

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::KPerXdlops = smfmac.GetKPerXdlops()
static, constexpr

◆ smfmac

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::smfmac
static, constexpr
Initial value: = SmfmacSelector<base_type, MPerXdlops, NPerXdlops, additional_type>{}

◆ smfmac_instr

template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::smfmac_instr = smfmac.selected_smfmac
static, constexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/warp/smfmac_xdlops_gemm.hpp