#include <xdlops_gemm.hpp>
|
__host__ constexpr __device__ XdlopsGemm ()
|
template<class FloatA , class FloatB , class FloatC >
__device__ void Run (const FloatA &p_a_wave, const FloatB &p_b_wave, FloatC &p_c_thread) const
|
◆ CIndex
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
◆ CIndex4D
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
◆ XdlopsGemm()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
__host__ constexpr __device__ ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::XdlopsGemm()
inline constexpr
◆ CalculateAThreadOriginDataIndex()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
__host__ static __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::CalculateAThreadOriginDataIndex()
inline static
◆ CalculateBThreadOriginDataIndex()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
__host__ static __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::CalculateBThreadOriginDataIndex()
inline static
◆ GetBeginOfThreadBlk()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
◆ GetBeginOfThreadBlk4D()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
◆ GetBlkIdx()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
static __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::GetBlkIdx()
inline static
◆ GetCM0M1M2NThreadBlkLengths()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
__host__ static constexpr __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::GetCM0M1M2NThreadBlkLengths()
inline static constexpr
◆ GetLaneId()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
static __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::GetLaneId()
inline static
◆ GetNumBlks()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
static constexpr __device__ index_t ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::GetNumBlks()
inline static constexpr
◆ GetNumXdlops()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
static constexpr __device__ index_t ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::GetNumXdlops()
inline static constexpr
◆ GetRegSizePerXdlops()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
static constexpr __device__ index_t ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::GetRegSizePerXdlops()
inline static constexpr
◆ GetWaveSize()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
static constexpr __device__ index_t ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::GetWaveSize()
inline static constexpr
◆ MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
template<typename CDesc_G_M0_N0_M1_N1_M2_N2 >
__host__ static constexpr __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2(const CDesc_G_M0_N0_M1_N1_M2_N2 & c_desc_g_m0_n0_m1_n1_m2_n2)
inline static constexpr
◆ MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
template<typename CDesc_M0_N0_M1_N1_M2_N2 >
__host__ static constexpr __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(const CDesc_M0_N0_M1_N1_M2_N2 & c_desc_m0_n0_m1_n1_m2_n2)
inline static constexpr
◆ MakeCDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
template<typename CDesc_M0_N0_M1_N1_M2_N2 >
__host__ static constexpr __device__ auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::MakeCDescriptor_M0_N0_M1_N1_M2_N2_N3_N4(const CDesc_M0_N0_M1_N1_M2_N2 & c_desc_m0_n0_m1_n1_m2_n2)
inline static constexpr
◆ Run()
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
template<class FloatA , class FloatB , class FloatC >
__device__ void ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::Run(const FloatA & p_a_wave, const FloatB & p_b_wave, FloatC & p_c_thread) const
inline
◆ I0
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::I0 = Number<0>{}
static constexpr
◆ I1
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::I1 = Number<1>{}
static constexpr
◆ I2
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::I2 = Number<2>{}
static constexpr
◆ I3
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::I3 = Number<3>{}
static constexpr
◆ I4
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::I4 = Number<4>{}
static constexpr
◆ I5
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::I5 = Number<5>{}
static constexpr
◆ K0PerXdlops
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
◆ K1PerXdlops
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::K1PerXdlops = mfma.GetK1PerXdlops()
static constexpr
◆ KPerXdlops
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::KPerXdlops = mfma.GetKPerXdlops()
static constexpr
◆ mfma
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::mfma
static constexpr
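Initial value:
= MfmaSelector<base_type,
               MPerXdlops,
               NPerXdlops,
               additional_type,
               /* boolean condition lost in extraction; it appears to read a `::value` trait -- consult xdlops_gemm.hpp */
                   ? true
                   : false>{}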
static constexpr bool value
Definition: integral_constant.hpp:11
◆ mfma_instr
template<typename base_type , index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type, bool TransposeC = false>
constexpr auto ck::XdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type, TransposeC >::mfma_instr = mfma.selected_mfma
static constexpr
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp