Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference#
Public Types |
Public Member Functions |
Static Public Member Functions |
Public Attributes |
Static Public Attributes |
List of all members
ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference
#include <blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_gufusion_v3.hpp>
Inheritance diagram for ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >:
Public Types | |
| using | Base = BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > |
| using | AccType = typename Base::AccType |
| using | Tuple5 = typename Base::Tuple5 |
| using | ComputeTypeA = typename Base::ComputeTypeA |
| using | ComputeTypeB = typename Base::ComputeTypeB |
| using | mx_scale_t = e8m0_bexp_t |
| using | AThreadCopy = ThreadwiseTensorSliceTransfer_v4< ADataType, ComputeTypeA, decltype(a_block_desc_m0_m1_m2_m3_k), decltype(a_thread_desc_), Sequence< 1, 1, 1, 1, KThreadChunk >, Sequence< 0, 1, 2, 3, 4 >, 4, A_K1, A_K1 > |
Public Types inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| using | ComputeTypeA = ADataType |
| using | ComputeTypeB = BDataType |
| using | AccType = float |
| using | ThisThreadBlock = ThisThreadBlock< BlockSize > |
| using | HotLoopInstList = ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, A_K1, B_K1, A_K1, B_K1, MRepeat, NRepeat, MPerXDL, NPerXDL, xdlops_gemm.KPerXdlops,(packed_size_v< ComputeTypeA > > 1||packed_size_v< ComputeTypeB > > 1)> |
| using | Tuple5 = decltype(CalculateAThreadOriginDataIndex()) |
Public Member Functions | |
| template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc , typename ABlockDesc , typename ABlockTransfer , typename AGridBuffer , typename ABlockBuffer , typename ABlockTransferStep , typename BGridDesc , typename BBlockDesc , typename BBlockTransfer , typename BGridBuffer , typename BBlockBuffer , typename BBlockTransferStep , typename CThreadBuffer , typename AScaleGridBuffer , typename AScaleGridDesc , typename AScaleThreadTransfer , typename BScaleGridBuffer , typename BScaleGridDesc , typename BScaleThreadTransfer > | |
| __device__ void | Run (const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_bufs, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, BBlockTransfer &b_blockwise_copy_up, const BGridBuffer &b_grid_buf, const BGridBuffer &b_grid_buf_up, BBlockBuffer &b_block_bufs, const BBlockTransferStep &b_block_copy_step, CThreadBuffer &c_thread_buf, CThreadBuffer &c_thread_buf_up, const AScaleGridDesc &a_scale_grid_desc, AScaleThreadTransfer &a_scale_thread_copy, const AScaleGridBuffer &a_scale_grid_buf, const BScaleGridDesc &b_scale_grid_desc, BScaleThreadTransfer &b_scale_thread_copy, BScaleThreadTransfer &b_scale_thread_copy_up, const BScaleGridBuffer &b_scale_grid_buf, const BScaleGridBuffer &b_scale_grid_buf_up, index_t num_loop) const |
Public Member Functions inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| __host__ constexpr __device__ auto & | GetCThreadBuffer () |
| __host__ __device__ | BlockwiseGemmXdlops_mx_pipeline_base (Tuple5 a_origin=CalculateAThreadOriginDataIndex(), Tuple5 b_origin=CalculateBThreadOriginDataIndex()) |
| Constructor for BlockwiseGemmXdlops_mx_pipeline_base. More... | |
Public Attributes | |
| AThreadCopy | a_thread_copy_ {Base::CalculateAThreadOriginDataIndex()} |
Public Attributes inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccType, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > | c_thread_buf_ |
Static Public Attributes | |
| static constexpr index_t | PrefetchStages = 2 |
| static constexpr index_t | LocalPrefetchStages = 2 |
| static constexpr index_t | PrefillStages = 1 |
| static constexpr index_t | GlobalBufferNum = 1 |
| static constexpr index_t | HotloopLocalBufSwitch = MRepeat % 2 == 0 ? 0 : 1 |
| static constexpr auto | num_buffer_load_a_scale = MRepeat / MXdlPack * KRepeat / KXdlPack |
| static constexpr auto | num_buffer_load_b_scale = NRepeat / NXdlPack * KRepeat / KXdlPack * 2 |
| static constexpr auto | async_vmcnt |
| static constexpr auto | async_vmcnt_encoding = 3952 + async_vmcnt % 16 + async_vmcnt / 16 * 16384 |
| static constexpr auto | ScalesPerKBlockSize |
| static constexpr auto | ScalesPerXdlopsRun |
| static constexpr auto | ScalesPerXdlopsRunPerThread |
| static constexpr auto | scale_pack_size_a = sizeof(AScaleDataType) / sizeof(mx_scale_t) |
| static constexpr auto | scale_pack_size_b = sizeof(BScaleDataType) / sizeof(mx_scale_t) |
| static constexpr auto | a_scale_thread_vec_size = KXdlPack * MXdlPack / scale_pack_size_a |
| static constexpr auto | b_scale_thread_vec_size = KXdlPack * NXdlPack / scale_pack_size_b |
| static constexpr auto | ARegBuf = 2 |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | a_scale_thread_desc |
| static constexpr auto | b_scale_thread_desc |
Static Public Attributes inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| static constexpr index_t | APackedSize |
| static constexpr index_t | BPackedSize |
| static constexpr auto | I0 |
| static constexpr auto | I1 |
| static constexpr auto | I2 |
| static constexpr auto | I3 |
| static constexpr index_t | WaveSize |
| static constexpr index_t | A_K0 |
| static constexpr index_t | B_K0 |
| static constexpr index_t | A_K1 |
| static constexpr index_t | B_K1 |
| static constexpr auto | xdlops_gemm |
| static constexpr index_t | AMmaKStride |
| static constexpr index_t | BMmaKStride |
| static constexpr index_t | KThreadChunk |
| static constexpr index_t | KPerThread |
| static constexpr index_t | KRepeat |
| static constexpr index_t | KPerInnerLoop |
| static constexpr index_t | MWaves |
| static constexpr index_t | NWaves |
| static constexpr index_t | MXdlPack |
| static constexpr index_t | NXdlPack |
| static constexpr index_t | KXdlPack |
| static constexpr AMmaTileDesc | a_block_desc_m0_m1_m2_m3_k |
| static constexpr BMmaTileDesc | b_block_desc_n0_n1_n2_n3_k |
Member Typedef Documentation
◆ AccType
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::AccType = typename Base::AccType |
◆ AThreadCopy
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::AThreadCopy = ThreadwiseTensorSliceTransfer_v4<ADataType, ComputeTypeA, decltype(a_block_desc_m0_m1_m2_m3_k), decltype(a_thread_desc_), Sequence<1, 1, 1, 1, KThreadChunk>, Sequence<0, 1, 2, 3, 4>, 4, A_K1, A_K1> |
◆ Base
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Base = BlockwiseGemmXdlops_mx_pipeline_base<ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack> |
◆ ComputeTypeA
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeA = typename Base::ComputeTypeA |
◆ ComputeTypeB
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeB = typename Base::ComputeTypeB |
◆ mx_scale_t
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::mx_scale_t = e8m0_bexp_t |
◆ Tuple5
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Tuple5 = typename Base::Tuple5 |
Member Function Documentation
◆ BlockHasHotloop()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ BlockLoopTailNum()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ HotLoopScheduler()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ Run()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc , typename ABlockDesc , typename ABlockTransfer , typename AGridBuffer , typename ABlockBuffer , typename ABlockTransferStep , typename BGridDesc , typename BBlockDesc , typename BBlockTransfer , typename BGridBuffer , typename BBlockBuffer , typename BBlockTransferStep , typename CThreadBuffer , typename AScaleGridBuffer , typename AScaleGridDesc , typename AScaleThreadTransfer , typename BScaleGridBuffer , typename BScaleGridDesc , typename BScaleThreadTransfer >
|
inline |
Member Data Documentation
◆ a_scale_thread_desc
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
make_tuple(Number<MRepeat / MXdlPack>{},
Number<ScalesPerXdlopsRunPerThread * a_scale_thread_vec_size>{}))
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
static constexpr index_t KRepeat
Definition: blockwise_gemm_mx_pipeline_xdlops_base.hpp:77
static constexpr index_t KXdlPack
Definition: blockwise_gemm_mx_pipeline_xdlops_base.hpp:87
◆ a_scale_thread_vec_size
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ a_thread_copy_
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| AThreadCopy ck::BlockwiseGemmXdlops_pipeline_bpreshuffle_mx_moe_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_thread_copy_ {Base::CalculateAThreadOriginDataIndex()} |
◆ a_thread_desc_
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
make_tuple(Number<ARegBuf>{}, I1, Number<MXdlPack>{}, Number<KRepeat>{}, Number<KPack>{}))
static constexpr auto I1
Definition: blockwise_gemm_mx_pipeline_xdlops_base.hpp:42
◆ ARegBuf
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ async_vmcnt
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
HotLoopInstList::B_Buffer_Load_Inst_Num * 2
static constexpr auto num_buffer_load_a_scale
Definition: blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_gufusion_v3.hpp:165
static constexpr auto num_buffer_load_b_scale
Definition: blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_gufusion_v3.hpp:166
◆ async_vmcnt_encoding
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ b_scale_thread_desc
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
make_tuple(Number<NRepeat / NXdlPack>{},
Number<ScalesPerXdlopsRunPerThread * b_scale_thread_vec_size>{}))
◆ b_scale_thread_vec_size
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ GlobalBufferNum
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ HotloopLocalBufSwitch
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ LocalPrefetchStages
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ num_buffer_load_a_scale
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ num_buffer_load_b_scale
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ PrefetchStages
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ PrefillStages
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ scale_pack_size_a
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ scale_pack_size_b
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ ScalesPerKBlockSize
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
=
KPerBlock / ScaleBlockSize
◆ ScalesPerXdlopsRun
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
=
(APackedSize * KPack * xdlops_gemm.K0PerXdlops) / ScaleBlockSize
static constexpr auto xdlops_gemm
Definition: blockwise_gemm_mx_pipeline_xdlops_base.hpp:58
static constexpr index_t APackedSize
Definition: blockwise_gemm_mx_pipeline_xdlops_base.hpp:38
◆ ScalesPerXdlopsRunPerThread
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType , typename AScaleDataType , typename BDataType , typename BScaleDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
=
ScalesPerXdlopsRun / xdlops_gemm.mfma_instr.num_input_blks
static constexpr auto ScalesPerXdlopsRun
Definition: blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_gufusion_v3.hpp:175
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.1/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_gufusion_v3.hpp
Public Types inherited from