Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference#
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
Protected Types |
Protected Attributes |
Static Protected Attributes |
List of all members
ck::BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference
#include <blockwise_gemm_pipeline_xdlops_moe_blockscale_b_preshuffle_gufusion_v3.hpp>
Inheritance diagram for ck::BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >:
Public Types | |
| using | Base = BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > |
Public Types inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| using | ThisThreadBlock = ThisThreadBlock< BlockSize > |
| using | HotLoopInstList = ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, A_K1, B_K1, A_K1, B_K1, MRepeat, NRepeat, MPerXDL, NPerXDL, xdlops_gemm.KPerXdlops > |
| using | Tuple4 = decltype(CalculateAThreadOriginDataIndex()) |
Public Member Functions | |
| template<bool HasMainLoop, int NumKBlockPerScale, TailNumber TailNum, typename AGridDesc , typename ABlockDesc , typename ABlockTransfer , typename AGridBuffer , typename ABlockBuffer , typename ABlockTransferStep , typename BGridDesc , typename BBlockDesc , typename BBlockTransfer , typename BGridBuffer , typename BBlockBuffer , typename BBlockTransferStep , typename CScaleThreadDesc , typename CThreadBuffer , typename AScaleGridBuffer , typename AScaleGridDesc , typename AScaleThreadDesc , typename AScaleThreadTransfer , typename AScaleThreadTransferStep , typename BScaleGridBuffer , typename BScaleGridDesc , typename BScaleThreadDesc , typename BScaleThreadTransfer , typename BScaleThreadTransferStep > | |
| __device__ void | Run (const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, BBlockTransfer &b_blockwise_copy_up, const BGridBuffer &b_grid_buf, const BGridBuffer &b_grid_buf_up, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, const CScaleThreadDesc &c_scale_thread_desc, CThreadBuffer &c_thread_buf, CThreadBuffer &c_thread_buf_up, const AScaleGridDesc &a_scale_grid_desc, const AScaleThreadDesc &a_scale_thread_desc, AScaleThreadTransfer &a_scale_thread_copy, const AScaleGridBuffer &a_scale_grid_buf, const AScaleThreadTransferStep &a_scale_thread_copy_step, const BScaleGridDesc &b_scale_grid_desc, const BScaleThreadDesc &b_scale_thread_desc, BScaleThreadTransfer &b_scale_thread_copy, BScaleThreadTransfer &b_scale_thread_copy_up, const BScaleGridBuffer &b_scale_grid_buf, const BScaleGridBuffer &b_scale_grid_buf_up, const BScaleThreadTransferStep &b_scale_thread_copy_step, index_t num_loop) const |
Public Member Functions inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| __host__ constexpr __device__ auto & | GetCThreadBuffer () |
| __host__ __device__ | BlockwiseGemmXdlops_pipeline_base (Tuple4 a_origin=CalculateAThreadOriginDataIndex(), Tuple4 b_origin=CalculateBThreadOriginDataIndex()) |
| Constructor for BlockwiseGemmXdlops_pipeline_base. More... | |
Static Public Member Functions | |
| template<typename TileDesc_M0_M1_M2_K > | |
| __host__ static constexpr __device__ auto | MakeAGemmMmaTileDescriptor (const TileDesc_M0_M1_M2_K &) |
| __host__ static constexpr __device__ bool | BlockHasHotloop (index_t num_loop) |
| __host__ static constexpr __device__ TailNumber | BlockLoopTailNum (index_t num_loop) |
| static constexpr __device__ auto | HotLoopScheduler () |
Static Public Member Functions inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| static __device__ auto | GetWaveIdx () |
| static __device__ auto | CalculateAThreadOriginDataIndex () |
| static __device__ auto | CalculateAThreadOriginDataIndex6D () |
| static __device__ auto | CalculateBThreadOriginDataIndex () |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| static __device__ auto | CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static constexpr __device__ auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static constexpr __device__ auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static constexpr __device__ auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static constexpr __device__ auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static constexpr __device__ auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static constexpr __device__ auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static constexpr __device__ auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| __host__ static constexpr __device__ auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static constexpr __device__ auto | GetCThreadDesc () |
Protected Attributes | |
| AThreadCopy | a_thread_copy_ {Base::CalculateAThreadOriginDataIndex6D()} |
Protected Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| AThreadCopy | a_thread_copy_ |
| BThreadCopy | b_thread_copy_ |
Static Protected Attributes | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr BTileDesc | b_block_desc_n0_n1_k0_k1 |
Static Protected Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr auto | c_thread_desc_ |
Additional Inherited Members | |
Public Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > | c_thread_buf_ |
Member Typedef Documentation
◆ AThreadCopy
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
protected |
◆ Base
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_moe_blockscale_bpreshuffle_gufusion_v3< BlockGemmPipelineScheduler::Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Base = BlockwiseGemmXdlops_pipeline_base<BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true> |
Member Function Documentation
◆ BlockHasHotloop()
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ BlockLoopTailNum()
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ HotLoopScheduler()
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ MakeAGemmMmaTileDescriptor()
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename TileDesc_M0_M1_M2_K >
|
inlinestaticconstexpr |
◆ Run()
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<bool HasMainLoop, int NumKBlockPerScale, TailNumber TailNum, typename AGridDesc , typename ABlockDesc , typename ABlockTransfer , typename AGridBuffer , typename ABlockBuffer , typename ABlockTransferStep , typename BGridDesc , typename BBlockDesc , typename BBlockTransfer , typename BGridBuffer , typename BBlockBuffer , typename BBlockTransferStep , typename CScaleThreadDesc , typename CThreadBuffer , typename AScaleGridBuffer , typename AScaleGridDesc , typename AScaleThreadDesc , typename AScaleThreadTransfer , typename AScaleThreadTransferStep , typename BScaleGridBuffer , typename BScaleGridDesc , typename BScaleThreadDesc , typename BScaleThreadTransfer , typename BScaleThreadTransferStep >
|
inline |
Member Data Documentation
◆ a_block_desc_m0_m1_m2_k0_k1_k2
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
=
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_k
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:353
__host__ static constexpr __device__ auto MakeAGemmMmaTileDescriptor(const TileDesc_M0_M1_M2_K &)
Definition: blockwise_gemm_pipeline_xdlops_moe_blockscale_b_preshuffle_gufusion_v3.hpp:162
◆ a_thread_copy_
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
protected |
◆ a_thread_desc_
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexprprotected |
Initial value:
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
static constexpr auto I1
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:37
static constexpr auto I2
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:38
◆ b_block_desc_n0_n1_k0_k1
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexprprotected |
◆ b_thread_desc_
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexprprotected |
Initial value:
make_tuple(Number<NRepeat>{}, I1, Number<KRepeat>{}, Number<KPack>{}))
◆ GlobalBufferNum
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ HotloopLocalBufSwitch
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ PrefetchStages
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ PrefillStages
template<index_t BlockSize, typename ADataType , typename BDataType , typename ComputeDataType , typename AccDataType , typename ATileDesc , typename BTileDesc , typename AMmaTileDesc , typename BMmaTileDesc , index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.1/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_moe_blockscale_b_preshuffle_gufusion_v3.hpp
Public Types inherited from