ComputeBasePtrOfStridedBatch Struct Reference
ck::tensor_operation::device::DeviceBatchedGemmSoftmaxGemmPermute_Wmma_CShuffle< NumDimG, NumDimM, NumDimL, NumDimK, NumDimN, ADataType, B0DataType, B1DataType, CDataType, Acc0BiasDataType, Acc0DataType, Acc1BiasDataType, Acc1DataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, ASpec, B0Spec, B1Spec, CSpec, NumPrefetch, BlockSize, MPerBlock, LPerBlock, KPerBlock, AK1, BK1, NPerBlock, LTilePerBlock, L1, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpec, LoopSched, PipelineVer >::ComputeBasePtrOfStridedBatch Struct Reference
#include <device_batched_gemm_softmax_gemm_permute_wmma_cshuffle.hpp>
Public Member Functions | |
| __host__ __device__ | ComputeBasePtrOfStridedBatch (const AGridDesc_G_M_K &a_grid_desc_g_m_k, const B0GridDesc_G_L_K &b0_grid_desc_g_l_k, const B1GridDesc_G_N_L &b1_grid_desc_g_n_l, const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ constexpr __device__ long_index_t | GetABasePtr (index_t g_idx) const |
| __host__ constexpr __device__ long_index_t | GetB0BasePtr (index_t g_idx) const |
| __host__ constexpr __device__ long_index_t | GetB1BasePtr (index_t g_idx) const |
| __host__ constexpr __device__ long_index_t | GetCBasePtr (index_t g_idx) const |
Constructor & Destructor Documentation
◆ ComputeBasePtrOfStridedBatch()
template<index_t NumDimG, index_t NumDimM, index_t NumDimL, index_t NumDimK, index_t NumDimN, typename ADataType , typename B0DataType , typename B1DataType , typename CDataType , typename Acc0BiasDataType , typename Acc0DataType , typename Acc1BiasDataType , typename Acc1DataType , typename CShuffleDataType , typename AElementwiseOperation , typename B0ElementwiseOperation , typename AccElementwiseOperation , typename B1ElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization B0Spec, TensorSpecialization B1Spec, TensorSpecialization CSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1 , typename B0BlockTransferThreadClusterArrangeOrder , typename B0BlockTransferSrcAccessOrder , ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1 , typename B1BlockTransferThreadClusterArrangeOrder , typename B1BlockTransferSrcAccessOrder , ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename 
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inline |
Member Function Documentation
◆ GetABasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimL, index_t NumDimK, index_t NumDimN, typename ADataType , typename B0DataType , typename B1DataType , typename CDataType , typename Acc0BiasDataType , typename Acc0DataType , typename Acc1BiasDataType , typename Acc1DataType , typename CShuffleDataType , typename AElementwiseOperation , typename B0ElementwiseOperation , typename AccElementwiseOperation , typename B1ElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization B0Spec, TensorSpecialization B1Spec, TensorSpecialization CSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1 , typename B0BlockTransferThreadClusterArrangeOrder , typename B0BlockTransferSrcAccessOrder , ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1 , typename B1BlockTransferThreadClusterArrangeOrder , typename B1BlockTransferSrcAccessOrder , ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename 
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inline constexpr |
◆ GetB0BasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimL, index_t NumDimK, index_t NumDimN, typename ADataType , typename B0DataType , typename B1DataType , typename CDataType , typename Acc0BiasDataType , typename Acc0DataType , typename Acc1BiasDataType , typename Acc1DataType , typename CShuffleDataType , typename AElementwiseOperation , typename B0ElementwiseOperation , typename AccElementwiseOperation , typename B1ElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization B0Spec, TensorSpecialization B1Spec, TensorSpecialization CSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1 , typename B0BlockTransferThreadClusterArrangeOrder , typename B0BlockTransferSrcAccessOrder , ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1 , typename B1BlockTransferThreadClusterArrangeOrder , typename B1BlockTransferSrcAccessOrder , ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename 
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inline constexpr |
◆ GetB1BasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimL, index_t NumDimK, index_t NumDimN, typename ADataType , typename B0DataType , typename B1DataType , typename CDataType , typename Acc0BiasDataType , typename Acc0DataType , typename Acc1BiasDataType , typename Acc1DataType , typename CShuffleDataType , typename AElementwiseOperation , typename B0ElementwiseOperation , typename AccElementwiseOperation , typename B1ElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization B0Spec, TensorSpecialization B1Spec, TensorSpecialization CSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1 , typename B0BlockTransferThreadClusterArrangeOrder , typename B0BlockTransferSrcAccessOrder , ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1 , typename B1BlockTransferThreadClusterArrangeOrder , typename B1BlockTransferSrcAccessOrder , ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename 
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inline constexpr |
◆ GetCBasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimL, index_t NumDimK, index_t NumDimN, typename ADataType , typename B0DataType , typename B1DataType , typename CDataType , typename Acc0BiasDataType , typename Acc0DataType , typename Acc1BiasDataType , typename Acc1DataType , typename CShuffleDataType , typename AElementwiseOperation , typename B0ElementwiseOperation , typename AccElementwiseOperation , typename B1ElementwiseOperation , typename CElementwiseOperation , GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization B0Spec, TensorSpecialization B1Spec, TensorSpecialization CSpec, ck::index_t NumPrefetch, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t NPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1 , typename ABlockTransferThreadClusterArrangeOrder , typename ABlockTransferSrcAccessOrder , ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1 , typename B0BlockTransferThreadClusterArrangeOrder , typename B0BlockTransferSrcAccessOrder , ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1 , typename B1BlockTransferThreadClusterArrangeOrder , typename B1BlockTransferSrcAccessOrder , ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename 
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock , index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, ck::LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
|
inline constexpr |
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.2/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_wmma_cshuffle.hpp