Class Members - Variables
Class Members - Variables
Composable Kernel: Class Members - Variables
- r -
- r_grid_desc_m_
: ck::tensor_operation::device::DeviceGemmMultipleDMultipleR_Xdl_CShuffle< ALayout, BLayout, DELayout, ADataType, BDataType, GemmAccDataType, CShuffleDataType, DsDataType, EDataType, ReduceAccDataType, RsDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, QsElementwiseOperation, RsElementwiseOperation, ThreadReduceOperations, RsGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDRThreadTransferClusterLengths_MPerBlock_NPerBlock, CDEReduceThreadTransferScalarPerVector_NPerBlock, RThreadTransferDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGroupedConvFwdMultipleDMultipleR_Xdl_CShuffle< NDimSpatial, ALayout, BLayout, DELayout, RLayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, ReduceAccDataType, RsDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, QsElementwiseOperation, RsElementwiseOperation, ThreadReduceOperations, RsGlobalMemoryDataOperation, ConvForwardSpecialization, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDRThreadTransferClusterLengths_MPerBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, RThreadTransferDstScalarPerVector_MPerBlock, LoopSched >::Argument
- rand_val_ptr
: ck_tile::FmhaBwdDQDKDVKernel< FmhaPipeline_, KGradEpiloguePipeline_, VGradEpiloguePipeline_ >::FmhaBwdCommonDropoutKargs
, ck_tile::FmhaFwdKernel< FmhaPipeline_, EpiloguePipeline_ >::FmhaFwdCommonDropoutKargs
- range
: ck::BufferResource< T >
, ck_tile::buffer_resource
- rank
: ck_tile::GenericPermuteHostArgs
- raw_lengths_m_n_k_o_
: ck::tensor_operation::device::DeviceBatchedGemmGemm_Xdl_CShuffle< ALayout, BLayout, B1Layout, CLayout, ADataType, BDataType, B1DataType, CDataType, GemmAccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1, BK1, B1K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceBatchedGemmSoftmaxGemm_Xdl_CShuffle< ALayout, BLayout, B1Layout, CLayout, ADataType, BDataType, B1DataType, CDataType, GemmAccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1, BK1, B1K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskOutUpperTriangle, LoopSched >::Argument
- raw_lengths_mz_lz_kz_nz_
: ck::tensor_operation::device::DeviceBatchedGemmSoftmaxGemmPermute_Wmma_CShuffle< NumDimG, NumDimM, NumDimL, NumDimK, NumDimN, ADataType, B0DataType, B1DataType, CDataType, Acc0BiasDataType, Acc0DataType, Acc1BiasDataType, Acc1DataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, ASpec, B0Spec, B1Spec, CSpec, NumPrefetch, BlockSize, MPerBlock, LPerBlock, KPerBlock, AK1, BK1, NPerBlock, LTilePerBlock, L1, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpec, LoopSched, PipelineVer >::Argument
, ck::tensor_operation::device::DeviceGroupedQueryAttentionForward_Wmma< NumDimG, NumDimM, NumDimL, NumDimK, NumDimN, ADataType, B0DataType, B1DataType, CDataType, Acc0BiasDataType, Acc0DataType, Acc1BiasDataType, Acc1DataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, ASpec, B0Spec, B1Spec, CSpec, NumPrefetch, QueryGroupNumber, BlockSize, MPerBlock, LPerBlock, KPerBlock, AK1, BK1, NPerBlock, LTilePerBlock, L1, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpec, LoopSched, PipelineVer >::Argument
, ck::tensor_operation::device::DeviceMultiQueryAttentionForward_Wmma< NumDimG, NumDimM, NumDimL, NumDimK, NumDimN, ADataType, B0DataType, B1DataType, CDataType, Acc0BiasDataType, Acc0DataType, Acc1BiasDataType, Acc1DataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, ASpec, B0Spec, B1Spec, CSpec, NumPrefetch, BlockSize, MPerBlock, LPerBlock, KPerBlock, AK1, BK1, NPerBlock, LTilePerBlock, L1, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpec, LoopSched, PipelineVer >::Argument
- raw_lengths_mz_nz_kz_gemm1nz_
: ck::tensor_operation::device::DeviceBatchedGemmSoftmaxGemmPermute_Xdl_CShuffle< NumDimG, NumDimM, NumDimN, NumDimK, NumDimO, ADataType, BDataType, B1DataType, CDataType, D0sDataType, D1sDataType, GemmAccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, C0DEElementwiseOperation, B1ElementwiseOperation, C1DEElementwiseOperation, GemmSpec, ASpec, BSpec, B1Spec, CSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1, BK1, B1K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpec, D0sTransferSrcScalarPerVector, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGroupedGemmSoftmaxGemmPermute_Xdl_CShuffle< NumDimG, NumDimM, NumDimN, NumDimK, NumDimO, ADataType, BDataType, B1DataType, CDataType, Acc0BiasDataType, Acc1BiasDataType, GemmAccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, ASpec, BSpec, B1Spec, CSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1, BK1, B1K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpec, LoopSched >::GroupDeviceArg
- raw_scale
: ck_tile::FmhaBwdDQDKDVKernel< FmhaPipeline_, KGradEpiloguePipeline_, VGradEpiloguePipeline_ >::FmhaBwdCommonKargs
- reduce_grid_desc_m_
: ck::tensor_operation::device::DeviceBatchedGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmBiasAddReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, BiasDataType, D0DataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, D0ElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
- reduce_grid_desc_mblock_mperblock_
: ck::tensor_operation::device::DeviceBatchedGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmBiasAddReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, BiasDataType, D0DataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, D0ElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
- reduce_in_element_ops_
: ck::tensor_operation::device::DeviceBatchedGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmBiasAddReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, BiasDataType, D0DataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, D0ElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
- reduce_init
: ck_tile::BlockReduce2D< InDistributedTensor_ >
- reduce_length
: ck::tensor_operation::device::DeviceBatchNormBwdImpl< XDataType, DxDataType, DyDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::Argument
- reduce_length_
: ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::Argument
- reduce_lowest_length
: ck::tensor_operation::device::DeviceReduceMultiBlock< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, OutMemoryDataOperation, PropagateNan, OutputIndex, HaveIndexInputIfOutputIndex, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::Argument
, ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::Argument
, ck::tensor_operation::device::DeviceReduceThreadWiseMultiD< InDataType, DsDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, OutElementwiseOperation, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize, DsVectorSizeSequence >::Argument
- reduce_out_element_ops_
: ck::tensor_operation::device::DeviceBatchedGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmBiasAddReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, BiasDataType, D0DataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, D0ElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGemmReduce_Xdl_CShuffle< ALayout, BLayout, CLayout, ADataType, BDataType, CDataType, GemmAccDataType, CShuffleDataType, ReduceAccDataType, ReducePtrsGlobal, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, ReduceOperations, ReduceInElementwiseOperations, ReduceAccElementwiseOperations, ReduceGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, CReduceThreadClusterLengths_MPerBlock_NPerBlock, CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock, CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock, LoopSched >::Argument
- reduce_total_length
: ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::Argument
, ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::Argument
, ck::tensor_operation::device::DeviceReduceMultiBlock< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, OutMemoryDataOperation, PropagateNan, OutputIndex, HaveIndexInputIfOutputIndex, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::Argument
, ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::Argument
, ck::tensor_operation::device::DeviceReduceThreadWiseMultiD< InDataType, DsDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, OutElementwiseOperation, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize, DsVectorSizeSequence >::Argument
- reduceAllDim
: ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >
, ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >
, ck::tensor_operation::device::DeviceNormalizationBwdDataImpl< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsGammaFastestDimReduced, GammaSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, IsDxFastestDimReduced, DXDstVectorSize >
, ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >
, ck::tensor_operation::device::DeviceNormalizationFwdImpl< XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, SaveMeanInvStdDataType, YElementwiseOperation, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize, SaveMeanInvStdDstVectorSize, UseWelford >
, ck::tensor_operation::device::DeviceNormalizationFwdSplitKImpl< XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, SaveMeanInvStdDataType, YElementwiseOperation, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize, SaveMeanInvStdDstVectorSize >
, ck::tensor_operation::device::DeviceReduceMultiBlock< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, OutMemoryDataOperation, PropagateNan, OutputIndex, HaveIndexInputIfOutputIndex, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >
, ck::tensor_operation::device::DeviceReduceThreadWise< InDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, PropagateNan, OutputIndex, TransformIndexKtoGlobal, HaveIndexInputIfOutputIndex, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >
, ck::tensor_operation::device::DeviceReduceThreadWiseMultiD< InDataType, DsDataType, AccDataType, OutDataType, Rank, NumReduceDim, ReduceOperation, InElementwiseOperation, OutElementwiseOperation, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize, DsVectorSizeSequence >
, ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >
- reduction_space_offset
: ck::GridwiseBatchedGemmMultipleDSoftmaxGemm_Xdl_CShuffle< FloatAB, FloatGemmAcc, FloatCShuffle, FloatC, D0sDataType, AElementwiseOperation, BElementwiseOperation, C0DEElementwiseOperation, B1ElementwiseOperation, C1DEElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc_AK0_M_AK1, BGridDesc_BK0_N_BK1, B1GridDesc_BK0_N_BK1, C1GridDesc_M_N, D0sGridDesc_M_N, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1Value, BK1Value, B1K1Value, MPerXdl, NPerXdl, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PadN, MaskOutUpperTriangle, D0sTransferSrcScalarPerVector, PipelineVer >::SharedMemTrait
, ck::GridwiseBatchedGemmSoftmaxGemm_Wmma< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, AEnableLds, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0EnableLds, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1EnableLds, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, MaskOutUpperTriangle, NumGemmKPrefetchStage, LoopSched, PipelineVer >::SharedMemTrait
, ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle< FloatAB, FloatGemmAcc, FloatCShuffle, FloatC, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc_AK0_M_AK1, BGridDesc_BK0_N_BK1, B1GridDesc_BK0_N_BK1, CGridDesc_M_N, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1Value, BK1Value, B1K1Value, MPerXdl, NPerXdl, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PadN, MaskOutUpperTriangle, PipelineVer >::SharedMemTrait
- reduction_space_size_aligned
: ck::GridwiseBatchedGemmMultipleDSoftmaxGemm_Xdl_CShuffle< FloatAB, FloatGemmAcc, FloatCShuffle, FloatC, D0sDataType, AElementwiseOperation, BElementwiseOperation, C0DEElementwiseOperation, B1ElementwiseOperation, C1DEElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc_AK0_M_AK1, BGridDesc_BK0_N_BK1, B1GridDesc_BK0_N_BK1, C1GridDesc_M_N, D0sGridDesc_M_N, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1Value, BK1Value, B1K1Value, MPerXdl, NPerXdl, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PadN, MaskOutUpperTriangle, D0sTransferSrcScalarPerVector, PipelineVer >::SharedMemTrait
, ck::GridwiseBatchedGemmSoftmaxGemm_Wmma< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, AEnableLds, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0EnableLds, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1EnableLds, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, MaskOutUpperTriangle, NumGemmKPrefetchStage, LoopSched, PipelineVer >::SharedMemTrait
, ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle< FloatAB, FloatGemmAcc, FloatCShuffle, FloatC, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc_AK0_M_AK1, BGridDesc_BK0_N_BK1, B1GridDesc_BK0_N_BK1, CGridDesc_M_N, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1Value, BK1Value, B1K1Value, MPerXdl, NPerXdl, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PadN, MaskOutUpperTriangle, PipelineVer >::SharedMemTrait
- reduction_start_block_idx
: ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >
, ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >
- ReductionStrategy
: ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >
, ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >
- relu_
: ck::tensor_operation::element_wise::DynamicUnaryOp
- reorder_thread_cluster
: ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >
, ck::GridwiseBatchNormForwardWithBlockwiseWelford< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >
, ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >
, ck::GridwiseMultiblockBatchNormForward< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, XYGridDesc_M_K, MeanVarCountGridDesc_M_G, MeanVarCountGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >
, ck::GridwiseMultiblockWelfordFirstHalf< XDataType, AccDataType, MeanVarDataType, XGridDesc_M_K, MeanVarCountGridDesc_M_G, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcCountSrcVectorDim, XSrcCountSrcVectorSize >
, ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >
, ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >
, ck::GridwiseNormalizationNaiveVariance_mk_to_mk< XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize, SweepOnce >
, ck::GridwiseNormalizationSplitK1st< XDataType, ComputeDataType, MeanVarDataType, XGridDesc_M_K, MeanVarGridDesc_M_KBlock, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize >
, ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >
, ck::GridwiseNormalizationWelfordVariance_mk_to_mk< XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize, SweepOnce >
, ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >
, ck::GridwiseReduction_mk_to_m_multiblock< InDataType, OutDataType, AccDataType, IndexDataType, InGridDesc_M_K, OutGridDesc_M, ReduceOperation, InElementwiseOperation, AccElementwiseOperation, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >
, ck::GridwiseSoftmax_mk_to_mk< InDataType, OutDataType, AccDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize, SweepOnce >
, ck::GridwiseWelfordSecondHalfBatchNormForwardFinal< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, XYGridDesc_M_K, MeanVarCountGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >
, ck::GridwiseWelfordSecondHalfReduceFirstHalf< XDataType, DyDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, MeanVarGridDesc_M, MeanVarCountGridDesc_M_K, DscaleDbiasGridDesc_M_G, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyVectorDim, XSrcVectorSize, DySrcVectorSize, MeanVarSrcVectorSize >
- Repeat_K
: ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base
, ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_Base
- Repeat_K0
: ck_tile::FusedMoeGemmShape< BlockTile_0_, WarpPerBlock_0_, WarpTile_0_, BlockTile_1_, WarpPerBlock_1_, WarpTile_1_ >
- Repeat_K1
: ck_tile::FusedMoeGemmShape< BlockTile_0_, WarpPerBlock_0_, WarpTile_0_, BlockTile_1_, WarpPerBlock_1_, WarpTile_1_ >
- Repeat_M
: ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base
, ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_Base
, ck_tile::Generic2dBlockShape< BlockTile_, WarpPerBlock_, WarpTile_, Vector_ >
- Repeat_M0
: ck_tile::FusedMoeGemmShape< BlockTile_0_, WarpPerBlock_0_, WarpTile_0_, BlockTile_1_, WarpPerBlock_1_, WarpTile_1_ >
- Repeat_M1
: ck_tile::FusedMoeGemmShape< BlockTile_0_, WarpPerBlock_0_, WarpTile_0_, BlockTile_1_, WarpPerBlock_1_, WarpTile_1_ >
- Repeat_N
: ck_tile::AddRmsnorm2dRdquantFwd< Pipeline_ >
, ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base
, ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_Base
, ck_tile::Generic2dBlockShape< BlockTile_, WarpPerBlock_, WarpTile_, Vector_ >
, ck_tile::Layernorm2dFwd< Pipeline_, Epilogue_ >
, ck_tile::MoeSmoothquant< Pipeline_ >
, ck_tile::Rmsnorm2dFwd< Pipeline_, Epilogue_ >
, ck_tile::Smoothquant< Pipeline_ >
- Repeat_N0
: ck_tile::FusedMoeGemmShape< BlockTile_0_, WarpPerBlock_0_, WarpTile_0_, BlockTile_1_, WarpPerBlock_1_, WarpTile_1_ >
- Repeat_N1
: ck_tile::FusedMoeGemmShape< BlockTile_0_, WarpPerBlock_0_, WarpTile_0_, BlockTile_1_, WarpPerBlock_1_, WarpTile_1_ >
- requantScale_
: ck::tensor_operation::element_wise::Activation_Mul_Clamp< Activation >
, ck::tensor_operation::element_wise::Add_Activation_Mul_Clamp< Activation >
- resultRunningMean_
: ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::Argument
- resultRunningVariance_
: ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::Argument
- resultSaveInvVariance_
: ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::Argument
- resultSaveMean_
: ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::Argument
- rh_major_minor_to_adaptor_hidden_idss_
: ck_tile::detail::tile_distribution_detail< RhMajorMinor2AdaptorHiddenIdss >
- rhs_lengthss_
: ck_tile::tile_distribution_encoding< RsLengths_, HsLengthss_, Ps2RHssMajor_, Ps2RHssMinor_, Ys2RHsMajor_, Ys2RHsMinor_ >::detail
- rhs_major_minor_to_span_minor_
: ck_tile::tile_distribution_encoding< RsLengths_, HsLengthss_, Ps2RHssMajor_, Ps2RHssMinor_, Ys2RHsMajor_, Ys2RHsMinor_ >::detail
- rhs_major_minor_to_ys_
: ck_tile::tile_distribution_encoding< RsLengths_, HsLengthss_, Ps2RHssMajor_, Ps2RHssMinor_, Ys2RHsMajor_, Ys2RHsMinor_ >::detail
- right_pad_length_
: ck::Pad< LowLength, LeftPadLength, RightPadLength, SkipIsValidCheck >
, ck::RightPad< LowLength, RightPadLength, SkipIsValidCheck >
, ck_tile::pad< LowLength, LeftPadLength, RightPadLength, SkipIsValidCheck >
, ck_tile::right_pad< LowLength, RightPadLength, SkipIsValidCheck >
- rotary_cos_ptr
: ck_tile::FmhaFwdAppendKVKernel< FmhaPipeline_ >::RoPEKargs
- rotary_dim
: ck_tile::FmhaFwdAppendKVKernel< FmhaPipeline_ >::RoPEKargs
- rotary_sin_ptr
: ck_tile::FmhaFwdAppendKVKernel< FmhaPipeline_ >::RoPEKargs
- RotaryEnum
: ck_tile::BlockFmhaFwdAppendKVPipeline< Problem_, Policy_ >
, ck_tile::BlockFmhaFwdAppendKVPipelineProblem< QDataType_, KDataType_, VDataType_, kM0_, kN0_, kK0_, kN1_, kIsVLayoutRowMajor_, RotaryEnum_, kIsPagedKV_, Traits_ >
- rotating_count
: StreamConfig
- row_stride_o
: ck_tile::FmhaFwdSplitKVCombineKernel< FmhaPipeline_, EpiloguePipeline_ >::CommonKargs
- row_stride_o_acc
: ck_tile::FmhaFwdSplitKVCombineKernel< FmhaPipeline_, EpiloguePipeline_ >::CommonKargs
- RowPerSubBlock
: ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >
- RowsPerBlock
: ck_tile::TopkSoftmaxWarpPerRowProblem< InputType_, WeightType_, IndexType_, Experts_, IssuesPerCol_, BytesPerIssue_, LaunchType_, BlockSize_ >
- RowsPerWarp
: ck_tile::TopkSoftmaxWarpPerRowProblem< InputType_, WeightType_, IndexType_, Experts_, IssuesPerCol_, BytesPerIssue_, LaunchType_, BlockSize_ >
- RowsPerWarpPerColIssue
: ck_tile::TopkSoftmaxWarpPerRowProblem< InputType_, WeightType_, IndexType_, Experts_, IssuesPerCol_, BytesPerIssue_, LaunchType_, BlockSize_ >
- RowSubBlocks
: ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >
- rp_undrop
: ck_tile::BlockDropout
, ck_tile::BlockDropoutBwd< true, IsWG32_, IsStoreRandval_ >
, ck_tile::FmhaBwdDQDKDVKernel< FmhaPipeline_, KGradEpiloguePipeline_, VGradEpiloguePipeline_ >::FmhaBwdCommonDropoutKargs
, ck_tile::FmhaFwdKernel< FmhaPipeline_, EpiloguePipeline_ >::FmhaFwdCommonDropoutKargs
- rs_element_op_
: ck::tensor_operation::device::DeviceGemmMultipleDMultipleR_Xdl_CShuffle< ALayout, BLayout, DELayout, ADataType, BDataType, GemmAccDataType, CShuffleDataType, DsDataType, EDataType, ReduceAccDataType, RsDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, QsElementwiseOperation, RsElementwiseOperation, ThreadReduceOperations, RsGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDRThreadTransferClusterLengths_MPerBlock_NPerBlock, CDEReduceThreadTransferScalarPerVector_NPerBlock, RThreadTransferDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGroupedConvFwdMultipleDMultipleR_Xdl_CShuffle< NDimSpatial, ALayout, BLayout, DELayout, RLayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, ReduceAccDataType, RsDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, QsElementwiseOperation, RsElementwiseOperation, ThreadReduceOperations, RsGlobalMemoryDataOperation, ConvForwardSpecialization, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDRThreadTransferClusterLengths_MPerBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, RThreadTransferDstScalarPerVector_MPerBlock, LoopSched >::Argument
- rs_grid_desc_mblock_mperblock_
: ck::tensor_operation::device::DeviceGemmMultipleDMultipleR_Xdl_CShuffle< ALayout, BLayout, DELayout, ADataType, BDataType, GemmAccDataType, CShuffleDataType, DsDataType, EDataType, ReduceAccDataType, RsDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, QsElementwiseOperation, RsElementwiseOperation, ThreadReduceOperations, RsGlobalMemoryDataOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDRThreadTransferClusterLengths_MPerBlock_NPerBlock, CDEReduceThreadTransferScalarPerVector_NPerBlock, RThreadTransferDstScalarPerVector_MPerBlock, LoopSched >::Argument
, ck::tensor_operation::device::DeviceGroupedConvFwdMultipleDMultipleR_Xdl_CShuffle< NDimSpatial, ALayout, BLayout, DELayout, RLayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, ReduceAccDataType, RsDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, QsElementwiseOperation, RsElementwiseOperation, ThreadReduceOperations, RsGlobalMemoryDataOperation, ConvForwardSpecialization, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDRThreadTransferClusterLengths_MPerBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, RThreadTransferDstScalarPerVector_MPerBlock, LoopSched >::Argument
- rs_lengths_
: ck_tile::tile_distribution_encoding< RsLengths_, HsLengthss_, Ps2RHssMajor_, Ps2RHssMinor_, Ys2RHsMajor_, Ys2RHsMinor_ >