/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.1.0/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp Source File#
device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp
Go to the documentation of this file.
41 * strided batched, but we can easily extend to other layouts. The returned offset can be either \p
49 * \note Using \p ComputePtrOffsetOfBatch gives us the flexibility that 2 workgroups can compute 2
52 * device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp kernel_gemm_xdlops_v2r3_for_conv3d \endlink for \link
53 * DeviceConv3d \endlink uses the same concept, but currently does NOT encapsulate the computing of
57 * implementation we can avoid copy data to workspace before kernel launch since number of groups is
61 * \note \p Block2ETileMap allows customized mapping between a workgroup and the C-tile it computes.
62 * Together with \p ComputePtrOffsetOfBatch, we can reuse GridwiseGemm (and GridwiseGemm fusion ) to
594 static constexpr index_t ElementwiseBlocksize = ClusterLengthMPerBlock * ClusterLengthNPerBlock;
1791 << "TransposeTransferOutScalarPerVectorAligned: " << TransposeTransferOutScalarPerVectorAligned;
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:14
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:91
__host__ constexpr __device__ auto integer_divide_ceil(X x, Y y)
Definition: math.hpp:72
__host__ constexpr __device__ index_t gcd(index_t x, index_t y)
Definition: math.hpp:154
GemmSpecialization
Definition: gemm_specialization.hpp:11
std::string getConvBackwardDataSpecializationString(const ConvolutionBackwardDataSpecialization &s)
Definition: convolution_backward_data_specialization.hpp:17
ConvolutionBackwardDataSpecialization
Definition: convolution_backward_data_specialization.hpp:11
@ Filter1x1Stride1Pad0
CK_TILE_HOST float launch_kernel(const stream_config &s, Callables &&... callables)
Definition: kernel_launch.hpp:144
Definition: ck.hpp:266
__global__ void kernel_batched_elementwise(const InGridDescTuple in_grid_desc_tuple, const OutGridDescTuple out_grid_desc_tuple, const InDataTypePointerTuple p_in_global_tuple, const OutDataTypePointerTuple p_out_global_tuple, const Block2TileMap block_2_tile_map, const ElementwiseOperation elementwise_op, const index_t batch_count, const std::array< index_t, NumInputs > input_batch_strides, const std::array< index_t, NumOutputs > output_batch_strides)
Definition: gridwise_elementwise_2d.hpp:221
typename tuple_element< I, TTuple >::type tuple_element_t
Definition: tuple.hpp:208
__host__ constexpr __device__ auto generate_tuple(F &&f, Number< N >)
Definition: tuple_helper.hpp:21
__host__ constexpr __device__ auto make_merge_transform(const LowLengths &low_lengths)
Definition: multi_index_transform_helper.hpp:55
__device__ uint32_t amd_wave_read_first_lane(uint32_t value)
Definition: amd_wave_read_first_lane.hpp:100
typename conditional< predicate, X, Y >::type conditional_t
Definition: functional.hpp:115
__host__ constexpr __device__ auto make_pass_through_transform(const LowLength &low_length)
Definition: multi_index_transform_helper.hpp:12
__global__ void kernel_elementwise_batched_dual(const InAGridDescTuple in_grid_desc_tuple_a, const InBGridDescTuple in_grid_desc_tuple_b, const OutAGridDescTuple out_grid_desc_tuple_a, const OutBGridDescTuple out_grid_desc_tuple_b, const InADataTypePointerTuple p_in_global_tuple_a, const InBDataTypePointerTuple p_in_global_tuple_b, const OutADataTypePointerTuple p_out_global_tuple_a, const OutBDataTypePointerTuple p_out_global_tuple_b, const Block2TileMapA block_2_tile_map_a, const Block2TileMapB block_2_tile_map_b, const ElementwiseOperation elementwise_op, const index_t a_grid_size, const index_t batch_count_a, const index_t batch_count_b, const std::array< index_t, NumInputsA > input_batch_strides_a, const std::array< index_t, NumInputsB > input_batch_strides_b, const std::array< index_t, NumOutputsA > output_batch_strides_a, const std::array< index_t, NumOutputsB > output_batch_strides_b)
Definition: gridwise_elementwise_2d.hpp:117
__host__ constexpr __device__ auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition: tensor_descriptor.hpp:319
constexpr LoopScheduler make_default_loop_scheduler()
Definition: loop_scheduler.hpp:20
Definition: stream_config.hpp:10
Definition: gridwise_elementwise_2d.hpp:278
Definition: gridwise_gemm_multiple_d_xdl_cshuffle.hpp:78
decltype(MakeDsGridPointer()) DsGridPointer
Definition: gridwise_gemm_multiple_d_xdl_cshuffle.hpp:406
Definition: sequence.hpp:43
Definition: tuple.hpp:117
Definition: integral_constant.hpp:20
Definition: functional2.hpp:33
Definition: tensor_layout.hpp:223
Definition: tensor_layout.hpp:228
Definition: tensor_layout.hpp:120
Definition: tensor_layout.hpp:347
Definition: tensor_layout.hpp:115
Definition: tensor_layout.hpp:342
Definition: transform_conv_bwd_data_to_gemm_v1.hpp:36
__host__ __device__ auto MakeADescriptor_AK0_M_AK1() const
Definition: transform_conv_bwd_data_to_gemm_v1.hpp:651
__host__ __device__ auto MakeBDescriptor_BK0_N_BK1() const
Definition: transform_conv_bwd_data_to_gemm_v1.hpp:897
__host__ __device__ auto MakeCDescriptor_M_N() const
Definition: transform_conv_bwd_data_to_gemm_v1.hpp:1104
Definition: transform_conv_ngchw_to_nhwgc.hpp:31
Definition: device_base.hpp:51
void * p_workspace_
Definition: device_base.hpp:58
Definition: device_base.hpp:62
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:651
std::size_t GetWorkspaceETensorSizeBytes() const
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1029
index_t conv_N_per_block_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1077
index_t num_workgroups_per_Conv_N_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1111
std::vector< DsGridDesc_M_N > ds_grid_desc_m_n_container_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1080
NGCHWTransposeDescType e_out_transpose_desc_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1088
const index_t k_batch_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1110
index_t gemms_count_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1113
std::array< index_t, NDimSpatial+3 > a_g_n_k_wos_lengths_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1103
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::Argument::p_e_grid_
EDataType * p_e_grid_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1073
Argument(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const AElementwiseOp &a_element_op, const BElementwiseOp &b_element_op, const CDEElementwiseOp &cde_element_op, ck::index_t split_k=1)
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:652
GKCYXTransposeDescType b_in_transpose_desc_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1090
long_index_t e_space_size_bytes
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1117
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::Argument::p_a_grid_
const ADataType * p_a_grid_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1070
std::array< index_t, NDimSpatial > conv_filter_strides_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1106
std::array< index_t, NDimSpatial+3 > e_g_n_c_wis_lengths_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1105
std::vector< std::array< GemmArgs, MaxGroupedGemmGroupsNum > > gemm_kernel_args_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1114
std::array< index_t, NDimSpatial > input_left_pads_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1107
Block2TileMapInOutElementwise elementwise_block_2_ctile_map_transpose_a_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1084
std::vector< AGridDesc_M_K > a_grid_desc_m_k_container_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1078
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::Argument::p_b_grid_
const BDataType * p_b_grid_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1071
Block2TileMapInOutElementwise elementwise_block_2_ctile_map_transpose_e_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1085
CDEElementwiseOp cde_element_op_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1101
ComputePtrOffsetOfStridedBatch< I1, I1, NumDTensor > compute_ptr_offset_of_batch_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1094
bool bwd_needs_zero_out
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1116
NHWGCTransposeDescType e_in_transpose_desc_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1089
AElementwiseOp a_element_op_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1099
std::vector< BGridDesc_N_K > b_grid_desc_n_k_container_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1079
void Print() const
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1049
std::vector< index_t > gemms_grid_size_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1112
NHWGCTransposeDescType a_out_transpose_desc_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1089
index_t num_group_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1076
std::array< index_t, NDimSpatial+3 > b_g_k_c_xs_lengths_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1104
std::array< index_t, NDimSpatial > input_right_pads_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1108
ComputePtrOffsetOfStridedBatch< I1, I1, I0 > compute_ptr_offset_of_workspace_n_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1096
std::vector< EGridDesc_M_N > e_grid_desc_m_n_container_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1081
std::size_t GetWorkspaceSizeBytes() const
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1043
std::size_t GetWorkspaceBTensorSizeBytes() const
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1014
NGCHWTransposeDescType a_in_transpose_desc_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1088
GKYXCTransposeDescType b_out_transpose_desc_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1091
std::size_t GetWorkspaceATensorSizeBytes() const
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:999
BElementwiseOp b_element_op_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1100
GridwiseGemm::DsGridPointer p_ds_grid_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1072
ComputePtrOffsetOfStridedBatch< I1, I1, I0 > compute_ptr_offset_of_n_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1095
Block2TileMapWeiElementwise elementwise_block_2_ctile_map_transpose_b_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1086
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:519
index_t BlockStart_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:557
DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock ds_grid_desc_mblock_mperblock_nblock_nperblock_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:552
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::GemmArgs::BlockEnd_
index_t BlockEnd_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:557
bool HasMainKBlockLoop_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:558
EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock e_grid_desc_mblock_mperblock_nblock_nperblock_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:553
AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:549
GroupedGemmBlock2ETileMap block_2_ctile_map_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:556
GemmArgs()=default
BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1_
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:550
GemmArgs(AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1, BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1, DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock ds_grid_desc_mblock_mperblock_nblock_nperblock, EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock e_grid_desc_mblock_mperblock_nblock_nperblock, GroupedGemmBlock2ETileMap block_2_ctile_map, index_t BlockStart, index_t BlockEnd, bool HasMainKBlockLoop)
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:521
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1122
float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1431
float Run(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1284
float RunMultiDGemm(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1126
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:303
std::conditional_t< is_NGCHW_GKCYX_NGKHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::GKYXC, std::conditional_t< is_NGCDHW_GKCZYX_NGKDHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::GKZYXC, BLayout > > BLayoutAfterTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:361
static constexpr index_t ElementwiseBlocksize
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:594
static constexpr index_t TransposeTransferOutScalarPerVectorAligned
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:578
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::MakeArgumentPointer
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const AElementwiseOp &a_element_op, const BElementwiseOp &b_element_op, const CDEElementwiseOp &cde_element_op, const ck::index_t split_k=1) override
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1713
static constexpr auto conv_ngchw_to_nhwgc_transformer
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:568
std::conditional_t< is_NGCHW_NGKHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NHWGC, std::conditional_t< is_NGCDHW_NGKDHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NDHWGC, ELayout > > ELayoutAfterTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:367
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeGKCYXTransposeDesc< NDimSpatial >({}, {}))> GKCYXTransposeDescType
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:589
ADataType ABDataType
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:327
static auto MakeArgument(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const AElementwiseOp &a_element_op, const BElementwiseOp &b_element_op, const CDEElementwiseOp &cde_element_op, const ck::index_t split_k=1)
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1666
static constexpr index_t ClusterLengthNPerBlock
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:565
GridwiseElementwise< Tuple< GKCYXTransposeDescType >, Tuple< GKYXCTransposeDescType >, Tuple< const BDataType * >, Tuple< BDataType * >, Block2TileMapWeiElementwise, element_wise::PassThrough, ElementwiseBlocksize, MPerBlock, NPerBlock, MPerBlock/ClusterLengthMPerBlock, NPerBlock/ClusterLengthNPerBlock, Sequence< 1, 0 >, Sequence< 1 >, Sequence< CDEBlockTransferScalarPerVector_NPerBlock >, I0, I1 > GridwiseElementwiseWeightTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:630
size_t GetWorkSpaceSize(const BaseArgument *p_arg) const override
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1800
decltype(transform_k0_m_k1_to_m_k(AGridDesc_AK0_M_AK1{})) AGridDesc_M_K
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:503
decltype(MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(EGridDesc_M_N{})) EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:510
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeNHWGCTransposeDesc< NDimSpatial >({}, {}))> NHWGCTransposeDescType
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:586
GridwiseElementwise< Tuple< NHWGCTransposeDescType >, Tuple< NGCHWTransposeDescType >, Tuple< const EDataType * >, Tuple< EDataType * >, Block2TileMapInOutElementwise, element_wise::PassThrough, ElementwiseBlocksize, NPerBlock, MPerBlock, NPerBlock/ClusterLengthNPerBlock, MPerBlock/ClusterLengthMPerBlock, Sequence< 1, 0 >, Sequence< CDEBlockTransferScalarPerVector_NPerBlock >, Sequence< TransposeTransferOutScalarPerVectorAligned >, I0, I1 > GridwiseElementwiseOutputTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:648
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::AGridDesc_AK0_M_AK1
remove_cvref_t< tuple_element_t< 0, ABDsEGridDesc > > AGridDesc_AK0_M_AK1
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:498
constexpr static ConvToGemmBwdDataTransform dummy_conv_to_gemm_transform
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:495
static constexpr auto I3
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:332
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::IsSupportedArgument
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1660
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::NeedTransposeKernel
static constexpr bool NeedTransposeKernel
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:341
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1759
static constexpr auto I1
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:330
static constexpr auto I0
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:329
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeGKYXCTransposeDesc< NDimSpatial >({}, {}))> GKYXCTransposeDescType
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:592
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::IsSupportedArgument
static bool IsSupportedArgument(const Argument &arg)
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1438
GridwiseGemmMultipleD_xdl_cshuffle< GridwiseGemmMultiDTemplateParams > GridwiseGemm
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:466
static auto transform_k0_m_k1_to_m_k(const Desc_K0_M_K1 &desc_k0_m_k1)
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:481
BlockToCTileMap_M00_N0_M01Adapt< MPerBlock, NPerBlock > Block2TileMapWeiElementwise
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:561
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::BGridDesc_BK0_N_BK1
remove_cvref_t< tuple_element_t< 1, ABDsEGridDesc > > BGridDesc_BK0_N_BK1
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:499
std::conditional_t< is_NGCHW_NGKHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NHWGK, std::conditional_t< is_NGCDHW_NGKDHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NDHWGK, ALayout > > ALayoutAfterTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:354
static auto GetDummyABDsEGridDescriptor(const ConvToGemmBwdDataTransform &conv_to_gemm_transform)
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:389
static constexpr index_t ClusterLengthMPerBlock
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:563
decltype(GridwiseGemmCTranspose::MakeDefaultBlock2ETileMap(EGridDesc_M_N{})) Block2ETileMap
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:514
remove_cvref_t< tuple_element_t< 2, ABDsEGridDesc > > DsGridDesc_M_N
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:500
static auto MakeInvoker()
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1711
static constexpr bool isATensorColMajor
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:334
std::string GetTypeString() const override
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1764
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1::SetWorkSpacePointer
void SetWorkSpacePointer(BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:1813
OffsettedBlockToCTileMap< Block2ETileMap > GroupedGemmBlock2ETileMap
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:516
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeNGCHWTransposeDesc< NDimSpatial >({}, {}))> NGCHWTransposeDescType
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:583
static auto MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(const EGridDesc_M_N e_grid_desc_m_n)
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:474
static constexpr index_t MaxGroupedGemmGroupsNum
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:312
static constexpr bool CTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:345
std::conditional_t< CTranspose, GridwiseGemmMultipleD_xdl_cshuffle< GridwiseGemmCTransposeTemplateParameters >, GridwiseGemm > GridwiseGemmCTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:470
GridwiseElementwise< Tuple< NGCHWTransposeDescType >, Tuple< NHWGCTransposeDescType >, Tuple< const ADataType * >, Tuple< ADataType * >, Block2TileMapInOutElementwise, element_wise::PassThrough, ElementwiseBlocksize, NPerBlock, MPerBlock, NPerBlock/ClusterLengthNPerBlock, MPerBlock/ClusterLengthMPerBlock, Sequence< 1, 0 >, Sequence< TransposeTransferInScalarPerVectorAligned >, Sequence< CDEBlockTransferScalarPerVector_NPerBlock >, I1, I0 > GridwiseElementwiseInputTranspose
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:612
BlockToCTileMap_M00_N0_M01Adapt< NPerBlock, MPerBlock > Block2TileMapInOutElementwise
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:560
remove_cvref_t< tuple_element_t< 3, ABDsEGridDesc > > EGridDesc_M_N
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:501
static constexpr index_t NumDTensor
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:320
decltype(GridwiseGemmCTranspose::MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(DsGridDesc_M_N{})) DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:508
static constexpr auto I2
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:331
static constexpr index_t TransposeTransferInScalarPerVectorAligned
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:576
static constexpr bool IsSplitKSupported
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:322
decltype(GetDummyABDsEGridDescriptor(dummy_conv_to_gemm_transform)) ABDsEGridDesc
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:496
decltype(transform_k0_m_k1_to_m_k(BGridDesc_BK0_N_BK1{})) BGridDesc_N_K
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:504
static constexpr GemmSpecialization GemmSpec
Definition: device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:321
Definition: device_grouped_conv_bwd_data_multiple_d.hpp:36
Definition: unary_element_wise_operation.hpp:308