/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/grouped_convolution/kernel/grouped_convolution_forward_kernel.hpp Source File#
grouped_convolution_forward_kernel.hpp
Go to the documentation of this file.
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE auto integer_divide_ceil(X x, Y y)
Definition: math.hpp:149
__device__ uint32_t amd_wave_read_first_lane(uint16_t v)
Definition: amd_buffer_addressing.hpp:35
ConvolutionSpecialization
Definition: convolution_specialization.hpp:11
@ Filter1x1Stride1Pad0
@ Filter1x1Pad0
constexpr CK_TILE_HOST_DEVICE auto pad_tensor_view(const TensorView &tensor_view, const TileLengths &tile_lengths, DoPads)
Definition: tensor_view.hpp:545
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition: concat.hpp:43
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
constexpr CK_TILE_DEVICE auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition: null_tile_window.hpp:75
constexpr CK_TILE_HOST_DEVICE auto generate_tuple(F &&f, number< N >)
Definition: tuple.hpp:429
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:360
__device__ X atomic_add(X *p_dst, const X &x)
Definition: batched_gemm_kernel.hpp:62
Definition: grouped_convolution_forward_kernel.hpp:415
index_t w_size
Definition: grouped_convolution_forward_kernel.hpp:419
index_t h_start
Definition: grouped_convolution_forward_kernel.hpp:418
index_t w_start
Definition: grouped_convolution_forward_kernel.hpp:418
index_t d_size
Definition: grouped_convolution_forward_kernel.hpp:419
index_t h_size
Definition: grouped_convolution_forward_kernel.hpp:419
index_t block_start
Definition: grouped_convolution_forward_kernel.hpp:416
index_t block_end
Definition: grouped_convolution_forward_kernel.hpp:417
index_t d_start
Definition: grouped_convolution_forward_kernel.hpp:418
Definition: grouped_convolution_forward_kernel.hpp:407
index_t num_d_pieces
Definition: grouped_convolution_forward_kernel.hpp:411
index_t total_w
Definition: grouped_convolution_forward_kernel.hpp:409
index_t total_d
Definition: grouped_convolution_forward_kernel.hpp:409
std::array< PieceInfo, MaxPieces > pieces
Definition: grouped_convolution_forward_kernel.hpp:423
static constexpr index_t MaxPieces
Definition: grouped_convolution_forward_kernel.hpp:422
index_t total_spatial
Definition: grouped_convolution_forward_kernel.hpp:410
index_t num_w_pieces
Definition: grouped_convolution_forward_kernel.hpp:411
index_t total_h
Definition: grouped_convolution_forward_kernel.hpp:409
index_t num_h_pieces
Definition: grouped_convolution_forward_kernel.hpp:411
The Grouped Convolution kernel device arguments.
Definition: grouped_convolution_forward_kernel.hpp:30
long_index_t group_stride_c
Definition: grouped_convolution_forward_kernel.hpp:384
index_t input_batch_stride
Definition: grouped_convolution_forward_kernel.hpp:390
static constexpr index_t NonSpatialDims
Definition: grouped_convolution_forward_kernel.hpp:355
remove_cvref_t< decltype(ConvToGemmFwdTransformer{} .template MakeADescriptor_M_K< typename GroupedConvTraitsType_::InLayout >())> AGridDescMK
Definition: grouped_convolution_forward_kernel.hpp:347
index_t n_per_split
Definition: grouped_convolution_forward_kernel.hpp:388
const CDElementwise elfunc
Definition: grouped_convolution_forward_kernel.hpp:375
AGridDescMK a_grid_desc_m_k
Definition: grouped_convolution_forward_kernel.hpp:378
CGridDescMN CGridDescMN_t
Definition: grouped_convolution_forward_kernel.hpp:403
const void * in_ptr
Definition: grouped_convolution_forward_kernel.hpp:372
index_t GemmM
Definition: grouped_convolution_forward_kernel.hpp:366
remove_cvref_t< decltype(ConvToGemmFwdTransformer{} .template MakeCDescriptor_M_N< typename GroupedConvTraitsType_::OutLayout >())> CGridDescMN
Definition: grouped_convolution_forward_kernel.hpp:353
index_t original_n
Definition: grouped_convolution_forward_kernel.hpp:389
long_index_t group_stride_b
Definition: grouped_convolution_forward_kernel.hpp:383
CGridDescMN c_grid_desc_m_n
Definition: grouped_convolution_forward_kernel.hpp:380
CDElementwise_ CDElementwise
Definition: grouped_convolution_forward_kernel.hpp:39
index_t n_splits
Definition: grouped_convolution_forward_kernel.hpp:387
std::array< const void *, NumDTensor > ds_ptr
Definition: grouped_convolution_forward_kernel.hpp:374
array< index_t, GroupedConvTraitsType_::NDimSpatial > input_left_pads
Definition: grouped_convolution_forward_kernel.hpp:362
AGridDescMK AGridDescMK_t
Definition: grouped_convolution_forward_kernel.hpp:402
const void * wei_ptr
Definition: grouped_convolution_forward_kernel.hpp:373
BGridDescNK b_grid_desc_n_k
Definition: grouped_convolution_forward_kernel.hpp:379
index_t num_spatial_pieces
Definition: grouped_convolution_forward_kernel.hpp:426
array< index_t, NonSpatialDims+GroupedConvTraitsType_::NDimSpatial > out_g_n_k_wos_lengths
Definition: grouped_convolution_forward_kernel.hpp:358
array< index_t, NonSpatialDims+GroupedConvTraitsType_::NDimSpatial > wei_g_k_c_xs_lengths
Definition: grouped_convolution_forward_kernel.hpp:357
index_t GemmN
Definition: grouped_convolution_forward_kernel.hpp:367
index_t NumGroupsToMerge
Definition: grouped_convolution_forward_kernel.hpp:370
long_index_t spatial_offset_in
Definition: grouped_convolution_forward_kernel.hpp:394
SplitImageInfo split_image
Definition: grouped_convolution_forward_kernel.hpp:427
CK_TILE_HOST GroupedConvFwdKernelArgs(const GroupedConvFwdHostArgs< CDElementwise > &args)
Definition: grouped_convolution_forward_kernel.hpp:54
array< index_t, GroupedConvTraitsType_::NDimSpatial > input_right_pads
Definition: grouped_convolution_forward_kernel.hpp:363
index_t output_batch_stride
Definition: grouped_convolution_forward_kernel.hpp:391
long_index_t group_stride_a
Definition: grouped_convolution_forward_kernel.hpp:382
index_t GemmK
Definition: grouped_convolution_forward_kernel.hpp:368
void * out_ptr
Definition: grouped_convolution_forward_kernel.hpp:376
ConvToGemmFwdTransformer transformer_
Definition: grouped_convolution_forward_kernel.hpp:398
index_t GemmBatch
Definition: grouped_convolution_forward_kernel.hpp:369
long_index_t spatial_offset_out
Definition: grouped_convolution_forward_kernel.hpp:395
TransformConvFwdToGemm< GroupedConvTraitsType_::NDimSpatial, GroupedConvTraitsType_::ConvSpecialization, GroupedConvTraitsType_::VectorSizeA, GroupedConvTraitsType_::VectorSizeB, GroupedConvTraitsType_::VectorSizeC, GroupedConvTraitsType_::NumGroupsToMerge, true > ConvToGemmFwdTransformer
Definition: grouped_convolution_forward_kernel.hpp:38
array< index_t, NonSpatialDims+GroupedConvTraitsType_::NDimSpatial > in_g_n_c_wis_lengths
Definition: grouped_convolution_forward_kernel.hpp:356
static constexpr index_t NumDTensor
Definition: grouped_convolution_forward_kernel.hpp:40
array< index_t, GroupedConvTraitsType_::NDimSpatial > conv_filter_dilations
Definition: grouped_convolution_forward_kernel.hpp:361
index_t k_batch
Definition: grouped_convolution_forward_kernel.hpp:365
remove_cvref_t< decltype(ConvToGemmFwdTransformer{} .template MakeBDescriptor_N_K< typename GroupedConvTraitsType_::WeiLayout >())> BGridDescNK
Definition: grouped_convolution_forward_kernel.hpp:350
array< index_t, GroupedConvTraitsType_::NDimSpatial > conv_filter_strides
Definition: grouped_convolution_forward_kernel.hpp:360
The Grouped Conv kernel host arguments.
Definition: grouped_convolution_utils.hpp:20
const std::vector< const void * > ds_ptr
Definition: grouped_convolution_utils.hpp:41
Definition: grouped_convolution_forward_kernel.hpp:524
index_t h
Definition: grouped_convolution_forward_kernel.hpp:525
index_t d
Definition: grouped_convolution_forward_kernel.hpp:525
index_t w
Definition: grouped_convolution_forward_kernel.hpp:525
The Grouped Convolution Forward kernel template.
Definition: grouped_convolution_forward_kernel.hpp:473
static CK_TILE_DEVICE index_t FindPieceId(index_t block_id, const SplitImageInfo &split_info, index_t num_pieces)
Definition: grouped_convolution_forward_kernel.hpp:570
remove_cvref_t< typename EpiloguePipeline::DsLayout > GemmDsLayout
Definition: grouped_convolution_forward_kernel.hpp:490
remove_cvref_t< GemmPipeline_ > GemmPipeline
Definition: grouped_convolution_forward_kernel.hpp:479
remove_cvref_t< TilePartitioner_ > TilePartitioner
Definition: grouped_convolution_forward_kernel.hpp:478
typename EpiloguePipeline::CDElementwise CDElementwise
Definition: grouped_convolution_forward_kernel.hpp:501
static constexpr auto I1
Definition: grouped_convolution_forward_kernel.hpp:509
static constexpr auto I2
Definition: grouped_convolution_forward_kernel.hpp:510
static CK_TILE_DEVICE index_t FlattenSpatial(index_t d, index_t h, index_t w, index_t total_h, index_t total_w)
Definition: grouped_convolution_forward_kernel.hpp:551
remove_cvref_t< typename GroupedConvTraitsType_::OutLayout > OutLayout
Definition: grouped_convolution_forward_kernel.hpp:487
GroupedConvFwdKernelArgs< GroupedConvTraitsType_, CDElementwise > GroupedConvFwdKernelArgsSpecialized
Definition: grouped_convolution_forward_kernel.hpp:504
static CK_TILE_DEVICE auto MakeGemmTensorViews(const InDataType *a_ptr, const WeiDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, OutDataType *c_ptr, const ADescType &a_desc, const BDescType &b_desc, const CDescType &c_desc)
Definition: grouped_convolution_forward_kernel.hpp:800
CK_TILE_DEVICE void operator()(GroupedConvFwdKernelArgsSpecialized &kargs) const
Definition: grouped_convolution_forward_kernel.hpp:1060
static constexpr auto I0
Definition: grouped_convolution_forward_kernel.hpp:508
static constexpr bool EnableSplitImage
Definition: grouped_convolution_forward_kernel.hpp:474
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemSize()
Definition: grouped_convolution_forward_kernel.hpp:648
remove_cvref_t< typename GroupedConvTraitsType_::WeiLayout > WeiLayout
Definition: grouped_convolution_forward_kernel.hpp:486
remove_cvref_t< typename EpiloguePipeline::ODataType > OutDataType
Definition: grouped_convolution_forward_kernel.hpp:499
static CK_TILE_DEVICE void RunGemm2LDS(const InDataType *a_ptr, const WeiDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, OutDataType *c_ptr, void *__restrict__ smem_ptr_0, void *__restrict__ smem_ptr_1, const ADescType &a_desc, const BDescType &b_desc, const CDescType &c_desc, const index_t gemm_k, const index_t block_idx_m, const index_t block_idx_n, const CDElementwise &elfunc)
Runs single GEMM problem cooperatively by whole workgroup.
Definition: grouped_convolution_forward_kernel.hpp:996
remove_cvref_t< typename GroupedConvTraitsType_::DsLayout > DsLayout
Definition: grouped_convolution_forward_kernel.hpp:488
static constexpr index_t kBlockSize
Definition: grouped_convolution_forward_kernel.hpp:493
remove_cvref_t< typename EpiloguePipeline::DsDataType > DsDataType
Definition: grouped_convolution_forward_kernel.hpp:497
CK_TILE_DEVICE void CallExplicitGemm(GroupedConvFwdKernelArgsSpecialized &kargs) const
Definition: grouped_convolution_forward_kernel.hpp:1035
remove_cvref_t< typename GemmPipeline::BLayout > GemmBLayout
Definition: grouped_convolution_forward_kernel.hpp:482
static constexpr index_t NDimSpatial
Definition: grouped_convolution_forward_kernel.hpp:475
static CK_TILE_HOST auto BlockSize()
Definition: grouped_convolution_forward_kernel.hpp:636
static constexpr auto I3
Definition: grouped_convolution_forward_kernel.hpp:511
static CK_TILE_HOST const std::string GetName()
Definition: grouped_convolution_forward_kernel.hpp:593
static CK_TILE_HOST bool IsSupportedArgument(const GroupedConvFwdKernelArgsSpecialized &kargs)
Definition: grouped_convolution_forward_kernel.hpp:653
remove_cvref_t< typename GemmPipeline::BDataType > WeiDataType
Definition: grouped_convolution_forward_kernel.hpp:496
static constexpr CK_TILE_HOST GroupedConvFwdKernelArgsSpecialized MakeKernelArgs(const GroupedConvFwdHostArgs< CDElementwise > &hostArgs)
Definition: grouped_convolution_forward_kernel.hpp:642
static constexpr index_t NumDTensor
Definition: grouped_convolution_forward_kernel.hpp:491
static CK_TILE_DEVICE auto MakeGemmTileWindows(const PadView &views, const index_t i_m, const index_t i_n)
Definition: grouped_convolution_forward_kernel.hpp:882
remove_cvref_t< typename GemmPipeline::ALayout > GemmALayout
Definition: grouped_convolution_forward_kernel.hpp:481
static constexpr bool IsSplitKSupported
Definition: grouped_convolution_forward_kernel.hpp:506
remove_cvref_t< typename GemmPipeline::CLayout > GemmCLayout
Definition: grouped_convolution_forward_kernel.hpp:483
static CK_TILE_DEVICE auto MakeGemmPadViews(const TensorView &views)
Definition: grouped_convolution_forward_kernel.hpp:841
static CK_TILE_DEVICE void RunGemm(const InDataType *a_ptr, const WeiDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, OutDataType *c_ptr, void *smem_ptr_0, const ADescType &a_desc, const BDescType &b_desc, const CDescType &c_desc, const index_t gemm_k, const index_t block_idx_m, const index_t block_idx_n, const CDElementwise &elfunc)
Runs single GEMM problem cooperatively by whole workgroup.
Definition: grouped_convolution_forward_kernel.hpp:937
remove_cvref_t< typename GroupedConvTraitsType_::InLayout > InLayout
Definition: grouped_convolution_forward_kernel.hpp:485
static constexpr ConvolutionSpecialization ConvSpecialization
Definition: grouped_convolution_forward_kernel.hpp:476
remove_cvref_t< EpiloguePipeline_ > EpiloguePipeline
Definition: grouped_convolution_forward_kernel.hpp:480
static CK_TILE_DEVICE SpatialCoords UnflattenSpatial(index_t flat, index_t h_size, index_t w_size)
Definition: grouped_convolution_forward_kernel.hpp:530
static CK_TILE_HOST auto GridSize(const GroupedConvFwdKernelArgsSpecialized &kargs)
Definition: grouped_convolution_forward_kernel.hpp:630
remove_cvref_t< typename GemmPipeline::ADataType > InDataType
Definition: grouped_convolution_forward_kernel.hpp:495
constexpr CK_TILE_HOST IndexType GetOriginalN() const
Definition: transform_conv_fwd_to_gemm.hpp:264
constexpr CK_TILE_HOST IndexType GetN() const
Definition: transform_conv_fwd_to_gemm.hpp:263
Definition: integral_constant.hpp:13
std::vector< ck_tile::long_index_t > input_spatial_lengths_
Definition: convolution_parameter.hpp:130
std::vector< ck_tile::long_index_t > output_spatial_lengths_
Definition: convolution_parameter.hpp:131
std::vector< ck_tile::long_index_t > input_right_pads_
Definition: convolution_parameter.hpp:137
std::vector< ck_tile::long_index_t > conv_filter_strides_
Definition: convolution_parameter.hpp:133
std::vector< ck_tile::long_index_t > filter_spatial_lengths_
Definition: convolution_parameter.hpp:129
std::vector< ck_tile::long_index_t > input_left_pads_
Definition: convolution_parameter.hpp:136
std::vector< ck_tile::long_index_t > conv_filter_dilations_
Definition: convolution_parameter.hpp:134
Definition: type_traits.hpp:115
Definition: sequence.hpp:49
Definition: functional.hpp:43