/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp File Reference
device_grouped_conv_bwd_weight_dl.hpp File Reference
#include <iostream>
#include <numeric>
#include <sstream>
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
Go to the source code of this file.
Namespaces
ck
ck::tensor_operation
ck::tensor_operation::device
Functions
template<typename GridwiseGemm, typename FloatAB, typename FloatC, typename AGridDesc_B_K0_M0_M1_K1, typename BGridDesc_B_K0_N0_N1_K1, typename CGridDesc_M0_M10_M11_N0_N10_N11, typename Block2CTileMap, typename ComputePtrOffsetOfBatch, bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
__global__ void ck::tensor_operation::device::kernel_batched_gemm_dlops_bwd_weight(const FloatAB *__restrict__ p_a_grid, const FloatAB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, const index_t batch_count, const AGridDesc_B_K0_M0_M1_K1 a_grid_desc_kbatch_k0_m0_m1_k1, const BGridDesc_B_K0_N0_N1_K1 b_grid_desc_kbatch_k0_n0_n1_k1, const CGridDesc_M0_M10_M11_N0_N10_N11 c_grid_desc_m0_m10_m11_n0_n10_n11, const Block2CTileMap block_2_ctile_map, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch)