18     template <
typename AGridDesc,
 
   20               typename ABlockTransfer,
 
   22               typename ABlockBuffer,
 
   23               typename ABlockTransferStep,
 
   26               typename BBlockTransfer,
 
   28               typename BBlockBuffer,
 
   29               typename BBlockTransferStep,
 
   30               typename BlockwiseGemm,
 
   31               typename CThreadBuffer>
 
   32     __device__ 
static void Run(
const AGridDesc& a_grid_desc,
 
   33                                const ABlockDesc& a_block_desc,
 
   34                                ABlockTransfer& a_blockwise_copy,
 
   35                                const AGridBuffer& a_grid_buf,
 
   36                                ABlockBuffer& a_block_buf,
 
   37                                const ABlockTransferStep& a_block_copy_step,
 
   38                                const BGridDesc& b_grid_desc,
 
   39                                const BBlockDesc& b_block_desc,
 
   40                                BBlockTransfer& b_blockwise_copy,
 
   41                                const BGridBuffer& b_grid_buf,
 
   42                                BBlockBuffer& b_block_buf,
 
   43                                const BBlockTransferStep& b_block_copy_step,
 
   44                                const BlockwiseGemm& blockwise_gemm,
 
   45                                CThreadBuffer& c_thread_buf,
 
   49         a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
 
   50         b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
 
   52         a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
 
   53         b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
 
   59         a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
 
   60         b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
 
   66             a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
 
   68             b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
 
   70             blockwise_gemm.Run(a_block_buf, b_block_buf, c_thread_buf);
 
   74             a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
 
   75             b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
 
   76             a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
 
   77             b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
 
   84             blockwise_gemm.Run(a_block_buf, b_block_buf, c_thread_buf);
 
int32_t index_t
Definition: ck.hpp:297
 
__device__ void block_sync_lds()
Definition: synchronization.hpp:10
 
Definition: gridwise_gemm_pipeline_v3.hpp:11
 
static __device__ void Run(const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, const BlockwiseGemm &blockwise_gemm, CThreadBuffer &c_thread_buf, index_t num_loop)
Definition: gridwise_gemm_pipeline_v3.hpp:32
 
__host__ static constexpr __device__ bool IsSupported(index_t)
Definition: gridwise_gemm_pipeline_v3.hpp:12