/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp Source File
thread_group_tensor_slice_transfer_v4r1.hpp
Go to the documentation of this file.
Definition: ck.hpp:264
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
typename remove_reference< T >::type remove_reference_t
Definition: type.hpp:292
__host__ constexpr __device__ auto make_zero_multi_index()
Definition: array_multi_index.hpp:21
__host__ constexpr __device__ auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition: cluster_descriptor.hpp:13
Definition: array.hpp:14
Blockwise data transfer.
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:46
__device__ void Run(const SrcDesc &src_desc, const SrcBuffer &src_buf, const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id)
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:137
constexpr __device__ ThreadGroupTensorSliceTransfer_v4r1(const SrcDesc &src_desc, const Index &src_block_slice_origin, const SrcElementwiseOperation &src_element_op, const DstDesc &dst_desc, const Index &dst_block_slice_origin, const DstElementwiseOperation &dst_element_op)
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:53
static constexpr index_t nDim
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:47
static constexpr auto thread_slice_lengths
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:49
__device__ void SetSrcSliceOrigin(const SrcDesc &src_desc, const Index &src_block_slice_origin)
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:97
__device__ void RunRead(const SrcDesc &src_desc, const SrcBuffer &src_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:113
__device__ void MoveSrcSliceWindow(const SrcDesc &src_desc, const Index &step)
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:147
__device__ void MoveDstSliceWindow(const DstDesc &dst_desc, const Index &step)
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:156
__device__ void RunWrite(const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition: thread_group_tensor_slice_transfer_v4r1.hpp:125
__device__ void MoveDstSliceWindow(const DstDesc &dst_desc, const Index &dst_slice_origin_step_idx)
Definition: threadwise_tensor_slice_transfer_v3r1.hpp:805
__device__ void MoveSrcSliceWindow(const SrcDesc &src_desc, const Index &src_slice_origin_step_idx)
Definition: threadwise_tensor_slice_transfer_v3r1.hpp:790
__device__ void RunRead(const SrcDesc &src_desc, const SrcBuffer &src_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition: threadwise_tensor_slice_transfer_v3r1.hpp:117
__device__ void RunWrite(const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition: threadwise_tensor_slice_transfer_v3r1.hpp:512
__device__ void SetSrcSliceOrigin(const SrcDesc &src_desc, const Index &src_slice_origin_idx)
Definition: threadwise_tensor_slice_transfer_v3r1.hpp:106
__device__ void SetDstSliceOrigin(const DstDesc &dst_desc, const Index &dst_slice_origin_idx)
Definition: threadwise_tensor_slice_transfer_v3r1.hpp:111
Definition: integral_constant.hpp:10
Definition: type.hpp:177