include/ck/wrapper/operations/copy.hpp File Reference
#include "ck/wrapper/utils/tensor_utils.hpp"#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp"#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp"#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp"#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"#include "ck/tensor_description/tensor_space_filling_curve.hpp"Go to the source code of this file.
Functions

template<typename DimAccessOrderTuple, index_t VectorDim, index_t ScalarPerVector, typename SrcTensorType, typename DstTensorType>
__device__ void copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
    Perform an optimized copy between two tensor partitions (threadwise copy). Tensors must have the same size.

template<typename SrcTensorType, typename DstTensorType>
__host__ __device__ void copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
    Perform a generic copy between two tensor partitions (threadwise copy). Tensors must have the same size.

template<typename DimAccessOrderTuple, index_t VectorDim, index_t ScalarPerVector, typename SrcTensorType, typename DstTensorType, typename ThreadShape, typename ThreadUnrolledDesc>
__device__ void blockwise_copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor, [[maybe_unused]] const Layout<ThreadShape, ThreadUnrolledDesc> &thread_layout)
    Perform an optimized blockwise copy between two tensors. Tensors must have the same size.
Function Documentation
◆ blockwise_copy()
template<typename DimAccessOrderTuple, index_t VectorDim, index_t ScalarPerVector, typename SrcTensorType, typename DstTensorType, typename ThreadShape, typename ThreadUnrolledDesc>
__device__ void blockwise_copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor, [[maybe_unused]] const Layout<ThreadShape, ThreadUnrolledDesc> &thread_layout)
Perform an optimized blockwise copy between two tensors. Tensors must have the same size.
- Note
- Vgpr and Sgpr tensors are not currently supported.
- Template Parameters
- DimAccessOrderTuple: Tuple with the dimension access order.
- VectorDim: Dimension for vectorized read and write.
- ScalarPerVector: Number of scalars per vectorized read and write.
- Parameters
- src_tensor: Source tensor.
- dst_tensor: Destination tensor.
- thread_layout: Thread layout for each dimension of the copy.
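For illustration, a minimal sketch of a blockwise_copy call inside a kernel. The per-block tile tensors, the thread grid shape, and the ck::wrapper::make_layout call used to build the thread layout are assumptions of this example, not definitions from this header.

```cpp
// Sketch only: `src_block_tile` and `dst_block_tile` are assumed to be equally
// sized tensors (e.g. a global-memory tile and an LDS tile) created elsewhere
// with the ck::wrapper tensor utilities.
using DimAccessOrder = ck::Tuple<ck::Number<0>, ck::Number<1>>; // traverse dim 0, then dim 1
constexpr ck::index_t vector_dim        = 1; // vectorize along the contiguous dimension
constexpr ck::index_t scalar_per_vector = 4; // elements moved per vectorized access

// Assumed 4 x 64 thread grid cooperating on the tile (shape, strides, and the
// make_layout usage are example assumptions).
const auto thread_layout = ck::wrapper::make_layout(
    ck::make_tuple(ck::Number<4>{}, ck::Number<64>{}),
    ck::make_tuple(ck::Number<64>{}, ck::Number<1>{}));

// All threads in the block participate in the copy.
ck::wrapper::blockwise_copy<DimAccessOrder, vector_dim, scalar_per_vector>(
    src_block_tile, dst_block_tile, thread_layout);
```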
◆ copy() [1/2]
template<typename DimAccessOrderTuple, index_t VectorDim, index_t ScalarPerVector, typename SrcTensorType, typename DstTensorType>
__device__ void copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
Perform an optimized copy between two tensor partitions (threadwise copy). Tensors must have the same size.
- Template Parameters
- DimAccessOrderTuple: Tuple with the dimension access order.
- VectorDim: Dimension for vectorized read and write.
- ScalarPerVector: Number of scalars per vectorized read and write.
- Parameters
- src_tensor: Source tensor.
- dst_tensor: Destination tensor.
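For illustration, a minimal sketch of the optimized threadwise copy. The per-thread partitions src_thread_tile and dst_thread_tile, as well as the ck::wrapper namespace qualification, are assumptions of this example.

```cpp
// Sketch only: assumes `src_thread_tile` and `dst_thread_tile` are equally
// sized per-thread tensor partitions obtained elsewhere (e.g. from local
// partitioning of a block tile).
using DimAccessOrder = ck::Tuple<ck::Number<0>, ck::Number<1>>; // traverse dim 0, then dim 1
constexpr ck::index_t vector_dim        = 1; // vectorize along the contiguous dimension
constexpr ck::index_t scalar_per_vector = 8; // elements moved per vectorized access

ck::wrapper::copy<DimAccessOrder, vector_dim, scalar_per_vector>(src_thread_tile,
                                                                 dst_thread_tile);
```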
◆ copy() [2/2]
template<typename SrcTensorType, typename DstTensorType>
__host__ __device__ void copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
Perform a generic copy between two tensor partitions (threadwise copy). Tensors must have the same size.
- Parameters
- src_tensor: Source tensor.
- dst_tensor: Destination tensor.
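For illustration, a minimal sketch of the generic overload, which takes no access-order or vectorization template arguments and so serves as a simple fallback; the two partition tensors are assumed to exist.

```cpp
// Sketch only: `src_thread_tile` and `dst_thread_tile` are assumed to be
// equally sized tensor partitions. The generic overload copies without
// explicit vectorization hints.
ck::wrapper::copy(src_thread_tile, dst_thread_tile);
```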