The wrapper is under development and its functionality is limited.
The CK library provides a lightweight wrapper for more complex operations implemented in the library.
const auto shape_4x2x4 = ck::make_tuple(4, ck::make_tuple(2, 4));
const auto strides_s2x1x8 = ck::make_tuple(2, ck::make_tuple(1, 8));
const auto layout = ck::wrapper::make_layout(shape_4x2x4, strides_s2x1x8);
std::array<ck::index_t, 32> data;
auto tensor = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Generic>(&data[0], layout);
for(ck::index_t w = 0; w < size(tensor); w++) {
tensor(w) = w;
// slice() == slice(0, -1) (whole dimension)
auto tensor_slice = tensor(ck::wrapper::slice(1, 3), ck::make_tuple(ck::wrapper::slice(), ck::wrapper::slice()));
std::cout << "dims:2,(2,4) strides:2,(1,8)" << std::endl;
for(ck::index_t h = 0; h < ck::wrapper::size<0>(tensor_slice); h++)
for(ck::index_t w = 0; w < ck::wrapper::size<1>(tensor_slice); w++)
std::cout << tensor_slice(h, w) << " ";
std::cout << std::endl;
dims:2,(2,4) strides:2,(1,8)
1 5 9 13 17 21 25 29
2 6 10 14 18 22 26 30
Advanced examples:
template<typename Shape, typename UnrolledDescriptorType>
struct Layout# Layout wrapper that performs the tensor descriptor logic.
- Template Parameters:
Shape – Tuple of Number<> (for compile-time layout) or index_t (dynamic layout). Nested shapes (e.g. ((4, 2), 2)) may be passed; nested dimensions are merged.
UnrolledDescriptorType – Tensor descriptor for unnested shape dims.
Layout helpers#
namespace ck#
namespace wrapper#
- template<typename Shape, typename Strides> __host__ constexpr __device__ auto make_layout (const Shape &shape, const Strides &strides)
Make layout function.
- Template Parameters:
Shape – Shape for layout.
Strides – Strides for layout.
- Returns:
Constructed layout.
- template<typename Shape> __host__ constexpr __device__ auto make_layout (const Shape &shape)
Make layout function with packed strides (column-major).
- Template Parameters:
Shape – Shape for layout.
- Returns:
Constructed layout.
- template<typename T> __host__ __device__ constexpr T get (const T &dim)
Get dim.
- Parameters:
dim – Dimension.
- Returns:
The same dimension, returned unchanged.
- template<index_t idx, typename... Dims> __host__ constexpr __device__ auto get (const Tuple< Dims... > &tuple)
Get element from tuple (Shape/Strides/Idxs).
- Template Parameters:
idx – Index to lookup.
- Parameters:
tuple – Tuple to lookup.
- Returns:
Requested element.
- template<index_t idx, typename Shape, typename FlattenDesc> __host__ constexpr __device__ auto get (const Layout< Shape, FlattenDesc > &layout)
Get sub layout.
- Template Parameters:
idx – Index to lookup.
- Parameters:
layout – Layout to create sub layout.
- Returns:
Requested sub layout.
- template<index_t Idx, index_t... Idxs, typename T> __host__ constexpr __device__ auto get (const T &elem)
Hierarchical get.
- Template Parameters:
Idxs – Indexes to lookup.
- Parameters:
elem – Element to lookup.
- Returns:
Requested element.
- template<typename T> __host__ __device__ constexpr T size (const T &dim)
Get size.
- Parameters:
dim – Size.
- Returns:
The same size, returned unchanged.
- template<index_t idx, typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ auto size (const Layout< Shape, UnrolledDescriptorType > &layout)
Get length (product of dims if the selected element is a tuple).
- Template Parameters:
idx – Index to lookup.
- Parameters:
layout – Layout to get Shape of.
- Returns:
Requested length.
- template<typename... ShapeDims> __host__ constexpr __device__ auto size (const Tuple< ShapeDims... > &shape)
Shape size (product of dims).
- Parameters:
shape – Shape to lookup.
- Returns:
Requested size.
- template<typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ auto size (const Layout< Shape, UnrolledDescriptorType > &layout)
Layout size (product of dims).
- Parameters:
layout – Layout to calculate shape size.
- Returns:
Requested size.
- template<index_t idx, typename... Ts> __host__ constexpr __device__ auto size (const Tuple< Ts... > &tuple)
Get length from tuple (product of dims if the selected element is a tuple).
- Template Parameters:
idx – Index to lookup.
- Parameters:
tuple – Tuple to lookup.
- Returns:
Requested length.
- template<index_t Idx, index_t... Idxs, typename T> __host__ constexpr __device__ auto size (const T &elem)
Hierarchical size.
- Template Parameters:
Idx – First index to lookup (to avoid empty Idxs).
Idxs – Next indexes to lookup.
- Parameters:
elem – Element to lookup.
- Returns:
Requested size.
- template<typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ auto rank ([[maybe_unused]] const Layout< Shape, UnrolledDescriptorType > &layout)
Get layout rank (num elements in shape).
- Parameters:
layout – Layout to calculate rank.
- Returns:
Requested rank.
- template<typename... Dims> __host__ constexpr __device__ auto rank ([[maybe_unused]] const Tuple< Dims... > &tuple)
Get tuple rank (num elements in tuple). Return 1 if scalar passed.
- Parameters:
tuple – Tuple to calculate rank.
- Returns:
Requested rank.
- template<index_t IDim> __host__ constexpr __device__ index_t rank ([[maybe_unused]] const Number< IDim > &dim)
Rank for scalar.
- Parameters:
dim – Dimension scalar.
- Returns:
Always returns 1.
- __host__ constexpr __device__ index_t rank ([[maybe_unused]] const index_t &dim)
Rank for scalar.
- Parameters:
dim – Dimension scalar.
- Returns:
Always returns 1.
- template<index_t... Idxs, typename T> __host__ constexpr __device__ auto rank (const T &elem)
Hierarchical rank.
- Template Parameters:
Idxs – Indexes to lookup.
- Parameters:
elem – Element to lookup.
- Returns:
Requested rank.
- template<typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ auto depth (const Layout< Shape, UnrolledDescriptorType > &layout)
Get depth of the layout shape (return 0 if scalar).
- Parameters:
layout – Layout to calculate depth.
- Returns:
Requested depth.
- template<typename... Dims> __host__ constexpr __device__ auto depth (const Tuple< Dims... > &tuple)
Get depth of the tuple. (return 0 if scalar)
- Parameters:
tuple – Tuple to calculate depth.
- Returns:
Requested depth.
- template<index_t IDim> __host__ constexpr __device__ index_t depth ([[maybe_unused]] const Number< IDim > &dim)
Depth for scalar.
- Parameters:
dim – Scalar.
- Returns:
Always returns 0.
- __host__ constexpr __device__ index_t depth ([[maybe_unused]] const index_t &dim)
Depth for scalar.
- Parameters:
dim – Scalar.
- Returns:
Always returns 0.
- template<index_t... Idxs, typename T> __host__ constexpr __device__ auto depth (const T &elem)
Hierarchical depth.
- Template Parameters:
Idxs – Indexes to lookup.
- Parameters:
elem – Element to lookup.
- Returns:
Requested depth.
namespace wrapper#
template<MemoryTypeEnum BufferAddressSpace, typename ElementType, typename Shape, typename UnrolledDescriptorType>
struct Tensor# Tensor wrapper that performs static and dynamic buffer logic. The tensor is based on a descriptor stored in the Layout. Additionally, tensor can be sliced or shifted using multi-index offset.
- Template Parameters:
BufferAddressSpace – Memory type (Generic, Global, LDS, VGPR, SGPR).
ElementType – Element data type.
Shape – Tensor shape (layout component).
UnrolledDescriptorType – Flattened descriptor (layout component).
Tensor helpers#
namespace ck
namespace wrapper
using MemoryTypeEnum = AddressSpaceEnum#
Memory type, allowed members: Generic, Global, LDS, VGPR, SGPR.
template<MemoryTypeEnum MemoryType, typename ElementType, typename Shape, typename UnrolledDescriptorType>
constexpr auto make_tensor(ElementType *pointer, const Layout<Shape, UnrolledDescriptorType> &layout)# Make tensor function.
- Template Parameters:
MemoryType – Type of memory.
- Parameters:
pointer – Pointer to the memory.
layout – Tensor layout.
- Returns:
Constructed tensor.
template<MemoryTypeEnum MemoryType, typename ElementType, typename Shape, typename UnrolledDescriptorType>
constexpr auto make_register_tensor(const Layout<Shape, UnrolledDescriptorType> &layout)# Make SGPR or VGPR tensor function.
- Template Parameters:
MemoryType – Type of memory.
ElementType – Memory data type.
- Returns:
Constructed tensor.
- template<MemoryTypeEnum BufferAddressSpace, typename ElementType, typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ const auto & layout (const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
- Parameters:
tensor – Tensor to get layout of.
- Returns:
Requested layout.
- template<index_t... Idxs, MemoryTypeEnum BufferAddressSpace, typename ElementType, typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ auto size (const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Product of tensor shape dims.
- Template Parameters:
Idxs – Indexes to access specific shape dim (optional).
- Parameters:
tensor – Tensor to get Shape of.
- Returns:
Requested size.
- template<index_t... Idxs, MemoryTypeEnum BufferAddressSpace, typename ElementType, typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ auto rank (const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Rank of Shape tuple.
- Template Parameters:
Idxs – Indexes to access specific shape dim (optional).
- Parameters:
tensor – Tensor to get rank of.
- Returns:
Requested rank.
- template<index_t... Idxs, MemoryTypeEnum BufferAddressSpace, typename ElementType, typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ auto depth (const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Depth of Shape tuple.
- Template Parameters:
Idxs – Indexes to access specific shape dim (optional).
- Parameters:
tensor – Tensor to get depth of.
- Returns:
Requested depth.
- template<MemoryTypeEnum BufferAddressSpace, typename ElementType, typename Shape, typename UnrolledDescriptorType> __host__ constexpr __device__ const auto & shape (const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Get Tensor shape.
- Parameters:
tensor – Tensor to get shape from.
- Returns:
Requested shape.
template<typename FromType, typename ToType>
constexpr auto slice(const FromType from, const ToType to)# Get dim slice.
- Parameters:
from – Beginning of the interval.
to – End of the interval (may also be negative to index from the end).
- Returns:
Requested slice. Could be used to create sliced tensor from other tensor.
template<typename ToType>
constexpr auto slice(const ToType to)# Get dim slice. (Assumed that from is equal to 1)
- Parameters:
to – End of the interval (may also be negative to index from the end).
- Returns:
Requested slice. Could be used to create sliced tensor from other tensor.
constexpr auto slice()#
Get whole dim slice (from = 0, to = -1).
- Returns:
Requested slice. Could be used to create sliced tensor from other tensor.
using MemoryTypeEnum = AddressSpaceEnum#
namespace wrapper
namespace ck
namespace wrapper
- template<typename TensorType, typename ThreadLengthsTuple> __host__ constexpr __device__ auto make_local_partition (TensorType &tensor, [[maybe_unused]] const ThreadLengthsTuple &thread_lengths, const index_t thread_id)
Create local partition for thread (currently only packed partition is supported).
- template<typename TensorType, typename BlockShapeTuple> __host__ constexpr __device__ auto make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const index_t block_id)
Create local tile for thread block (currently only packed tile is supported).
For now, use a 2D tile_shape to get the best performance.
- Parameters:
tensor – Tensor for partition.
tile_shape – Shapes of requested tile.
block_id – Block index represented as integer.
- Returns:
Tile tensor.
namespace wrapper
namespace ck
namespace wrapper
- template<typename SrcTensorType, typename DstTensorType> __host__ __device__ void copy (const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
Perform generic copy between two tensor partitions (threadwise copy). Tensors must have the same size.
- Parameters:
src_tensor – Source tensor.
dst_tensor – Destination tensor.
- template<typename DimAccessOrderTuple, index_t VectorDim, index_t ScalarPerVector, typename SrcTensorType, typename DstTensorType> __device__ void copy (const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
Perform optimized copy between two tensor partitions (threadwise copy). Tensors must have the same size.
- Template Parameters:
DimAccessOrderTuple – Tuple with dimension access order.
VectorDim – Dimension for vectorized read and write.
ScalarPerVector – Number of scalars per vectorized read and write.
- Parameters:
src_tensor – Source tensor.
dst_tensor – Destination tensor.
namespace wrapper