impl Namespace Reference

impl Namespace Reference

Composable Kernel: ck_tile::impl Namespace Reference
ck_tile::impl Namespace Reference

Classes

struct  buffer_load_trait
 
struct  buffer_load_trait< 16, T >
 
struct  buffer_load_trait< 8, T >
 
struct  buffer_load_trait< 4, T >
 
struct  buffer_load_trait< 2, T >
 
struct  buffer_load_trait< 1, T >
 
struct  smem_load_trait
 
struct  smem_load_trait< 16, T >
 
struct  smem_load_trait< 8, T >
 
struct  smem_load_trait< 4, T >
 
struct  smem_load_trait< 2, T >
 
struct  smem_load_trait< 1, T >
 
struct  __integer_sequence
 
struct  __integer_sequence< index_t, Ints... >
 
struct  seq_reverse
 
struct  seq_reverse< sequence< Ids... >, Ns... >
 
struct  reverse_slice_sequence_impl
 
struct  reverse_slice_sequence_impl< sequence< x, xs... >, sequence< m, ms... >, sequence< id, ids... >, SliceSize >
 
struct  reverse_slice_sequence_impl< sequence< x >, sequence< m >, sequence< id >, SliceSize >
 
struct  tuple_array_impl
 
struct  tuple_object
 
struct  tuple_object< idx, T, true >
 
struct  tuple_object< idx, T, false >
 
struct  tuple_base
 
struct  tuple_base< sequence< I... >, T... >
 
struct  tuple_array_impl< T, 0 >
 
struct  tuple_array_impl< T, 1 >
 
struct  ext_vector
 
struct  ext_vector< V_, N_ >
 
struct  is_null_tile_window
 
struct  is_null_tile_window< null_tile_window< T > >
 
struct  sweep_tile_impl
 
struct  sweep_tile_impl< DistributedTensor, UnpacksPerXDim, sequence< I, Is... > >
 
struct  sweep_tile_impl< DistributedTensor, UnpacksPerXDim, sequence<> >
 
struct  sweep_tile_impl_0
 
struct  sweep_tile_impl_0< DistributedTensor, UnpacksPerXDim, sequence< I, Is... > >
 
struct  default_linear_bottom_dims_impl
 
struct  default_linear_bottom_dims_impl< address_space_enum::global, len_ >
 
struct  default_linear_bottom_dims_impl< address_space_enum::lds, len_ >
 
struct  static_counter_uniq_
 
struct  is_static_impl
 
struct  RawIntegerType_
 
struct  RawIntegerType_< 1 >
 
struct  RawIntegerType_< 2 >
 
struct  RawIntegerType_< 4 >
 
struct  RawIntegerType_< 8 >
 
struct  MaskName
 
struct  MaskName< false, false >
 
struct  MaskName< false, true >
 
struct  MaskName< true, false >
 
struct  MaskName< true, true >
 
struct  SimplifiedMaskName
 
struct  SimplifiedMaskName< false >
 
struct  SimplifiedMaskName< true >
 
struct  WarpGemmMfmaDispatcher
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 32, 32, 8, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 32, 32, 8, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 32, 32, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 32, 32, 16, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 16, 16, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 16, 16, 16, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 16, 16, 32, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 16, 16, 32, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 4, 64, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 64, 4, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 32, 32, 8, false, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::half_t, ck_tile::half_t, float, 32, 32, 16, false, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 8, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 8, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 16, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 16, 16, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 16, 16, 16, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 16, 16, 32, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 16, 16, 32, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 4, 64, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 64, 4, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 8, false, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 16, false, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::fp8_t, ck_tile::fp8_t, float, 32, 32, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::fp8_t, ck_tile::fp8_t, float, 32, 32, 16, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::fp8_t, ck_tile::bf8_t, float, 32, 32, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::fp8_t, ck_tile::bf8_t, float, 32, 32, 16, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf8_t, ck_tile::fp8_t, float, 32, 32, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf8_t, ck_tile::fp8_t, float, 32, 32, 16, true >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf8_t, ck_tile::bf8_t, float, 32, 32, 16, false >
 
struct  WarpGemmMfmaDispatcher< ck_tile::bf8_t, ck_tile::bf8_t, float, 32, 32, 16, true >
 

Typedefs

template<index_t I, typename... Ts>
using at_index_t = __type_pack_element< I, Ts... >
 
template<typename T >
using has_is_static = decltype(T::is_static())
 
template<typename T >
using RawIntegerType = typename RawIntegerType_< sizeof(T)>::type
 

Functions

template<index_t N>
CK_TILE_DEVICE void insert_dummy_dep_per_dword (array< float, N > &b)
 
template<>
CK_TILE_DEVICE void insert_dummy_dep_per_dword< 2 > (array< float, 2 > &b)
 
template<>
CK_TILE_DEVICE void insert_dummy_dep_per_dword< 3 > (array< float, 3 > &b)
 
template<>
CK_TILE_DEVICE void insert_dummy_dep_per_dword< 4 > (array< float, 4 > &b)
 
template<>
CK_TILE_DEVICE void insert_dummy_dep_per_dword< 8 > (array< float, 8 > &b)
 
template<>
CK_TILE_DEVICE void insert_dummy_dep_per_dword< 16 > (array< float, 16 > &b)
 
template<>
CK_TILE_DEVICE void insert_dummy_dep_per_dword< 32 > (array< float, 32 > &b)
 
CK_TILE_DEVICE void insert_dummy_dep ()
 
template<typename T >
CK_TILE_DEVICE void insert_dummy_dep (T &buffer)
 
template<typename Tx , typename... Ty>
CK_TILE_DEVICE void insert_dummy_dep (Tx &bx, Ty &... by)
 
template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE T getv (const tuple_object< I, T, true > &)
 
template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE const T & getv (const tuple_object< I, T, false > &x)
 
template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE T & getv (tuple_object< I, T, false > &x)
 
template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE T && getv (tuple_object< I, T, false > &&x)
 
template<typename SrcT , typename DstT , bool clip = true, bool stoch = false>
CK_TILE_HOST_DEVICE DstT run_cast_to_f8 (SrcT src, unsigned int rng=0)
 
template<typename SrcT , typename DstT , bool clip = true>
CK_TILE_HOST_DEVICE DstT run_cast_from_f8 (SrcT x)
 
template<typename X , typename Y , bool clip, bool stoch>
CK_TILE_HOST_DEVICE Y cast_to_f8 (X x, uint32_t rng)
 
template<typename OutDataType , typename InTensor >
CK_TILE_DEVICE auto cast_tile_pk_fp8_fp32 (const InTensor &in_dstr_tensors)
 
template<typename OutDataType , typename InTensor >
CK_TILE_DEVICE auto cast_tile_pk_fp16_fp32 (const InTensor &in_dstr_tensors)
 

Typedef Documentation

◆ at_index_t

template<index_t I, typename... Ts>
using ck_tile::impl::at_index_t = __type_pack_element<I, Ts...>

◆ has_is_static

template<typename T >
using ck_tile::impl::has_is_static = decltype(T::is_static())

◆ RawIntegerType

template<typename T >
using ck_tile::impl::RawIntegerType = typename RawIntegerType_<sizeof(T)>::type

Function Documentation

◆ cast_tile_pk_fp16_fp32()

template<typename OutDataType , typename InTensor >
CK_TILE_DEVICE auto ck_tile::impl::cast_tile_pk_fp16_fp32 ( const InTensor &  in_dstr_tensors)

◆ cast_tile_pk_fp8_fp32()

template<typename OutDataType , typename InTensor >
CK_TILE_DEVICE auto ck_tile::impl::cast_tile_pk_fp8_fp32 ( const InTensor &  in_dstr_tensors)

◆ cast_to_f8()

template<typename X , typename Y , bool clip, bool stoch>
CK_TILE_HOST_DEVICE Y ck_tile::impl::cast_to_f8 ( X  x,
uint32_t  rng 
)

◆ getv() [1/4]

template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE const T& ck_tile::impl::getv ( const tuple_object< I, T, false > &  x)
constexpr

◆ getv() [2/4]

template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE T ck_tile::impl::getv ( const tuple_object< I, T, true > &  )
constexpr

◆ getv() [3/4]

template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE T&& ck_tile::impl::getv ( tuple_object< I, T, false > &&  x)
constexpr

◆ getv() [4/4]

template<index_t I, class T >
constexpr CK_TILE_HOST_DEVICE T& ck_tile::impl::getv ( tuple_object< I, T, false > &  x)
constexpr

◆ insert_dummy_dep() [1/3]

CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep ( )

◆ insert_dummy_dep() [2/3]

template<typename T >
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep ( T &  buffer)

◆ insert_dummy_dep() [3/3]

template<typename Tx , typename... Ty>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep ( Tx &  bx,
Ty &...  by 
)

◆ insert_dummy_dep_per_dword()

template<index_t N>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep_per_dword ( array< float, N > &  b)

◆ insert_dummy_dep_per_dword< 16 >()

template<>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep_per_dword< 16 > ( array< float, 16 > &  b)

◆ insert_dummy_dep_per_dword< 2 >()

template<>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep_per_dword< 2 > ( array< float, 2 > &  b)

◆ insert_dummy_dep_per_dword< 3 >()

template<>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep_per_dword< 3 > ( array< float, 3 > &  b)

◆ insert_dummy_dep_per_dword< 32 >()

template<>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep_per_dword< 32 > ( array< float, 32 > &  b)

◆ insert_dummy_dep_per_dword< 4 >()

template<>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep_per_dword< 4 > ( array< float, 4 > &  b)

◆ insert_dummy_dep_per_dword< 8 >()

template<>
CK_TILE_DEVICE void ck_tile::impl::insert_dummy_dep_per_dword< 8 > ( array< float, 8 > &  b)

◆ run_cast_from_f8()

template<typename SrcT , typename DstT , bool clip = true>
CK_TILE_HOST_DEVICE DstT ck_tile::impl::run_cast_from_f8 ( SrcT  x)

◆ run_cast_to_f8()

template<typename SrcT , typename DstT , bool clip = true, bool stoch = false>
CK_TILE_HOST_DEVICE DstT ck_tile::impl::run_cast_to_f8 ( SrcT  src,
unsigned int  rng = 0 
)