/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/arch/arch.hpp File Reference#
arch.hpp File Reference
#include "ck_tile/core/config.hpp"
#include "ck_tile/core/numeric/integer.hpp"
#include "ck_tile/core/numeric/integral_constant.hpp"
#include "ck_tile/core/arch/amd_buffer_addressing_builtins.hpp"
#include "ck_tile/core/arch/amd_buffer_addressing.hpp"
#include "ck_tile/core/utility/ignore.hpp"
Go to the source code of this file.
Classes | |
struct | gfx11_t |
struct | gfx12_t |
Macros | |
#define | CK_TILE_S_CNT_MAX 0b1100'1111'0111'1111#define CK_TILE_VMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 6), "VMCNT only has 6 bits"); }(), \ ((cnt) & 0b1111) | (((cnt) & 0b110000) << 10))#define CK_TILE_EXPCNT(cnt) \ ([]() { static_assert(!((cnt) >> 3), "EXP only has 3 bits"); }(), ((cnt) << 4))#define CK_TILE_LGKMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 4), "LGKM only has 4 bits"); }(), ((cnt) << 8))namespace ck_tile {template <typename, bool>struct safe_underlying_type;template <typename T>struct safe_underlying_type<T, true>{ using type = std::underlying_type_t<T>;};template <typename T>struct safe_underlying_type<T, false>{ using type = void;};template <typename T>using safe_underlying_type_t = typename safe_underlying_type<T, std::is_enum<T>::value>::type;enum struct address_space_enum : std::uint16_t{ generic = 0, global, lds, sgpr, constant, vgpr};enum struct memory_operation_enum : std::uint16_t{ set = 0, atomic_add, atomic_max, add};CK_TILE_HOST_DEVICE constexpr index_t get_warp_size(){#if defined(__GFX9__) || !defined(__HIP_DEVICE_COMPILE__) return 64;#else return 32;#endif}CK_TILE_HOST bool is_wave32(){ hipDeviceProp_t props{}; int device; auto status = hipGetDevice(&device); if(status != hipSuccess) { return false; } status = hipGetDeviceProperties(&props, device); if(status != hipSuccess) { return false; } return props.major > 9;}CK_TILE_DEVICE index_t get_grid_size() { return gridDim.x; }CK_TILE_DEVICE index_t get_block_size() { return blockDim.x; }CK_TILE_DEVICE index_t get_thread_local_1d_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_thread_global_1d_id() { return blockIdx.x * blockDim.x + threadIdx.x; }CK_TILE_DEVICE index_t get_block_1d_id() { return blockIdx.x; }CK_TILE_DEVICE index_t get_lane_id() { return __lane_id(); }template <bool ReturnSgpr = true>CK_TILE_DEVICE index_t get_warp_id(bool_constant<ReturnSgpr> = {}){ const index_t warp_id = threadIdx.x / get_warp_size(); if constexpr(ReturnSgpr) { return amd_wave_read_first_lane(warp_id); } else { return warp_id; }}CK_TILE_DEVICE index_t get_thread_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_block_id() { return blockIdx.x; }CK_TILE_DEVICE void block_sync_load_raw(index_t cnt = 0){#ifdef __gfx12__ asm volatile("s_wait_loadcnt %0 \n" "s_barrier_signal -1 \n" "s_barrier_wait -1" : : "n"(cnt) : "memory");#else asm volatile("s_waitcnt vmcnt(%0) \n" "s_barrier" : : "n"(cnt) : "memory");#endif}struct waitcnt_arg{#if defined(__gfx12__) CK_TILE_DEVICE static constexpr index_t MAX = 0b00'111111'00'111111; |
#define | CK_CONSTANT_ADDRESS_SPACE |
Enumerations | |
enum | LLVMSchedGroupMask : int32_t { NONE = 0 , ALU = 1 << 0 , VALU = 1 << 1 , SALU = 1 << 2 , MFMA = 1 << 3 , VMEM = 1 << 4 , VMEM_READ = 1 << 5 , VMEM_WRITE = 1 << 6 , DS = 1 << 7 , DS_READ = 1 << 8 , DS_WRITE = 1 << 9 , ALL = (DS_WRITE << 1) - 1 } |
Functions | |
template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt> | |
CK_TILE_DEVICE void | s_waitcnt () |
template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt> | |
CK_TILE_DEVICE void | s_waitcnt_barrier () |
template<index_t lgkmcnt = 0> | |
CK_TILE_DEVICE void | block_sync_lds () |
template<index_t vmcnt = 0> | |
CK_TILE_DEVICE void | block_sync_lds_direct_load () |
CK_TILE_DEVICE void | s_nop (index_t cnt=0) |
template<typename T > | |
__device__ T * | cast_pointer_to_generic_address_space (T CK_CONSTANT_ADDRESS_SPACE *p) |
template<typename T > | |
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACE * | cast_pointer_to_constant_address_space (T *p) |
constexpr CK_TILE_HOST_DEVICE index_t | get_smem_capacity () |
constexpr CK_TILE_HOST_DEVICE const char * | address_space_to_string (address_space_enum addr_space) |
Helper function to convert address space enum to string. More... | |
Macro Definition Documentation
◆ CK_CONSTANT_ADDRESS_SPACE
#define CK_CONSTANT_ADDRESS_SPACE |
Value:
__attribute__((address_space( \
static_cast<safe_underlying_type_t<address_space_enum>>(address_space_enum::constant))))
◆ CK_TILE_S_CNT_MAX
#define CK_TILE_S_CNT_MAX 0b1100'1111'0111'1111#define CK_TILE_VMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 6), "VMCNT only has 6 bits"); }(), \ ((cnt) & 0b1111) | (((cnt) & 0b110000) << 10))#define CK_TILE_EXPCNT(cnt) \ ([]() { static_assert(!((cnt) >> 3), "EXP only has 3 bits"); }(), ((cnt) << 4))#define CK_TILE_LGKMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 4), "LGKM only has 4 bits"); }(), ((cnt) << 8))namespace ck_tile {template <typename, bool>struct safe_underlying_type;template <typename T>struct safe_underlying_type<T, true>{ using type = std::underlying_type_t<T>;};template <typename T>struct safe_underlying_type<T, false>{ using type = void;};template <typename T>using safe_underlying_type_t = typename safe_underlying_type<T, std::is_enum<T>::value>::type;enum struct address_space_enum : std::uint16_t{ generic = 0, global, lds, sgpr, constant, vgpr};enum struct memory_operation_enum : std::uint16_t{ set = 0, atomic_add, atomic_max, add};CK_TILE_HOST_DEVICE constexpr index_t get_warp_size(){#if defined(__GFX9__) || !defined(__HIP_DEVICE_COMPILE__) return 64;#else return 32;#endif}CK_TILE_HOST bool is_wave32(){ hipDeviceProp_t props{}; int device; auto status = hipGetDevice(&device); if(status != hipSuccess) { return false; } status = hipGetDeviceProperties(&props, device); if(status != hipSuccess) { return false; } return props.major > 9;}CK_TILE_DEVICE index_t get_grid_size() { return gridDim.x; }CK_TILE_DEVICE index_t get_block_size() { return blockDim.x; }CK_TILE_DEVICE index_t get_thread_local_1d_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_thread_global_1d_id() { return blockIdx.x * blockDim.x + threadIdx.x; }CK_TILE_DEVICE index_t get_block_1d_id() { return blockIdx.x; }CK_TILE_DEVICE index_t get_lane_id() { return __lane_id(); }template <bool ReturnSgpr = true>CK_TILE_DEVICE index_t get_warp_id(bool_constant<ReturnSgpr> = {}){ const index_t warp_id = threadIdx.x / get_warp_size(); if constexpr(ReturnSgpr) { return amd_wave_read_first_lane(warp_id); } else { return warp_id; }}CK_TILE_DEVICE index_t get_thread_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_block_id() { return blockIdx.x; }CK_TILE_DEVICE void block_sync_load_raw(index_t cnt = 0){#ifdef __gfx12__ asm volatile("s_wait_loadcnt %0 \n" "s_barrier_signal -1 \n" "s_barrier_wait -1" : : "n"(cnt) : "memory");#else asm volatile("s_waitcnt vmcnt(%0) \n" "s_barrier" : : "n"(cnt) : "memory");#endif}struct waitcnt_arg{#if defined(__gfx12__) CK_TILE_DEVICE static constexpr index_t MAX = 0b00'111111'00'111111; |
Enumeration Type Documentation
◆ LLVMSchedGroupMask
enum LLVMSchedGroupMask : int32_t |
Function Documentation
◆ address_space_to_string()
|
constexpr |
Helper function to convert address space enum to string.
◆ block_sync_lds()
template<index_t lgkmcnt = 0>
CK_TILE_DEVICE void block_sync_lds | ( | ) |
◆ block_sync_lds_direct_load()
template<index_t vmcnt = 0>
CK_TILE_DEVICE void block_sync_lds_direct_load | ( | ) |
◆ cast_pointer_to_constant_address_space()
template<typename T >
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_address_space | ( | T * | p | ) |
◆ cast_pointer_to_generic_address_space()
template<typename T >
__device__ T* cast_pointer_to_generic_address_space | ( | T CK_CONSTANT_ADDRESS_SPACE * | p | ) |
◆ get_smem_capacity()
|
constexpr |
◆ s_nop()
CK_TILE_DEVICE void s_nop | ( | index_t | cnt = 0 | ) |
◆ s_waitcnt()
template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void s_waitcnt | ( | ) |
◆ s_waitcnt_barrier()
template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void s_waitcnt_barrier | ( | ) |