/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck_tile/core/arch/arch.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck_tile/core/arch/arch.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck_tile/core/arch/arch.hpp Source File
arch.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 // Address Space for AMDGCN
7 // https://llvm.org/docs/AMDGPUUsage.html#address-space
8 
12 
13 namespace ck_tile {
14 
15 template <typename, bool>
17 
18 template <typename T>
19 struct safe_underlying_type<T, true>
20 {
21  using type = std::underlying_type_t<T>;
22 };
23 
24 template <typename T>
25 struct safe_underlying_type<T, false>
26 {
27  using type = void;
28 };
29 
30 template <typename T>
32 
33 enum struct address_space_enum : std::uint16_t
34 {
35  generic = 0,
36  global,
37  lds,
38  sgpr,
39  constant,
40  vgpr
41 };
42 
43 enum struct memory_operation_enum : std::uint16_t
44 {
45  set = 0,
46  atomic_add,
47  atomic_max,
48  add
49 };
50 
52 {
53  // warpSize is defined by HIP
54  return warpSize;
55 }
56 
57 CK_TILE_DEVICE index_t get_grid_size() { return gridDim.x; }
58 
59 CK_TILE_DEVICE index_t get_block_size() { return blockDim.x; }
60 
61 // TODO: deprecate these
63 
64 CK_TILE_DEVICE index_t get_thread_global_1d_id() { return blockIdx.x * blockDim.x + threadIdx.x; }
65 
66 CK_TILE_DEVICE index_t get_block_1d_id() { return blockIdx.x; }
67 
68 // Use these instead
69 CK_TILE_DEVICE index_t get_lane_id() { return __lane_id(); }
70 
72 {
73  return __builtin_amdgcn_readfirstlane(threadIdx.x / get_warp_size());
74 }
75 
76 CK_TILE_DEVICE index_t get_thread_id() { return threadIdx.x; }
77 
78 CK_TILE_DEVICE index_t get_block_id() { return blockIdx.x; }
79 
81 {
82 #if CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
83  // asm volatile("\
84  // s_waitcnt lgkmcnt(0) \n \
85  // s_barrier \
86  // " ::);
87 
88  __builtin_amdgcn_s_waitcnt(0xc07f);
89  __builtin_amdgcn_s_barrier();
90 #else
91  __syncthreads();
92 #endif
93 }
94 
96 {
97 #ifdef __gfx12__
98  asm volatile("s_wait_loadcnt %0 \n"
99  "s_barrier_signal -1 \n"
100  "s_barrier_wait -1"
101  :
102  : "n"(cnt)
103  : "memory");
104 #else
105  asm volatile("s_waitcnt vmcnt(%0) \n"
106  "s_barrier"
107  :
108  : "n"(cnt)
109  : "memory");
110 #endif
111 }
112 
114 {
115  asm volatile("\
116  s_waitcnt vmcnt(0) \n \
117  s_waitcnt lgkmcnt(0) \n \
118  s_barrier \
119  " ::);
120 }
121 
123 {
124 #if 1
125  asm volatile("s_nop %0" : : "n"(cnt) :);
126 #else
127  __builtin_amdgcn_sched_barrier(cnt);
128 #endif
129 }
130 
131 #define CK_CONSTANT_ADDRESS_SPACE \
132  __attribute__((address_space( \
133  static_cast<safe_underlying_type_t<address_space_enum>>(address_space_enum::constant))))
134 
135 template <typename T>
137 {
138  // cast a pointer in "Constant" address space (4) to "Generic" address space (0)
139  // only c-style pointer cast seems be able to be compiled
140 #pragma clang diagnostic push
141 #pragma clang diagnostic ignored "-Wold-style-cast"
142  return (T*)(p); // NOLINT(old-style-cast)
143 #pragma clang diagnostic pop
144 }
145 
146 template <typename T>
148 {
149  // cast a pointer in "Generic" address space (0) to "Constant" address space (4)
150  // only c-style pointer cast seems be able to be compiled;
151 #pragma clang diagnostic push
152 #pragma clang diagnostic ignored "-Wold-style-cast"
153  return (T CK_CONSTANT_ADDRESS_SPACE*)p; // NOLINT(old-style-cast)
154 #pragma clang diagnostic pop
155 }
156 
157 } // namespace ck_tile
#define CK_CONSTANT_ADDRESS_SPACE
Definition: arch.hpp:131
#define CK_TILE_DEVICE
Definition: config.hpp:40
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE index_t get_warp_size()
Definition: arch.hpp:51
CK_TILE_DEVICE index_t get_lane_id()
Definition: arch.hpp:69
CK_TILE_DEVICE void atomic_add(X *p_dst, const X &x)
memory_operation_enum
Definition: arch.hpp:44
CK_TILE_HOST_DEVICE T add(const T &a, const T &b)
Definition: generic_memory_space_atomic.hpp:12
CK_TILE_DEVICE index_t get_block_1d_id()
Definition: arch.hpp:66
CK_TILE_DEVICE void block_sync_lds()
Definition: arch.hpp:80
int32_t index_t
Definition: integer.hpp:9
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACE * cast_pointer_to_constant_address_space(T *p)
Definition: arch.hpp:147
CK_TILE_DEVICE void block_sync_lds_direct_load()
Definition: arch.hpp:113
CK_TILE_DEVICE void s_nop(index_t cnt=0)
Definition: arch.hpp:122
CK_TILE_DEVICE index_t get_thread_local_1d_id()
Definition: arch.hpp:62
CK_TILE_DEVICE index_t get_block_size()
Definition: arch.hpp:59
CK_TILE_DEVICE index_t get_warp_id()
Definition: arch.hpp:71
__device__ T * cast_pointer_to_generic_address_space(T CK_CONSTANT_ADDRESS_SPACE *p)
Definition: arch.hpp:136
CK_TILE_DEVICE index_t get_thread_id()
Definition: arch.hpp:76
CK_TILE_DEVICE index_t get_thread_global_1d_id()
Definition: arch.hpp:64
address_space_enum
Definition: arch.hpp:34
CK_TILE_DEVICE index_t get_block_id()
Definition: arch.hpp:78
CK_TILE_DEVICE index_t get_grid_size()
Definition: arch.hpp:57
CK_TILE_DEVICE void block_sync_load_raw(index_t cnt=0)
Definition: arch.hpp:95
typename safe_underlying_type< T, std::is_enum< T >::value >::type safe_underlying_type_t
Definition: arch.hpp:31
void type
Definition: arch.hpp:27
std::underlying_type_t< T > type
Definition: arch.hpp:21
Definition: arch.hpp:16