/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp Source File
config.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 
6 #if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__) || \
7  defined(__gfx9_4_generic__)
8 #define __gfx9__
9 #endif
10 #if defined(__gfx942__) || defined(__gfx950__) || defined(__gfx9_4_generic__)
11 #define __gfx94__
12 #endif
13 #if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || \
14  defined(__gfx1013__) || defined(__gfx10_1_generic__)
15 #define __gfx101__
16 #endif
17 #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \
18  defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) || \
19  defined(__gfx10_3_generic__)
20 #define __gfx103__
21 #endif
22 #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || \
23  defined(__gfx1103__) || defined(__gfx1150__) || defined(__gfx1151__) || \
24  defined(__gfx1152__) || defined(__gfx1153__) || defined(__gfx11_generic__)
25 #define __gfx11__
26 #endif
27 #if defined(__gfx1200__) || defined(__gfx1201__) || defined(__gfx12_generic__)
28 #define __gfx12__
29 #endif
30 
31 #include "hip/hip_version.h"
32 #ifndef CK_TILE_DONT_USE_HIP_RUNTIME_HEADERS
33 #include "hip/hip_runtime.h"
34 #include "hip/hip_fp16.h"
35 #endif
36 
37 #ifdef __HIPCC__
38 #define CK_TILE_HOST inline __host__
39 #define CK_TILE_DEVICE inline __device__
40 #define CK_TILE_HOST_DEVICE inline __host__ __device__
41 #define CK_TILE_DEVICE_EXTERN __device__
42 #if __clang_major__ < 22
43 #define CK_TILE_HOST_DEVICE_EXTERN __host__ __device__
44 #else
45 #define CK_TILE_HOST_DEVICE_EXTERN
46 #endif
47 #else
48 #define CK_TILE_HOST inline
49 #define CK_TILE_DEVICE inline
50 #define CK_TILE_HOST_DEVICE inline
51 #define CK_TILE_DEVICE_EXTERN
52 #define CK_TILE_HOST_DEVICE_EXTERN
53 #endif
54 
55 // implementing the "memory address space" attribute
56 // https://llvm.org/docs/AMDGPUUsage.html#amdgpu-address-spaces-table
57 // WA for https://github.com/ROCm/composable_kernel/issues/1946
58 #if 0
59 #define CK_TILE_GENERIC_ADDR __attribute__((address_space(0)))
60 #define CK_TILE_GLOBAL_ADDR __attribute__((address_space(1)))
61 #define CK_TILE_LDS_ADDR __attribute__((address_space(3)))
62 #define CK_TILE_BUF_RES_ADDR __attribute__((address_space(8)))
63 #else
64 #define CK_TILE_GENERIC_ADDR
65 #define CK_TILE_GLOBAL_ADDR
66 #define CK_TILE_LDS_ADDR
67 #define CK_TILE_BUF_RES_ADDR
68 #endif
69 #ifndef CK_TILE_USE_CUSTOM_DATA_TYPE
70 #define CK_TILE_USE_CUSTOM_DATA_TYPE 0 // custom data type will generate extra move/bfi code
71 #endif
72 
73 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD 0
74 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE_WITH_NAN 1
75 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE 2
76 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD_ASM 3
77 #define CK_TILE_FLOAT_TO_BFLOAT16_RTA_ASM 4
78 
79 #ifndef CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT
80 #define CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE
81 #endif
82 
83 #define CK_TILE_FLOAT_TO_FP8_STANDARD 0
84 #define CK_TILE_FLOAT_TO_FP8_STOCHASTIC 1
85 
86 #ifndef CK_TILE_FLOAT_TO_FP8_DEFAULT
87 #define CK_TILE_FLOAT_TO_FP8_DEFAULT CK_TILE_FLOAT_TO_FP8_STANDARD
88 #endif
89 
90 // in the old rocm period, we have to use tuple array implementation to implement this
91 // so turn on the _USE_TUPLE if meet compiler error, otherwise _USE_ARRAY by default.
92 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_ARRAY 0
93 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE 1
94 #ifndef CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT
95 #define CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE
96 #endif
97 
98 #define CK_TILE_THREAD_BUFFER_USE_ARRAY 0
99 #define CK_TILE_THREAD_BUFFER_USE_TUPLE 1
100 #ifndef CK_TILE_THREAD_BUFFER_DEFAULT
101 #define CK_TILE_THREAD_BUFFER_DEFAULT CK_TILE_THREAD_BUFFER_USE_ARRAY
102 #endif
103 
104 #ifndef CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST
105 #if CK_TILE_THREAD_BUFFER_DEFAULT == CK_TILE_THREAD_BUFFER_USE_TUPLE
106 // if using tuple-array as thread_buffer implementation, need to support {} brace init
107 // ... with similiar behavior as array
108 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 1
109 #else
110 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 0
111 #endif
112 #endif
113 
114 #ifndef CK_TILE_USE_LAUNCH_BOUNDS
115 #define CK_TILE_USE_LAUNCH_BOUNDS 1
116 #endif
117 
118 #ifndef CK_TILE_TIME_KERNEL
119 #define CK_TILE_TIME_KERNEL 1
120 #endif
121 
122 #define CK_TILE_MAX_THREAD_PER_BLOCK 256
123 #define CK_TILE_MIN_BLOCK_PER_CU 2
124 
125 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
126 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 0
127 #endif
128 
129 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK
130 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK 1
131 #endif
132 
133 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK
134 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK 1
135 #endif
136 
137 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK
138 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK 1
139 #endif
140 
141 #ifndef CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM
142 #define CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM 1
143 #endif
144 
145 #ifndef CK_TILE_USE_AMD_BUFFER_LOAD
146 #define CK_TILE_USE_AMD_BUFFER_LOAD 1
147 #endif
148 
149 #ifndef CK_TILE_USE_AMD_BUFFER_STORE
150 #define CK_TILE_USE_AMD_BUFFER_STORE 1
151 #endif
152 
153 #ifndef CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER
154 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER 1
155 #endif
156 
157 #ifndef CK_TILE_USE_PK4_LAYOUT_SHUFFLE
158 #define CK_TILE_USE_PK4_LAYOUT_SHUFFLE 1
159 #endif
160 
161 // buffer atomic add: floating point
162 #ifndef __HIP_DEVICE_COMPILE__ // for host code
163 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
164 #elif defined(__gfx9__) || defined(__gfx12__) // for GPU code
165 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
166 #else // for GPU code
167 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 0
168 #endif
169 
170 #if(defined(__gfx90a__) || defined(__gfx94__)) // for GPU code
171 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 1
172 #else
173 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 0
174 #endif
175 
176 #ifndef CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
177 #define CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS 0
178 #endif
179 
180 #ifndef CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE
181 #define CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE 1
182 #endif
183 
184 #ifndef CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE
185 #if HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 1 && HIP_VERSION_PATCH >= 40091
186 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 1
187 #else
188 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 0
189 #endif
190 #endif
191 
192 // workaround for ROCm 6.2 and later
193 #ifndef CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE
194 #if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 2 && HIP_VERSION_PATCH >= 41133) || \
195  (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 3 && HIP_VERSION_PATCH >= 42131) || \
196  (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR > 3)
197 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 1
198 #else
199 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 0
200 #endif
201 #endif
202 
203 // use llvm builtin bf16 data type after ROCm 6.5
204 #ifndef CK_TILE_USE_LLVM_BUILTIN_BF16
205 #if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 5 && HIP_VERSION_PATCH >= 50421) || \
206  (HIP_VERSION_MAJOR >= 7)
207 #define CK_TILE_USE_LLVM_BUILTIN_BF16 1
208 #else
209 #define CK_TILE_USE_LLVM_BUILTIN_BF16 0
210 #endif
211 #endif
212 
213 #ifndef CK_TILE_DEBUG_LOG
214 #define CK_TILE_DEBUG_LOG 0
215 #endif
216 
217 #ifndef __HIP_DEVICE_COMPILE__ // for host code
218 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0xffffffff
219 #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || \
220  defined(__gfx9__) // for GPU code
221 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x00020000
222 #elif defined(__gfx101__) || defined(__gfx103__) // for GPU code
223 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31014000
224 #elif defined(__gfx11__) || defined(__gfx12__) // for GPU code
225 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31004000
226 #endif
227 
228 #ifndef CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
229 #define CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1
230 #endif
231 
232 #ifndef CK_TILE_USE_SUBDWORD_TILE_CAST
233 #define CK_TILE_USE_SUBDWORD_TILE_CAST 0
234 #endif
235 
236 #ifndef CK_TILE_USE_PK_FP16_TILE_CAST
237 #define CK_TILE_USE_PK_FP16_TILE_CAST 0
238 #endif
239 
240 // TODO: better solve this inside compiler
241 #ifndef CK_TILE_FMHA_FWD_FAST_EXP2
242 #define CK_TILE_FMHA_FWD_FAST_EXP2 0
243 #endif
244 
245 #ifndef CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN
246 #define CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN 0
247 #endif
248 
249 #ifndef CK_TILE_BUFFER_LOAD_RAW_BF16_WA
250 #define CK_TILE_BUFFER_LOAD_RAW_BF16_WA 1
251 #endif
252 
253 // workaround: compiler not emiting reciprocal instruction frm __frcp_rn()
254 #ifndef CK_TILE_WORKAROUND_SWDEV_383542
255 #define CK_TILE_WORKAROUND_SWDEV_383542 1
256 #endif
257 
258 #ifndef CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
259 #define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID 1
260 #endif
261 
262 #ifndef CK_TILE_USE_OCP_FP8
263 #if defined(__HIP_DEVICE_COMPILE__)
264 #if defined(__gfx950__) || defined(__gfx12__)
265 #define CK_TILE_USE_OCP_FP8 1
266 #else
267 #define CK_TILE_USE_OCP_FP8 0
268 #endif
269 #else
270 #define CK_TILE_USE_OCP_FP8 0
271 #endif
272 #endif
273 
274 #ifndef CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN
275 #if __clang_major__ >= 20 && !(defined(__gfx103__) || defined(__gfx11__) || defined(__gfx12__))
276 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 1
277 #else
278 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 0
279 #endif
280 #endif
281 
282 #ifndef CK_TILE_WA_ISSUE_2028
283 #define CK_TILE_WA_ISSUE_2028 0
284 #endif
285 
286 // Y pointed to R, we don't see a valuable use case.
287 // Will enforce encoding to check Y not pointed to R if set to zero
288 #ifndef CK_TILE_ENC_SUPPORT_Y_TO_R
289 #define CK_TILE_ENC_SUPPORT_Y_TO_R 0
290 #endif
291 
292 // Mark unsupported features with a deprecation warning in debug builds
293 #if defined(NDEBUG)
294 #define CK_TILE_UNSUPPORTED_IMPL(MSG)
295 #else
296 #define CK_TILE_UNSUPPORTED_IMPL(MSG) __attribute__((deprecated(MSG)))
297 #endif
298 
299 namespace ck_tile::core {
328 {
329  // Determine if we are compiling for device or host
330 #if defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__
331  static constexpr bool CK_TILE_DEVICE_COMPILE = true;
332  static constexpr bool CK_TILE_HOST_COMPILE = false;
333 #else
334  static constexpr bool CK_TILE_DEVICE_COMPILE = false;
335  static constexpr bool CK_TILE_HOST_COMPILE = true;
336 #endif // __HIP_DEVICE_COMPILE__ && __HIP_DEVICE_COMPILE__
337 
338  // GFX9
339 #if defined(__gfx908__)
340  static constexpr bool CK_TILE_ARCH_GFX908 = true;
341 #else
342  static constexpr bool CK_TILE_ARCH_GFX908 = false;
343 #endif // __gfx908__
344 
345 #if defined(__gfx90a__)
346  static constexpr bool CK_TILE_ARCH_GFX90A = true;
347 #else
348  static constexpr bool CK_TILE_ARCH_GFX90A = false;
349 #endif // __gfx90a__
350 
351 #if defined(__gfx942__)
352  static constexpr bool CK_TILE_ARCH_GFX942 = true;
353 #else
354  static constexpr bool CK_TILE_ARCH_GFX942 = false;
355 #endif // __gfx942__
356 
357 #if defined(__gfx950__)
358  static constexpr bool CK_TILE_ARCH_GFX950 = true;
359 #else
360  static constexpr bool CK_TILE_ARCH_GFX950 = false;
361 #endif // __gfx950__
362 
363  // GFX10
364 #if defined(__gfx1010__)
365  static constexpr bool CK_TILE_ARCH_GFX1010 = true;
366 #else
367  static constexpr bool CK_TILE_ARCH_GFX1010 = false;
368 #endif
369 
370 #if defined(__gfx1030__)
371  static constexpr bool CK_TILE_ARCH_GFX1030 = true;
372 #else
373  static constexpr bool CK_TILE_ARCH_GFX1030 = false;
374 #endif // __gfx1030__
375 
376 #if defined(__gfx1031__)
377  static constexpr bool CK_TILE_ARCH_GFX1031 = true;
378 #else
379  static constexpr bool CK_TILE_ARCH_GFX1031 = false;
380 #endif // __gfx1031__
381 
382 #if defined(__gfx1032__)
383  static constexpr bool CK_TILE_ARCH_GFX1032 = true;
384 #else
385  static constexpr bool CK_TILE_ARCH_GFX1032 = false;
386 #endif // __gfx1032__
387 
388 #if defined(__gfx1034__)
389  static constexpr bool CK_TILE_ARCH_GFX1034 = true;
390 #else
391  static constexpr bool CK_TILE_ARCH_GFX1034 = false;
392 #endif // __gfx1034__
393 
394 #if defined(__gfx1035__)
395  static constexpr bool CK_TILE_ARCH_GFX1035 = true;
396 #else
397  static constexpr bool CK_TILE_ARCH_GFX1035 = false;
398 #endif // __gfx1035__
399 
400 #if defined(__gfx1036__)
401  static constexpr bool CK_TILE_ARCH_GFX1036 = true;
402 #else
403  static constexpr bool CK_TILE_ARCH_GFX1036 = false;
404 #endif // __gfx1036__
405 
406 #if defined(__gfx10_3_generic__)
407  static constexpr bool CK_TILE_ARCH_GFX10_3_GENERIC = true;
408 #else
409  static constexpr bool CK_TILE_ARCH_GFX10_3_GENERIC = false;
410 #endif // __gfx10_3_generic__
411 
412  // GFX11
413 #if defined(__gfx1100__)
414  static constexpr bool CK_TILE_ARCH_GFX1100 = true;
415 #else
416  static constexpr bool CK_TILE_ARCH_GFX1100 = false;
417 #endif // __gfx1100__
418 
419 #if defined(__gfx1101__)
420  static constexpr bool CK_TILE_ARCH_GFX1101 = true;
421 #else
422  static constexpr bool CK_TILE_ARCH_GFX1101 = false;
423 #endif // __gfx1101__
424 
425 #if defined(__gfx1102__)
426  static constexpr bool CK_TILE_ARCH_GFX1102 = true;
427 #else
428  static constexpr bool CK_TILE_ARCH_GFX1102 = false;
429 #endif // __gfx1102__
430 
431 #if defined(__gfx1103__)
432  static constexpr bool CK_TILE_ARCH_GFX1103 = true;
433 #else
434  static constexpr bool CK_TILE_ARCH_GFX1103 = false;
435 #endif // __gfx1103__
436 
437 #if defined(__gfx1150__)
438  static constexpr bool CK_TILE_ARCH_GFX1150 = true;
439 #else
440  static constexpr bool CK_TILE_ARCH_GFX1150 = false;
441 #endif // __gfx1150__
442 
443 #if defined(__gfx1151__)
444  static constexpr bool CK_TILE_ARCH_GFX1151 = true;
445 #else
446  static constexpr bool CK_TILE_ARCH_GFX1151 = false;
447 #endif // __gfx1151__
448 
449 #if defined(__gfx1152__)
450  static constexpr bool CK_TILE_ARCH_GFX1152 = true;
451 #else
452  static constexpr bool CK_TILE_ARCH_GFX1152 = false;
453 #endif // __gfx1152__
454 
455 #if defined(__gfx11_generic__)
456  static constexpr bool CK_TILE_ARCH_GFX11_GENERIC = true;
457 #else
458  static constexpr bool CK_TILE_ARCH_GFX11_GENERIC = false;
459 #endif // __gfx11_generic__
460 
461  // GFX12
462 #if defined(__gfx1200__)
463  static constexpr bool CK_TILE_ARCH_GFX1200 = true;
464 #else
465  static constexpr bool CK_TILE_ARCH_GFX1200 = false;
466 #endif // __gfx1200__
467 
468 #if defined(__gfx1201__)
469  static constexpr bool CK_TILE_ARCH_GFX1201 = true;
470 #else
471  static constexpr bool CK_TILE_ARCH_GFX1201 = false;
472 #endif // __gfx1201__
473 
474 #if defined(__gfx12_generic__)
475  static constexpr bool CK_TILE_ARCH_GFX12_GENERIC = true;
476 #else
477  static constexpr bool CK_TILE_ARCH_GFX12_GENERIC = false;
478 #endif // __gfx12_generic__
479 };
480 
489 template <typename T, typename... Ts>
490 // TODO: c++20 concept requires((std::is_convertible<Ts, T>::value && ...) && (sizeof...(Ts) >=
491 // 1))
492 CK_TILE_HOST_DEVICE static constexpr uint32_t count_values_of(T search, Ts... searchList)
493 {
494  static_assert((std::is_convertible<Ts, T>::value && ...),
495  "All search list values must be convertible to the search value type");
496  static_assert(sizeof...(Ts) >= 1, "At least one value must be provided to search in");
497 
498  return (static_cast<uint32_t>(search == static_cast<T>(searchList)) + ...);
499 }
500 
501 #define CK_TILE_COMPILER_TARGETS_LIST \
502  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX908, \
503  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX90A, \
504  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX942, \
505  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX950, \
506  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1010, \
507  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1030, \
508  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1031, \
509  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1032, \
510  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1034, \
511  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1035, \
512  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1036, \
513  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX10_3_GENERIC, \
514  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1100, \
515  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1101, \
516  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1102, \
517  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1103, \
518  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1150, \
519  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1151, \
520  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1152, \
521  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX11_GENERIC, \
522  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1200, \
523  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1201, \
524  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX12_GENERIC
525 
526 // Sanity check: make sure only one target architecture is defined during device compile
528  count_values_of(true, CK_TILE_COMPILER_TARGETS_LIST) == 1u,
529  "Only one target architecture can be defined during device compile");
530 
531 // Sanity check: make sure no device target architecture is defined during host compile
533  count_values_of(true, CK_TILE_COMPILER_TARGETS_LIST) == 0u,
534  "No device target architecture can be defined during host compile");
535 
536 } // namespace ck_tile::core
#define CK_TILE_COMPILER_TARGETS_LIST
Definition: config.hpp:501
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:50
Definition: amdgcn_mma.hpp:10
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1697
unsigned int uint32_t
Definition: stdint.h:126
Defines compiler states for supported AMDGCN devices.
Definition: config.hpp:328
static constexpr bool CK_TILE_ARCH_GFX90A
Definition: config.hpp:348
static constexpr bool CK_TILE_ARCH_GFX1032
Definition: config.hpp:385
static constexpr bool CK_TILE_ARCH_GFX908
Definition: config.hpp:342
static constexpr bool CK_TILE_ARCH_GFX11_GENERIC
Definition: config.hpp:458
static constexpr bool CK_TILE_ARCH_GFX1030
Definition: config.hpp:373
static constexpr bool CK_TILE_ARCH_GFX1036
Definition: config.hpp:403
static constexpr bool CK_TILE_ARCH_GFX1200
Definition: config.hpp:465
static constexpr bool CK_TILE_ARCH_GFX1035
Definition: config.hpp:397
static constexpr bool CK_TILE_ARCH_GFX1034
Definition: config.hpp:391
static constexpr bool CK_TILE_ARCH_GFX1152
Definition: config.hpp:452
static constexpr bool CK_TILE_ARCH_GFX1010
Definition: config.hpp:367
static constexpr bool CK_TILE_ARCH_GFX1031
Definition: config.hpp:379
static constexpr bool CK_TILE_ARCH_GFX1103
Definition: config.hpp:434
static constexpr bool CK_TILE_HOST_COMPILE
Definition: config.hpp:335
static constexpr bool CK_TILE_ARCH_GFX1100
Definition: config.hpp:416
static constexpr bool CK_TILE_ARCH_GFX1201
Definition: config.hpp:471
static constexpr bool CK_TILE_ARCH_GFX10_3_GENERIC
Definition: config.hpp:409
static constexpr bool CK_TILE_ARCH_GFX1101
Definition: config.hpp:422
static constexpr bool CK_TILE_ARCH_GFX942
Definition: config.hpp:354
static constexpr bool CK_TILE_DEVICE_COMPILE
Definition: config.hpp:334
static constexpr bool CK_TILE_ARCH_GFX1102
Definition: config.hpp:428
static constexpr bool CK_TILE_ARCH_GFX12_GENERIC
Definition: config.hpp:477
static constexpr bool CK_TILE_ARCH_GFX950
Definition: config.hpp:360
static constexpr bool CK_TILE_ARCH_GFX1150
Definition: config.hpp:440
static constexpr bool CK_TILE_ARCH_GFX1151
Definition: config.hpp:446