6 #if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__) || \
7 defined(__gfx9_4_generic__)
10 #if defined(__gfx942__) || defined(__gfx950__) || defined(__gfx9_4_generic__)
13 #if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || \
14 defined(__gfx1013__) || defined(__gfx10_1_generic__)
17 #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \
18 defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) || \
19 defined(__gfx10_3_generic__)
22 #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || \
23 defined(__gfx1103__) || defined(__gfx1150__) || defined(__gfx1151__) || \
24 defined(__gfx1152__) || defined(__gfx1153__) || defined(__gfx11_generic__)
27 #if defined(__gfx1200__) || defined(__gfx1201__) || defined(__gfx12_generic__)
31 #include "hip/hip_version.h"
32 #ifndef CK_TILE_DONT_USE_HIP_RUNTIME_HEADERS
33 #include "hip/hip_runtime.h"
34 #include "hip/hip_fp16.h"
38 #define CK_TILE_HOST inline __host__
39 #define CK_TILE_DEVICE inline __device__
40 #define CK_TILE_HOST_DEVICE inline __host__ __device__
41 #define CK_TILE_DEVICE_EXTERN __device__
42 #define CK_TILE_HOST_DEVICE_EXTERN __host__ __device__
44 #define CK_TILE_HOST inline
45 #define CK_TILE_DEVICE inline
46 #define CK_TILE_HOST_DEVICE inline
47 #define CK_TILE_DEVICE_EXTERN
48 #define CK_TILE_HOST_DEVICE_EXTERN
55 #define CK_TILE_GENERIC_ADDR __attribute__((address_space(0)))
56 #define CK_TILE_GLOBAL_ADDR __attribute__((address_space(1)))
57 #define CK_TILE_LDS_ADDR __attribute__((address_space(3)))
58 #define CK_TILE_BUF_RES_ADDR __attribute__((address_space(8)))
60 #define CK_TILE_GENERIC_ADDR
61 #define CK_TILE_GLOBAL_ADDR
62 #define CK_TILE_LDS_ADDR
63 #define CK_TILE_BUF_RES_ADDR
65 #ifndef CK_TILE_USE_CUSTOM_DATA_TYPE
66 #define CK_TILE_USE_CUSTOM_DATA_TYPE 0
69 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD 0
70 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE_WITH_NAN 1
71 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE 2
72 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD_ASM 3
73 #define CK_TILE_FLOAT_TO_BFLOAT16_RTA_ASM 4
75 #ifndef CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT
76 #define CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE
79 #define CK_TILE_FLOAT_TO_FP8_STANDARD 0
80 #define CK_TILE_FLOAT_TO_FP8_STOCHASTIC 1
82 #ifndef CK_TILE_FLOAT_TO_FP8_DEFAULT
83 #define CK_TILE_FLOAT_TO_FP8_DEFAULT CK_TILE_FLOAT_TO_FP8_STANDARD
88 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_ARRAY 0
89 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE 1
90 #ifndef CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT
91 #define CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE
94 #define CK_TILE_THREAD_BUFFER_USE_ARRAY 0
95 #define CK_TILE_THREAD_BUFFER_USE_TUPLE 1
96 #ifndef CK_TILE_THREAD_BUFFER_DEFAULT
97 #define CK_TILE_THREAD_BUFFER_DEFAULT CK_TILE_THREAD_BUFFER_USE_ARRAY
100 #ifndef CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST
101 #if CK_TILE_THREAD_BUFFER_DEFAULT == CK_TILE_THREAD_BUFFER_USE_TUPLE
104 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 1
106 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 0
110 #ifndef CK_TILE_USE_LAUNCH_BOUNDS
111 #define CK_TILE_USE_LAUNCH_BOUNDS 1
114 #ifndef CK_TILE_TIME_KERNEL
115 #define CK_TILE_TIME_KERNEL 1
118 #define CK_TILE_MAX_THREAD_PER_BLOCK 256
119 #define CK_TILE_MIN_BLOCK_PER_CU 2
121 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
122 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 0
125 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK
126 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK 1
129 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK
130 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK 1
133 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK
134 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK 1
137 #ifndef CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM
138 #define CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM 1
141 #ifndef CK_TILE_USE_AMD_BUFFER_LOAD
142 #define CK_TILE_USE_AMD_BUFFER_LOAD 1
145 #ifndef CK_TILE_USE_AMD_BUFFER_STORE
146 #define CK_TILE_USE_AMD_BUFFER_STORE 1
149 #ifndef CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER
150 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER 1
153 #ifndef CK_TILE_USE_PK4_LAYOUT_SHUFFLE
154 #define CK_TILE_USE_PK4_LAYOUT_SHUFFLE 1
158 #ifndef __HIP_DEVICE_COMPILE__
159 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
160 #elif defined(__gfx9__) || defined(__gfx12__)
161 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
163 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 0
166 #if(defined(__gfx90a__) || defined(__gfx94__))
167 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 1
169 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 0
172 #ifndef CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
173 #define CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS 0
176 #ifndef CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE
177 #define CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE 1
180 #ifndef CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE
181 #if HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 1 && HIP_VERSION_PATCH >= 40091
182 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 1
184 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 0
189 #ifndef CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE
190 #if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 2 && HIP_VERSION_PATCH >= 41133) || \
191 (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 3 && HIP_VERSION_PATCH >= 42131) || \
192 (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR > 3)
193 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 1
195 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 0
200 #ifndef CK_TILE_USE_LLVM_BUILTIN_BF16
201 #if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 5 && HIP_VERSION_PATCH >= 50421) || \
202 (HIP_VERSION_MAJOR >= 7)
203 #define CK_TILE_USE_LLVM_BUILTIN_BF16 1
205 #define CK_TILE_USE_LLVM_BUILTIN_BF16 0
209 #ifndef CK_TILE_DEBUG_LOG
210 #define CK_TILE_DEBUG_LOG 0
213 #ifndef __HIP_DEVICE_COMPILE__
214 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0xffffffff
215 #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || \
217 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x00020000
218 #elif defined(__gfx101__) || defined(__gfx103__)
219 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31014000
220 #elif defined(__gfx11__) || defined(__gfx12__)
221 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31004000
224 #ifndef CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
225 #define CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1
228 #ifndef CK_TILE_USE_SUBDWORD_TILE_CAST
229 #define CK_TILE_USE_SUBDWORD_TILE_CAST 0
232 #ifndef CK_TILE_USE_PK_FP16_TILE_CAST
233 #define CK_TILE_USE_PK_FP16_TILE_CAST 0
237 #ifndef CK_TILE_FMHA_FWD_FAST_EXP2
238 #define CK_TILE_FMHA_FWD_FAST_EXP2 0
241 #ifndef CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN
242 #define CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN 0
245 #ifndef CK_TILE_BUFFER_LOAD_RAW_BF16_WA
246 #define CK_TILE_BUFFER_LOAD_RAW_BF16_WA 1
250 #ifndef CK_TILE_WORKAROUND_SWDEV_383542
251 #define CK_TILE_WORKAROUND_SWDEV_383542 1
254 #ifndef CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
255 #define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID 1
258 #ifndef CK_TILE_USE_OCP_FP8
259 #if defined(__HIP_DEVICE_COMPILE__)
260 #if defined(__gfx950__) || defined(__gfx12__)
261 #define CK_TILE_USE_OCP_FP8 1
263 #define CK_TILE_USE_OCP_FP8 0
266 #define CK_TILE_USE_OCP_FP8 0
270 #ifndef CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN
271 #if __clang_major__ >= 20 && !(defined(__gfx103__) || defined(__gfx11__) || defined(__gfx12__))
272 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 1
274 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 0
278 #ifndef CK_TILE_WA_ISSUE_2028
279 #define CK_TILE_WA_ISSUE_2028 0
284 #ifndef CK_TILE_ENC_SUPPORT_Y_TO_R
285 #define CK_TILE_ENC_SUPPORT_Y_TO_R 0
290 #define CK_TILE_UNSUPPORTED_IMPL(MSG)
292 #define CK_TILE_UNSUPPORTED_IMPL(MSG) __attribute__((deprecated(MSG)))
326 #if defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__
335 #if defined(__gfx908__)
341 #if defined(__gfx90a__)
347 #if defined(__gfx942__)
353 #if defined(__gfx950__)
360 #if defined(__gfx1030__)
366 #if defined(__gfx1031__)
372 #if defined(__gfx1032__)
378 #if defined(__gfx1034__)
384 #if defined(__gfx1035__)
390 #if defined(__gfx1036__)
396 #if defined(__gfx10_3_generic__)
403 #if defined(__gfx1100__)
409 #if defined(__gfx1101__)
415 #if defined(__gfx1102__)
421 #if defined(__gfx1103__)
427 #if defined(__gfx1150__)
433 #if defined(__gfx1151__)
439 #if defined(__gfx1152__)
445 #if defined(__gfx11_generic__)
452 #if defined(__gfx1200__)
458 #if defined(__gfx1201__)
464 #if defined(__gfx12_generic__)
479 template <
typename T,
typename... Ts>
485 "All search list values must be convertible to the search value type");
486 static_assert(
sizeof...(Ts) >= 1,
"At least one value must be provided to search in");
488 return (
static_cast<uint32_t>(search ==
static_cast<T
>(searchList)) + ...);
491 #define CK_TILE_COMPILER_TARGETS_LIST \
492 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX908, \
493 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX90A, \
494 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX942, \
495 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX950, \
496 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1030, \
497 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1031, \
498 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1032, \
499 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1034, \
500 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1035, \
501 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1036, \
502 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX10_3_GENERIC, \
503 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1100, \
504 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1101, \
505 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1102, \
506 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1103, \
507 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1150, \
508 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1151, \
509 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1152, \
510 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX11_GENERIC, \
511 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1200, \
512 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1201, \
513 amdgcn_compiler_target_state::CK_TILE_ARCH_GFX12_GENERIC
518 "Only one target architecture can be defined during device compile");
523 "No device target architecture can be defined during host compile");
#define CK_TILE_COMPILER_TARGETS_LIST
Definition: config.hpp:491
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:46
Definition: amdgcn_mma.hpp:10
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1697
unsigned int uint32_t
Definition: stdint.h:126
Defines compiler states for supported AMDGCN devices.
Definition: config.hpp:324
static constexpr bool CK_TILE_ARCH_GFX90A
Definition: config.hpp:344
static constexpr bool CK_TILE_ARCH_GFX1032
Definition: config.hpp:375
static constexpr bool CK_TILE_ARCH_GFX908
Definition: config.hpp:338
static constexpr bool CK_TILE_ARCH_GFX11_GENERIC
Definition: config.hpp:448
static constexpr bool CK_TILE_ARCH_GFX1030
Definition: config.hpp:363
static constexpr bool CK_TILE_ARCH_GFX1036
Definition: config.hpp:393
static constexpr bool CK_TILE_ARCH_GFX1200
Definition: config.hpp:455
static constexpr bool CK_TILE_ARCH_GFX1035
Definition: config.hpp:387
static constexpr bool CK_TILE_ARCH_GFX1034
Definition: config.hpp:381
static constexpr bool CK_TILE_ARCH_GFX1152
Definition: config.hpp:442
static constexpr bool CK_TILE_ARCH_GFX1031
Definition: config.hpp:369
static constexpr bool CK_TILE_ARCH_GFX1103
Definition: config.hpp:424
static constexpr bool CK_TILE_HOST_COMPILE
Definition: config.hpp:331
static constexpr bool CK_TILE_ARCH_GFX1100
Definition: config.hpp:406
static constexpr bool CK_TILE_ARCH_GFX1201
Definition: config.hpp:461
static constexpr bool CK_TILE_ARCH_GFX10_3_GENERIC
Definition: config.hpp:399
static constexpr bool CK_TILE_ARCH_GFX1101
Definition: config.hpp:412
static constexpr bool CK_TILE_ARCH_GFX942
Definition: config.hpp:350
static constexpr bool CK_TILE_DEVICE_COMPILE
Definition: config.hpp:330
static constexpr bool CK_TILE_ARCH_GFX1102
Definition: config.hpp:418
static constexpr bool CK_TILE_ARCH_GFX12_GENERIC
Definition: config.hpp:467
static constexpr bool CK_TILE_ARCH_GFX950
Definition: config.hpp:356
static constexpr bool CK_TILE_ARCH_GFX1150
Definition: config.hpp:430
static constexpr bool CK_TILE_ARCH_GFX1151
Definition: config.hpp:436