/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.1.0/include/ck_tile/core/config.hpp Source File
    
    
 
              
                
                
                  
  
/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.1.0/include/ck_tile/core/config.hpp Source File
Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.1.0/include/ck_tile/core/config.hpp Source File
Go to the documentation of this file.
    6 #if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__) 
    9 #if defined(__gfx942__) || defined(__gfx950__) 
   12 #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \ 
   13     defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) || \ 
   14     defined(__gfx10_3_generic__) 
   17 #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || \ 
   18     defined(__gfx1103__) || defined(__gfx1150__) || defined(__gfx1151__) || \ 
   19     defined(__gfx1152__) || defined(__gfx11_generic__) 
   22 #if defined(__gfx1200__) || defined(__gfx1201__) || defined(__gfx12_generic__) 
   26 #include "hip/hip_version.h" 
   27 #ifndef CK_TILE_DONT_USE_HIP_RUNTIME_HEADERS 
   28 #include "hip/hip_runtime.h" 
   29 #include "hip/hip_fp16.h" 
   33 #define CK_TILE_HOST inline __host__ 
   34 #define CK_TILE_DEVICE inline __device__ 
   35 #define CK_TILE_HOST_DEVICE inline __host__ __device__ 
   36 #define CK_TILE_DEVICE_EXTERN __device__ 
   37 #define CK_TILE_HOST_DEVICE_EXTERN __host__ __device__ 
   39 #define CK_TILE_HOST inline 
   40 #define CK_TILE_DEVICE inline 
   41 #define CK_TILE_HOST_DEVICE inline 
   42 #define CK_TILE_DEVICE_EXTERN 
   43 #define CK_TILE_HOST_DEVICE_EXTERN 
   50 #define CK_TILE_GENERIC_ADDR __attribute__((address_space(0))) 
   51 #define CK_TILE_GLOBAL_ADDR __attribute__((address_space(1))) 
   52 #define CK_TILE_LDS_ADDR __attribute__((address_space(3))) 
   53 #define CK_TILE_BUF_RES_ADDR __attribute__((address_space(8))) 
   55 #define CK_TILE_GENERIC_ADDR 
   56 #define CK_TILE_GLOBAL_ADDR 
   57 #define CK_TILE_LDS_ADDR 
   58 #define CK_TILE_BUF_RES_ADDR 
   60 #ifndef CK_TILE_USE_CUSTOM_DATA_TYPE 
   61 #define CK_TILE_USE_CUSTOM_DATA_TYPE 0  
   64 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD 0 
   65 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE_WITH_NAN 1 
   66 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE 2 
   67 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD_ASM 3 
   68 #define CK_TILE_FLOAT_TO_BFLOAT16_RTA_ASM 4 
   70 #ifndef CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT 
   71 #define CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE 
   74 #define CK_TILE_FLOAT_TO_FP8_STANDARD 0 
   75 #define CK_TILE_FLOAT_TO_FP8_STOCHASTIC 1 
   77 #ifndef CK_TILE_FLOAT_TO_FP8_DEFAULT 
   78 #define CK_TILE_FLOAT_TO_FP8_DEFAULT CK_TILE_FLOAT_TO_FP8_STANDARD 
   83 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_ARRAY 0 
   84 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE 1 
   85 #ifndef CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT 
   86 #define CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE 
   89 #define CK_TILE_THREAD_BUFFER_USE_ARRAY 0 
   90 #define CK_TILE_THREAD_BUFFER_USE_TUPLE 1 
   91 #ifndef CK_TILE_THREAD_BUFFER_DEFAULT 
   92 #define CK_TILE_THREAD_BUFFER_DEFAULT CK_TILE_THREAD_BUFFER_USE_ARRAY 
   95 #ifndef CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 
   96 #if CK_TILE_THREAD_BUFFER_DEFAULT == CK_TILE_THREAD_BUFFER_USE_TUPLE 
   99 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 1 
  101 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 0 
  105 #ifndef CK_TILE_USE_LAUNCH_BOUNDS 
  106 #define CK_TILE_USE_LAUNCH_BOUNDS 1 
  109 #ifndef CK_TILE_TIME_KERNEL 
  110 #define CK_TILE_TIME_KERNEL 1 
  113 #define CK_TILE_MAX_THREAD_PER_BLOCK 256 
  114 #define CK_TILE_MIN_BLOCK_PER_CU 2 
  116 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 
  117 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 0 
  120 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK 
  121 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK 1 
  124 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK 
  125 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK 1 
  128 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK 
  129 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK 1 
  132 #ifndef CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM 
  133 #define CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM 1 
  136 #ifndef CK_TILE_USE_AMD_BUFFER_LOAD 
  137 #define CK_TILE_USE_AMD_BUFFER_LOAD 1 
  140 #ifndef CK_TILE_USE_AMD_BUFFER_STORE 
  141 #define CK_TILE_USE_AMD_BUFFER_STORE 1 
  144 #ifndef CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER 
  145 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER 1 
  148 #ifndef CK_TILE_USE_PK4_LAYOUT_SHUFFLE 
  149 #define CK_TILE_USE_PK4_LAYOUT_SHUFFLE 1 
  153 #ifndef __HIP_DEVICE_COMPILE__  
  154 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1 
  155 #elif defined(__gfx9__)  
  156 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1 
  158 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 0 
  161 #if(defined(__gfx90a__) || defined(__gfx94__))  
  162 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 1 
  164 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 0 
  167 #ifndef CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS 
  168 #define CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS 0 
  171 #ifndef CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE 
  172 #define CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE 1 
  175 #ifndef CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 
  176 #if HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 1 && HIP_VERSION_PATCH >= 40091 
  177 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 1 
  179 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 0 
  184 #ifndef CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 
  185 #if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 2 && HIP_VERSION_PATCH >= 41133) ||  \ 
  186     (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 3 && HIP_VERSION_PATCH >= 42131) || \ 
  187     (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR > 3) 
  188 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 1 
  190 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 0 
  194 #ifndef CK_TILE_DEBUG_LOG 
  195 #define CK_TILE_DEBUG_LOG 0 
  198 #ifndef __HIP_DEVICE_COMPILE__  
  199 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0xffffffff 
  200 #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || \ 
  202 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x00020000 
  203 #elif defined(__gfx103__)  
  204 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31014000 
  205 #elif defined(__gfx11__) || defined(__gfx12__)  
  206 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31004000 
  209 #ifndef CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 
  210 #define CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1 
  213 #ifndef CK_TILE_USE_SUBDWORD_TILE_CAST 
  214 #define CK_TILE_USE_SUBDWORD_TILE_CAST 0 
  217 #ifndef CK_TILE_USE_PK_FP16_TILE_CAST 
  218 #define CK_TILE_USE_PK_FP16_TILE_CAST 0 
  222 #ifndef CK_TILE_FMHA_FWD_FAST_EXP2 
  223 #define CK_TILE_FMHA_FWD_FAST_EXP2 0 
  226 #ifndef CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN 
  227 #define CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN 0 
  230 #ifndef CK_TILE_BUFFER_LOAD_RAW_BF16_WA 
  231 #define CK_TILE_BUFFER_LOAD_RAW_BF16_WA 1 
  235 #ifndef CK_TILE_WORKAROUND_SWDEV_383542 
  236 #define CK_TILE_WORKAROUND_SWDEV_383542 1 
  239 #ifndef CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID 
  240 #define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID 1 
  243 #ifndef CK_TILE_USE_OCP_FP8 
  244 #if defined(__HIP_DEVICE_COMPILE__) 
  245 #if defined(__gfx950__) || defined(__gfx12__) 
  246 #define CK_TILE_USE_OCP_FP8 1 
  248 #define CK_TILE_USE_OCP_FP8 0 
  251 #define CK_TILE_USE_OCP_FP8 0 
  255 #ifndef CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 
  256 #if __clang_major__ >= 20 
  257 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 1 
  259 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 0 
  263 #ifndef CK_TILE_WA_ISSUE_2028 
  264 #define CK_TILE_WA_ISSUE_2028 0 
  269 #ifndef CK_TILE_ENC_SUPPORT_Y_TO_R 
  270 #define CK_TILE_ENC_SUPPORT_Y_TO_R 0