15 template <
typename,
bool>
21 using type = std::underlying_type_t<T>;
53 #if defined(__GFX9__) || !defined(__HIP_DEVICE_COMPILE__)
76 return __builtin_amdgcn_readfirstlane(threadIdx.x /
get_warp_size());
85 #if CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
91 __builtin_amdgcn_s_waitcnt(0xc07f);
92 __builtin_amdgcn_s_barrier();
101 asm volatile(
"s_wait_loadcnt %0 \n"
102 "s_barrier_signal -1 \n"
108 asm volatile(
"s_waitcnt vmcnt(%0) \n"
119 s_waitcnt vmcnt(0) \n \
120 s_waitcnt lgkmcnt(0) \n \
128 asm volatile(
"s_nop %0" : :
"n"(cnt) :);
130 __builtin_amdgcn_sched_barrier(cnt);
134 #define CK_CONSTANT_ADDRESS_SPACE \
135 __attribute__((address_space( \
136 static_cast<safe_underlying_type_t<address_space_enum>>(address_space_enum::constant))))
138 template <
typename T>
143 #pragma clang diagnostic push
144 #pragma clang diagnostic ignored "-Wold-style-cast"
146 #pragma clang diagnostic pop
149 template <
typename T>
154 #pragma clang diagnostic push
155 #pragma clang diagnostic ignored "-Wold-style-cast"
157 #pragma clang diagnostic pop
162 #if defined(__gfx950__)
#define CK_CONSTANT_ADDRESS_SPACE
Definition: arch.hpp:134
#define CK_TILE_DEVICE
Definition: config.hpp:40
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE index_t get_warp_size()
Definition: arch.hpp:51
CK_TILE_DEVICE index_t get_lane_id()
Definition: arch.hpp:72
CK_TILE_DEVICE void atomic_add(X *p_dst, const X &x)
memory_operation_enum
Definition: arch.hpp:44
CK_TILE_HOST_DEVICE T add(const T &a, const T &b)
Definition: generic_memory_space_atomic.hpp:12
CK_TILE_DEVICE index_t get_block_1d_id()
Definition: arch.hpp:69
CK_TILE_DEVICE void block_sync_lds()
Definition: arch.hpp:83
int32_t index_t
Definition: integer.hpp:9
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACE * cast_pointer_to_constant_address_space(T *p)
Definition: arch.hpp:150
CK_TILE_DEVICE void block_sync_lds_direct_load()
Definition: arch.hpp:116
constexpr CK_TILE_HOST_DEVICE index_t get_smem_capacity()
Definition: arch.hpp:160
CK_TILE_DEVICE void s_nop(index_t cnt=0)
Definition: arch.hpp:125
CK_TILE_DEVICE index_t get_thread_local_1d_id()
Definition: arch.hpp:65
CK_TILE_DEVICE index_t get_block_size()
Definition: arch.hpp:62
CK_TILE_DEVICE index_t get_warp_id()
Definition: arch.hpp:74
__device__ T * cast_pointer_to_generic_address_space(T CK_CONSTANT_ADDRESS_SPACE *p)
Definition: arch.hpp:139
CK_TILE_DEVICE index_t get_thread_id()
Definition: arch.hpp:79
CK_TILE_DEVICE index_t get_thread_global_1d_id()
Definition: arch.hpp:67
address_space_enum
Definition: arch.hpp:34
CK_TILE_DEVICE index_t get_block_id()
Definition: arch.hpp:81
CK_TILE_DEVICE index_t get_grid_size()
Definition: arch.hpp:60
CK_TILE_DEVICE void block_sync_load_raw(index_t cnt=0)
Definition: arch.hpp:98
typename safe_underlying_type< T, std::is_enum< T >::value >::type safe_underlying_type_t
Definition: arch.hpp:31
void type
Definition: arch.hpp:27
std::underlying_type_t< T > type
Definition: arch.hpp:21