10 #if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
// Device-side function declaration (no body) whose symbol name is overridden
// by the __asm label to "llvm.amdgcn.s.wait.dscnt", so calls to
// llvm_amdgcn_s_wait_dscnt() reference that symbol directly.
// Takes a single `short` argument, `cnt`.
// NOTE(review): presumably this binds to the LLVM AMDGPU intrinsic that waits
// for the DS counter to reach `cnt` — confirm against the LLVM AMDGPU docs.
12 __device__
void llvm_amdgcn_s_wait_dscnt(
short cnt) __asm(
"llvm.amdgcn.s.wait.dscnt");
18 #if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
19 #if defined(__gfx12__)
20 llvm_amdgcn_s_wait_dscnt(0);
21 asm volatile(
"s_barrier_signal -1\n\t"
23 #elif defined(__gfx11__)
28 __builtin_amdgcn_s_waitcnt(0xfc07);
29 __builtin_amdgcn_s_barrier();
35 __builtin_amdgcn_s_waitcnt(0xc07f);
36 __builtin_amdgcn_s_barrier();
47 s_wait_loadcnt 0x0 \n \
49 s_barrier_signal -1 \n \
54 s_waitcnt vmcnt(0) \n \
55 s_waitcnt lgkmcnt(0) \n \
68 __builtin_amdgcn_sched_barrier(0);
__device__ void s_nop()
Definition: synchronization.hpp:61
__device__ void block_sync_lds_direct_load()
Definition: synchronization.hpp:43
__device__ void block_sync_lds()
Definition: synchronization.hpp:16