/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/synchronization.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/synchronization.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/synchronization.hpp Source File
synchronization.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck/ck.hpp"
7 
8 namespace ck {
9 
10 #if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
11 #ifdef __gfx12__
12 __device__ void llvm_amdgcn_s_wait_dscnt(short cnt) __asm("llvm.amdgcn.s.wait.dscnt");
13 #endif
14 #endif
15 
16 __device__ void block_sync_lds()
17 {
18 #if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
19 #if defined(__gfx12__)
20  llvm_amdgcn_s_wait_dscnt(0);
21  asm volatile("s_barrier_signal -1\n\t"
22  "s_barrier_wait -1");
23 #elif defined(__gfx11__)
24  // asm volatile("\
25  // s_waitcnt lgkmcnt(0) \n \
26  // s_barrier \
27  // " ::);
28  __builtin_amdgcn_s_waitcnt(0xfc07);
29  __builtin_amdgcn_s_barrier();
30 #else
31  // asm volatile("\
32  // s_waitcnt lgkmcnt(0) \n \
33  // s_barrier \
34  // " ::);
35  __builtin_amdgcn_s_waitcnt(0xc07f);
36  __builtin_amdgcn_s_barrier();
37 #endif
38 #else
39  __syncthreads();
40 #endif
41 }
42 
43 __device__ void block_sync_lds_direct_load()
44 {
45 #ifdef __gfx12__
46  asm volatile("\
47  s_wait_loadcnt 0x0 \n \
48  s_wait_dscnt 0x0 \n \
49  s_barrier_signal -1 \n \
50  s_barrier_wait -1 \
51  " ::);
52 #else
53  asm volatile("\
54  s_waitcnt vmcnt(0) \n \
55  s_waitcnt lgkmcnt(0) \n \
56  s_barrier \
57  " ::);
58 #endif
59 }
60 
61 __device__ void s_nop()
62 {
63 #if 1
64  asm volatile("\
65  s_nop 0 \n \
66  " ::);
67 #else
68  __builtin_amdgcn_sched_barrier(0);
69 #endif
70 }
71 
72 } // namespace ck
Definition: ck.hpp:268
__device__ void s_nop()
Definition: synchronization.hpp:61
__device__ void block_sync_lds_direct_load()
Definition: synchronization.hpp:43
__device__ void block_sync_lds()
Definition: synchronization.hpp:16