/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/workgroup_barrier.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/workgroup_barrier.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/workgroup_barrier.hpp Source File
workgroup_barrier.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 #include <hip/hip_runtime.h>
6 #include <stdint.h>
7 
8 namespace ck {
10 {
11  __device__ workgroup_barrier(uint32_t* ptr) : base_ptr(ptr) {}
12 
13  __device__ uint32_t ld(uint32_t offset)
14  {
15 #if 0
17  amdgcn_make_buffer_resource(base_ptr),
18  0,
19  offset,
20  AMDGCN_BUFFER_GLC);
21  union cvt {
22  float f32;
23  uint32_t u32;
24  };
25  cvt x;
26  x.f32 = d;
27  return x.u32;
28 #endif
29  return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED);
30  }
31 
32  __device__ void wait_eq(uint32_t offset, uint32_t value)
33  {
34  if(threadIdx.x == 0)
35  {
36  while(ld(offset) != value) {}
37  }
38  __syncthreads();
39  }
40 
41  __device__ void wait_lt(uint32_t offset, uint32_t value)
42  {
43  if(threadIdx.x == 0)
44  {
45  while(ld(offset) < value) {}
46  }
47  __syncthreads();
48  }
49 
50  __device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
51  {
52  if(threadIdx.x == 0)
53  {
54  while(atomicCAS(base_ptr + offset, compare, value) != compare) {}
55  }
56  __syncthreads();
57  }
58 
59  // enter critical zoon, assume buffer is zero when launch kernel
60  __device__ void aquire(uint32_t offset) { wait_set(offset, 0, 1); }
61 
62  // exit critical zoon, assume buffer is zero when launch kernel
63  __device__ void release(uint32_t offset) { wait_set(offset, 1, 0); }
64 
65  __device__ void inc(uint32_t offset)
66  {
67  __syncthreads();
68  if(threadIdx.x == 0)
69  {
70  atomicAdd(base_ptr + offset, 1);
71  }
72  }
73 
75 };
76 } // namespace ck
Definition: ck.hpp:270
__device__ float llvm_amdgcn_raw_buffer_load_fp32(int32x4_t srsrc, index_t voffset, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.f32")
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1697
unsigned int uint32_t
Definition: stdint.h:126
Definition: workgroup_barrier.hpp:10
__device__ void wait_lt(uint32_t offset, uint32_t value)
Definition: workgroup_barrier.hpp:41
__device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
Definition: workgroup_barrier.hpp:50
uint32_t * base_ptr
Definition: workgroup_barrier.hpp:74
__device__ void release(uint32_t offset)
Definition: workgroup_barrier.hpp:63
__device__ workgroup_barrier(uint32_t *ptr)
Definition: workgroup_barrier.hpp:11
__device__ void inc(uint32_t offset)
Definition: workgroup_barrier.hpp:65
__device__ void aquire(uint32_t offset)
Definition: workgroup_barrier.hpp:60
__device__ void wait_eq(uint32_t offset, uint32_t value)
Definition: workgroup_barrier.hpp:32
__device__ uint32_t ld(uint32_t offset)
Definition: workgroup_barrier.hpp:13