/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.0/hipcub/include/hipcub/backend/rocprim/grid/grid_barrier.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.0/hipcub/include/hipcub/backend/rocprim/grid/grid_barrier.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.0/hipcub/include/hipcub/backend/rocprim/grid/grid_barrier.hpp Source File
grid_barrier.hpp
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_GRID_GRID_BARRIER_HPP_
31 #define HIPCUB_ROCPRIM_GRID_GRID_BARRIER_HPP_
32 
33 #include <type_traits>
34 
35 #include "../../../config.hpp"
36 
37 #include "../../../thread/thread_load.hpp"
38 
39 BEGIN_HIPCUB_NAMESPACE
40 
51 {
52 protected :
53 
54  typedef unsigned int SyncFlag;
55 
56  // Counters in global device memory
57  SyncFlag* d_sync;
58 
59 public:
60 
64  GridBarrier() : d_sync(NULL) {}
65 
70  __device__ __forceinline__ void Sync() const
71  {
72  volatile SyncFlag *d_vol_sync = d_sync;
73 
74  // Threadfence and syncthreads to make sure global writes are visible before
75  // thread-0 reports in with its sync counter
76  __threadfence();
77  __syncthreads();
78 
79  if (blockIdx.x == 0)
80  {
81  // Report in ourselves
82  if (threadIdx.x == 0)
83  {
84  d_vol_sync[blockIdx.x] = 1;
85  }
86 
87  __syncthreads();
88 
89  // Wait for everyone else to report in
90  for (uint32_t peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
91  {
92  while (ThreadLoad<LOAD_CG>(d_sync + peer_block) == 0)
93  {
94  __threadfence_block();
95  }
96  }
97 
98  __syncthreads();
99 
100  // Let everyone know it's safe to proceed
101  for (uint32_t peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
102  {
103  d_vol_sync[peer_block] = 0;
104  }
105  }
106  else
107  {
108  if (threadIdx.x == 0)
109  {
110  // Report in
111  d_vol_sync[blockIdx.x] = 1;
112 
113  // Wait for acknowledgment
114  while (ThreadLoad<LOAD_CG>(d_sync + blockIdx.x) == 1)
115  {
116  __threadfence_block();
117  }
118  }
119 
120  __syncthreads();
121  }
122  }
123 };
124 
125 
132 class GridBarrierLifetime : public GridBarrier
133 {
134 protected:
135 
136  // Number of bytes backed by d_sync
137  size_t sync_bytes;
138 
139 public:
140 
144  GridBarrierLifetime() : GridBarrier(), sync_bytes(0) {}
145 
146 
150  hipError_t HostReset()
151  {
152  hipError_t retval = hipSuccess;
153  if (d_sync)
154  {
155  retval = hipFree(d_sync);
156  d_sync = NULL;
157  }
158  sync_bytes = 0;
159  return retval;
160  }
161 
162 
167  {
168  HostReset();
169  }
170 
171 
176  hipError_t Setup(int sweep_grid_size)
177  {
178  hipError_t retval = hipSuccess;
179  do {
180  size_t new_sync_bytes = sweep_grid_size * sizeof(SyncFlag);
181  if (new_sync_bytes > sync_bytes)
182  {
183  if (d_sync)
184  {
185  if ((retval = hipFree(d_sync))) break;
186  }
187 
188  sync_bytes = new_sync_bytes;
189 
190  // Allocate and initialize to zero
191  if ((retval = hipMalloc((void**) &d_sync, sync_bytes))) break;
192  if ((retval = hipMemset(d_sync, 0, new_sync_bytes))) break;
193  }
194  } while (0);
195 
196  return retval;
197  }
198 };
199 
200 END_HIPCUB_NAMESPACE
201 
202 #endif // HIPCUB_ROCPRIM_GRID_GRID_BARRIER_HPP_
GridBarrier implements a software global barrier among thread blocks within a hip grid.
Definition: grid_barrier.hpp:51
hipError_t Setup(int sweep_grid_size)
Definition: grid_barrier.hpp:176
unsigned int SyncFlag
Synchronize.
Definition: grid_barrier.hpp:54
hipError_t HostReset()
Definition: grid_barrier.hpp:150
GridBarrierLifetime()
Definition: grid_barrier.hpp:144
virtual ~GridBarrierLifetime()
Definition: grid_barrier.hpp:166
GridBarrier()
Definition: grid_barrier.hpp:64