30 #ifndef HIPCUB_ROCPRIM_GRID_GRID_BARRIER_HPP_
31 #define HIPCUB_ROCPRIM_GRID_GRID_BARRIER_HPP_
33 #include <type_traits>
35 #include "../../../config.hpp"
37 #include "../../../thread/thread_load.hpp"
39 BEGIN_HIPCUB_NAMESPACE
70 __device__ __forceinline__
void Sync()
const
72 volatile SyncFlag *d_vol_sync = d_sync;
84 d_vol_sync[blockIdx.x] = 1;
90 for (uint32_t peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
92 while (ThreadLoad<LOAD_CG>(d_sync + peer_block) == 0)
94 __threadfence_block();
101 for (uint32_t peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
103 d_vol_sync[peer_block] = 0;
108 if (threadIdx.x == 0)
111 d_vol_sync[blockIdx.x] = 1;
114 while (ThreadLoad<LOAD_CG>(d_sync + blockIdx.x) == 1)
116 __threadfence_block();
132 class GridBarrierLifetime :
public GridBarrier
152 hipError_t retval = hipSuccess;
155 retval = hipFree(d_sync);
176 hipError_t
Setup(
int sweep_grid_size)
178 hipError_t retval = hipSuccess;
180 size_t new_sync_bytes = sweep_grid_size *
sizeof(
SyncFlag);
181 if (new_sync_bytes > sync_bytes)
185 if ((retval = hipFree(d_sync)))
break;
188 sync_bytes = new_sync_bytes;
191 if ((retval = hipMalloc((
void**) &d_sync, sync_bytes)))
break;
192 if ((retval = hipMemset(d_sync, 0, new_sync_bytes)))
break;
GridBarrier implements a software global barrier among thread blocks within a hip grid.
Definition: grid_barrier.hpp:51
hipError_t Setup(int sweep_grid_size)
Definition: grid_barrier.hpp:176
unsigned int SyncFlag
Synchronize.
Definition: grid_barrier.hpp:54
hipError_t HostReset()
Definition: grid_barrier.hpp:150
GridBarrierLifetime()
Definition: grid_barrier.hpp:144
virtual ~GridBarrierLifetime()
Definition: grid_barrier.hpp:166
GridBarrier()
Definition: grid_barrier.hpp:64