Collective routines#

ROCSHMEM_BARRIER_ALL#

__device__ void rocshmem_ctx_wg_barrier_all(rocshmem_ctx_t ctx)#
__device__ void rocshmem_wg_barrier_all()#
Parameters:

ctx – Context with which to perform this operation.

Returns:

None.

Description: This routine performs a collective barrier between all PEs in the system. The caller is blocked until the barrier is resolved.

ROCSHMEM_TEAM_SYNC#

__device__ void rocshmem_ctx_wg_team_sync(rocshmem_ctx_t ctx, rocshmem_team_t team)#
__device__ void rocshmem_wg_team_sync(rocshmem_team_t team)#
Parameters:
  • ctx – Context with which to perform this operation.

  • team – Team with which to perform this operation.

Returns:

None.

Description: This routine registers the arrival of a PE at a barrier. The caller is blocked until the synchronization is resolved.

Unlike the rocshmem_wg_barrier_all routine, rocshmem_wg_team_sync only ensures the completion and visibility of previously issued memory stores; it does not ensure the completion of remote memory updates issued via rocSHMEM routines.

ROCSHMEM_SYNC_ALL#

__device__ void rocshmem_ctx_wg_sync_all(rocshmem_ctx_t ctx)#
__device__ void rocshmem_wg_sync_all()#
Parameters:

ctx – Context with which to perform this operation.

Returns:

None.

Description: This routine behaves the same as rocshmem_wg_team_sync when called on the world team.

ROCSHMEM_ALLTOALL#

__device__ void rocshmem_ctx_TYPENAME_wg_alltoall(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems)#
Parameters:
  • ctx – Context with which to perform this collective.

  • team – The team participating in the collective.

  • dest – Destination address. Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nelems – Number of data blocks transferred per pair of PEs.

Returns:

None.

Description: This routine exchanges a fixed number of contiguous data blocks between all pairs of PEs participating in the collective routine. This function must be called as a work-group collective.

Valid TYPENAME and TYPE values are listed in RMA Data Types.

ROCSHMEM_BROADCAST#

__device__ void rocshmem_ctx_TYPENAME_wg_broadcast(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems, int pe_root)#
Parameters:
  • ctx – Context with which to perform this collective.

  • team – The team participating in the collective.

  • dest – Destination address. Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nelems – Number of elements in the source array to broadcast.

  • pe_root – Zero-based index, within the team, of the PE whose source data is broadcast.

Returns:

None.

Description: This routine performs a broadcast across PEs in the team. The caller is blocked until the broadcast completes.

Valid TYPENAME and TYPE values are listed in RMA Data Types.

ROCSHMEM_FCOLLECT#

__device__ void rocshmem_ctx_TYPENAME_wg_fcollect(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems)#
Parameters:
  • ctx – Context with which to perform this collective.

  • team – The team participating in the collective.

  • dest – Destination address. Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nelems – Number of elements in the source array contributed by each PE.

Returns:

None.

Description: This routine concatenates blocks of data from multiple PEs to an array in every PE participating in the collective routine.

Valid TYPENAME and TYPE values are listed in RMA Data Types.

ROCSHMEM_REDUCTION#

__device__ int rocshmem_ctx_TYPENAME_OPNAME_wg_reduce(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nreduce)#
Parameters:
  • ctx – Context with which to perform this collective.

  • team – The team participating in the collective.

  • dest – Destination address. Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nreduce – Number of elements in the source and dest arrays to reduce.

Returns:

Zero on successful local completion. Nonzero otherwise.

Description: This routine performs an allreduce operation across PEs in the team.

Valid TYPENAME, TYPE, and OPNAME values are listed in Reduction Types, Names and Operations.

Supported reduction types and operations#

Table 7 Reduction Types, Names and Operations#

| TYPE               | TYPENAME   | OPNAME                            | Supported |
|--------------------|------------|-----------------------------------|-----------|
| char               | char       | max, min, sum, prod               | No        |
| signed char        | schar      | max, min, sum, prod               | No        |
| short              | short      | max, min, sum, prod               | Yes       |
| int                | int        | max, min, sum, prod               | Yes       |
| long               | long       | max, min, sum, prod               | Yes       |
| long long          | longlong   | max, min, sum, prod               | Yes       |
| ptrdiff_t          | ptrdiff    | max, min, sum, prod               | No        |
| unsigned char      | uchar      | and, or, xor, max, min, sum, prod | No        |
| unsigned short     | ushort     | and, or, xor, max, min, sum, prod | No        |
| unsigned int       | uint       | and, or, xor, max, min, sum, prod | No        |
| unsigned long      | ulong      | and, or, xor, max, min, sum, prod | No        |
| unsigned long long | ulonglong  | and, or, xor, max, min, sum, prod | No        |
| int8_t             | int8       | and, or, xor, max, min, sum, prod | No        |
| int16_t            | int16      | and, or, xor, max, min, sum, prod | No        |
| int32_t            | int32      | and, or, xor, max, min, sum, prod | No        |
| int64_t            | int64      | and, or, xor, max, min, sum, prod | No        |
| uint8_t            | uint8      | and, or, xor, max, min, sum, prod | No        |
| uint16_t           | uint16     | and, or, xor, max, min, sum, prod | No        |
| uint32_t           | uint32     | and, or, xor, max, min, sum, prod | No        |
| uint64_t           | uint64     | and, or, xor, max, min, sum, prod | No        |
| size_t             | size       | and, or, xor, max, min, sum, prod | No        |
| float              | float      | max, min, sum, prod               | Yes       |
| double             | double     | max, min, sum, prod               | Yes       |
| long double        | longdouble | max, min, sum, prod               | No        |
| double _Complex    | complexd   | sum, prod                         | No        |
| float _Complex     | complexf   | sum, prod                         | No        |