Remote memory access routines

Contents

Remote memory access routines#

  • Routines with the _wave and _wg suffixes require all threads in a wavefront and workgroup, respectively, to call the routine with the same parameters.

  • Routines with the _nbi substring will return as soon as the request is posted.

  • Routines without the _nbi substring will block until the operation completes locally.

  • Valid TYPENAME and TYPE values can be found in RMA_TYPES.

ROCSHMEM_PUT#

__device__ void rocshmem_TYPENAME_put(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_put_wave(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_put_wg(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_put_nbi(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_put_nbi_wave(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_put_nbi_wg(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_put(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_put_wave(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_put_wg(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_put_nbi(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_put_nbi_wave(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_put_nbi_wg(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
Parameters:
  • ctx – Context with which to perform this operation.

  • dest – Destination address. Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nelems – The number of elements to transfer.

  • pe – PE of the remote process.

Returns:

None.

Description: This routine writes contiguous data of nelems elements from source on the calling PE to dest at pe.

ROCSHMEM_PUTMEM#

__device__ void rocshmem_putmem(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_putmem_wave(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_putmem_wg(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_putmem_nbi(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_putmem_nbi_wave(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_putmem_nbi_wg(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_putmem(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_putmem_wave(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_putmem_wg(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_putmem_nbi(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_putmem_nbi_wave(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_putmem_nbi_wg(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
Parameters:
  • ctx – Context with which to perform this operation.

  • dest – Destination address. Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nelems – Size of the transfer in bytes.

  • pe – PE of the remote process.

Returns:

None.

Description: This routine writes contiguous data of nelems bytes from source on the calling PE to dest at pe.

ROCSHMEM_P#

__device__ void rocshmem_TYPENAME_p(TYPE *dest, TYPE value, int pe)#
__device__ void rocshmem_ctx_TYPENAME_p(rocshmem_ctx_t ctx, TYPE *dest, TYPE value, int pe)#
Parameters:
  • ctx – Context with which to perform this operation.

  • dest – Destination address. Must be an address on the symmetric heap.

  • value – Value to write to dest at pe.

  • pe – PE of the remote process.

Returns:

None.

Description: This routine writes a single value to to dest at pe.

ROCSHMEM_GET#

__device__ void rocshmem_TYPENAME_get(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_get_wave(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_get_wg(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_get_nbi(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_get_nbi_wave(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_TYPENAME_get_nbi_wg(TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_get(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_get_wave(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_get_wg(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_get_nbi(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_get_nbi_wave(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_TYPENAME_get_nbi_wg(rocshmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe)#
Parameters:
  • ctx – Context with which to perform this operation.

  • dest – Destination address; Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nelems – The number of elements to transfer.

  • pe – PE of the remote process.

Returns:

None.

Description: This routine reads contiguous data of nelems elements from source on pe to dest on the calling PE.

ROCSHMEM_GETMEM#

__device__ void rocshmem_getmem(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_getmem_wave(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_getmem_wg(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_getmem_nbi(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_getmem_nbi_wave(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_getmem_nbi_wg(void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_getmem(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_getmem_wave(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_getmem_wg(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_getmem_nbi(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_getmem_nbi_wave(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
__device__ void rocshmem_ctx_getmem_nbi_wg(rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe)#
Parameters:
  • ctx – Context with which to perform this operation.

  • dest – Destination address. Must be an address on the symmetric heap.

  • source – Source address. Must be an address on the symmetric heap.

  • nelems – Size of the transfer in bytes.

  • pe – PE of the remote process.

Returns:

None.

Description: This routine reads contiguous data of nelems bytes from source on pe to dest on the calling PE.

ROCSHMEM_G#

__device__ float rocshmem_ctx_float_g(rocshmem_ctx_t ctx, const float *source, int pe)#
__device__ float rocshmem_float_g(const float *source, int pe)#
Parameters:
  • ctx – Context with which to perform this operation.

  • source – Source address. Must be an address on the symmetric heap.

  • pe – PE of the remote process.

Returns:

The value read from source at pe.

Description: This routine reads and returns single value from source at pe.

Supported RMA data types#

The following table lists the supported RMA data types:

Table 2 RMA Data Types#

TYPE

TYPENAME

Supported

float

float

Yes

double

double

Yes

long double

longdouble

No

char

char

Yes

signed char

schar

Yes

short

short

Yes

int

int

Yes

long

long

Yes

long long

longlong

Yes

unsigned char

uchar

Yes

unsigned short

ushort

Yes

unsigned int

uint

Yes

unsigned long

ulong

Yes

unsigned long long

ulonglong

Yes

int8_t

int8

No

int16_t

int16

No

int32_t

int32

No

int64_t

int64

No

uint8_t

uint8

No

uint16_t

uint16

No

uint32_t

uint32

No

uint64_t

uint64

No

size_t

size

No

ptrdiff_t

ptrdiff

No