rocprofiler-sdk/rccl/details/rccl.h File Reference

rocprofiler-sdk/rccl/details/rccl.h File Reference

Rocprofiler SDK Developer API: rocprofiler-sdk/rccl/details/rccl.h File Reference
Rocprofiler SDK Developer API 0.5.0
ROCm Profiling API and tools
rccl.h File Reference
#include <hip/hip_fp16.h>
#include <hip/hip_runtime.h>
#include <limits.h>
+ Include dependency graph for rccl.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  ncclUniqueId
 Opaque unique id used to initialize communicators. More...
 
struct  ncclConfig_t
 Communicator configuration. More...
 

Macros

#define NCCL_MAJOR   2
 
#define NCCL_MINOR   20
 
#define NCCL_PATCH   5
 
#define NCCL_SUFFIX   ""
 
#define NCCL_VERSION_CODE   22005
 
#define NCCL_VERSION(X, Y, Z)    (((X) <= 2 && (Y) <= 8) ? (X) *1000 + (Y) *100 + (Z) : (X) *10000 + (Y) *100 + (Z))
 
#define RCCL_BFLOAT16   1
 
#define RCCL_FLOAT8   1
 
#define RCCL_GATHER_SCATTER   1
 
#define RCCL_ALLTOALLV   1
 
#define NCCL_COMM_NULL   NULL
 
#define NCCL_UNIQUE_ID_BYTES   128
 
#define NCCL_CONFIG_UNDEF_INT   INT_MIN
 
#define NCCL_CONFIG_UNDEF_PTR   NULL
 
#define NCCL_SPLIT_NOCOLOR   -1
 
#define NCCL_CONFIG_INITIALIZER
 

Typedefs

typedef const struct ncclComm * ncclComm_t
 Opaque handle to communicator.
 
typedef int mscclAlgoHandle_t
 Opaque handle to MSCCL algorithm.
 

Enumerations

enum  ncclResult_t {
  ncclSuccess = 0 ,
  ncclUnhandledCudaError = 1 ,
  ncclSystemError = 2 ,
  ncclInternalError = 3 ,
  ncclInvalidArgument = 4 ,
  ncclInvalidUsage = 5 ,
  ncclRemoteError = 6 ,
  ncclInProgress = 7 ,
  ncclNumResults = 8
}
 Result type. More...
 
enum  ncclRedOp_dummy_t { ncclNumOps_dummy = 5 }
 Dummy reduction enumeration. More...
 
enum  ncclRedOp_t {
  ncclSum = 0 ,
  ncclProd = 1 ,
  ncclMax = 2 ,
  ncclMin = 3 ,
  ncclAvg = 4 ,
  ncclNumOps = 5 ,
  ncclMaxRedOp
}
 Reduction operation selector. More...
 
enum  ncclDataType_t {
  ncclInt8 = 0 ,
  ncclChar = 0 ,
  ncclUint8 = 1 ,
  ncclInt32 = 2 ,
  ncclInt = 2 ,
  ncclUint32 = 3 ,
  ncclInt64 = 4 ,
  ncclUint64 = 5 ,
  ncclFloat16 = 6 ,
  ncclHalf = 6 ,
  ncclFloat32 = 7 ,
  ncclFloat = 7 ,
  ncclFloat64 = 8 ,
  ncclDouble = 8 ,
  ncclBfloat16 = 9 ,
  ncclFp8E4M3 = 10 ,
  ncclFp8E5M2 = 11 ,
  ncclNumTypes = 12
}
 Data types. More...
 
enum  ncclScalarResidence_t {
  ncclScalarDevice = 0 ,
  ncclScalarHostImmediate = 1
}
 Location and dereferencing logic for scalar arguments. More...
 

Functions

ncclResult_t ncclMemAlloc (void **ptr, unsigned long size)
 
ncclResult_t pncclMemAlloc (void **ptr, unsigned long size)
 
ncclResult_t ncclMemFree (void *ptr)
 
ncclResult_t pncclMemFree (void *ptr)
 
ncclResult_t ncclGetVersion (int *version)
 Return the NCCL_VERSION_CODE of RCCL in the supplied integer.
 
ncclResult_t ncclGetUniqueId (ncclUniqueId *uniqueId)
 Generates an ID for ncclCommInitRank.
 
ncclResult_t ncclCommInitRankConfig (ncclComm_t *comm, int nranks, ncclUniqueId commId, int rank, ncclConfig_t *config)
 Create a new communicator with config.
 
ncclResult_t ncclCommInitRank (ncclComm_t *comm, int nranks, ncclUniqueId commId, int rank)
 Creates a new communicator (multi thread/process version).
 
ncclResult_t ncclCommInitAll (ncclComm_t *comm, int ndev, const int *devlist)
 Creates a clique of communicators (single process version).
 
ncclResult_t ncclCommFinalize (ncclComm_t comm)
 Finalize a communicator.
 
ncclResult_t ncclCommDestroy (ncclComm_t comm)
 Frees local resources associated with communicator object.
 
ncclResult_t ncclCommAbort (ncclComm_t comm)
 Abort any in-progress calls and destroy the communicator object.
 
ncclResult_t ncclCommSplit (ncclComm_t comm, int color, int key, ncclComm_t *newcomm, ncclConfig_t *config)
 Create one or more communicators from an existing one.
 
const char * ncclGetErrorString (ncclResult_t result)
 Returns a string for each result code.
 
const char * ncclGetLastError (ncclComm_t comm)
 
ncclResult_t ncclCommGetAsyncError (ncclComm_t comm, ncclResult_t *asyncError)
 Checks whether the comm has encountered any asynchronous errors.
 
ncclResult_t ncclCommCount (const ncclComm_t comm, int *count)
 Gets the number of ranks in the communicator clique.
 
ncclResult_t ncclCommCuDevice (const ncclComm_t comm, int *device)
 Get the ROCm device index associated with a communicator.
 
ncclResult_t ncclCommUserRank (const ncclComm_t comm, int *rank)
 Get the rank associated with a communicator.
 
ncclResult_t ncclCommRegister (const ncclComm_t comm, void *buff, unsigned long size, void **handle)
 
ncclResult_t ncclCommDeregister (const ncclComm_t comm, void *handle)
 
ncclResult_t ncclRedOpCreatePreMulSum (ncclRedOp_t *op, void *scalar, ncclDataType_t datatype, ncclScalarResidence_t residence, ncclComm_t comm)
 Create a custom pre-multiplier reduction operator.
 
ncclResult_t ncclRedOpDestroy (ncclRedOp_t op, ncclComm_t comm)
 Destroy custom reduction operator.
 
ncclResult_t ncclReduce (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclRedOp_t op, int root, ncclComm_t comm, hipStream_t stream)
 Reduce.
 
ncclResult_t ncclBcast (void *buff, unsigned long count, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream)
 (Deprecated) Broadcast (in-place)
 
ncclResult_t ncclBroadcast (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream)
 Broadcast.
 
ncclResult_t ncclAllReduce (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclRedOp_t op, ncclComm_t comm, hipStream_t stream)
 All-Reduce.
 
ncclResult_t ncclReduceScatter (const void *sendbuff, void *recvbuff, unsigned long recvcount, ncclDataType_t datatype, ncclRedOp_t op, ncclComm_t comm, hipStream_t stream)
 Reduce-Scatter.
 
ncclResult_t ncclAllGather (const void *sendbuff, void *recvbuff, unsigned long sendcount, ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream)
 All-Gather.
 
ncclResult_t ncclSend (const void *sendbuff, unsigned long count, ncclDataType_t datatype, int peer, ncclComm_t comm, hipStream_t stream)
 Send.
 
ncclResult_t ncclRecv (void *recvbuff, unsigned long count, ncclDataType_t datatype, int peer, ncclComm_t comm, hipStream_t stream)
 Receive.
 
ncclResult_t ncclGather (const void *sendbuff, void *recvbuff, unsigned long sendcount, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream)
 Gather.
 
ncclResult_t ncclScatter (const void *sendbuff, void *recvbuff, unsigned long recvcount, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream)
 Scatter.
 
ncclResult_t ncclAllToAll (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream)
 All-To-All.
 
ncclResult_t ncclAllToAllv (const void *sendbuff, const unsigned long sendcounts[], const unsigned long sdispls[], void *recvbuff, const unsigned long recvcounts[], const unsigned long rdispls[], ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream)
 All-To-Allv.
 
ncclResult_t mscclLoadAlgo (const char *mscclAlgoFilePath, mscclAlgoHandle_t *mscclAlgoHandle, int rank)
 MSCCL Load Algorithm.
 
ncclResult_t mscclRunAlgo (const void *sendBuff, const unsigned long sendCounts[], const unsigned long sDisPls[], void *recvBuff, const unsigned long recvCounts[], const unsigned long rDisPls[], unsigned long count, ncclDataType_t dataType, int root, int peer, ncclRedOp_t op, mscclAlgoHandle_t mscclAlgoHandle, ncclComm_t comm, hipStream_t stream)
 MSCCL Run Algorithm.
 
ncclResult_t mscclUnloadAlgo (mscclAlgoHandle_t mscclAlgoHandle)
 MSCCL Unload Algorithm.
 
ncclResult_t ncclGroupStart ()
 Group Start.
 
ncclResult_t ncclGroupEnd ()
 Group End.
 

Data Structure Documentation

◆ ncclUniqueId

struct ncclUniqueId

Opaque unique id used to initialize communicators.

The ncclUniqueId must be passed to all participating ranks.

Definition at line 44 of file rccl.h.

+ Collaboration diagram for ncclUniqueId:
Data Fields
char internal[128]

Opaque array.

Macro Definition Documentation

◆ NCCL_COMM_NULL

#define NCCL_COMM_NULL   NULL

Definition at line 39 of file rccl.h.

◆ NCCL_CONFIG_UNDEF_INT

#define NCCL_CONFIG_UNDEF_INT   INT_MIN

Definition at line 69 of file rccl.h.

◆ NCCL_CONFIG_UNDEF_PTR

#define NCCL_CONFIG_UNDEF_PTR   NULL

Definition at line 70 of file rccl.h.

◆ NCCL_MAJOR

#define NCCL_MAJOR   2

Definition at line 15 of file rccl.h.

◆ NCCL_MINOR

#define NCCL_MINOR   20

Definition at line 16 of file rccl.h.

◆ NCCL_PATCH

#define NCCL_PATCH   5

Definition at line 17 of file rccl.h.

◆ NCCL_SPLIT_NOCOLOR

#define NCCL_SPLIT_NOCOLOR   -1

Definition at line 71 of file rccl.h.

◆ NCCL_SUFFIX

#define NCCL_SUFFIX   ""

Definition at line 18 of file rccl.h.

◆ NCCL_UNIQUE_ID_BYTES

#define NCCL_UNIQUE_ID_BYTES   128

Definition at line 41 of file rccl.h.

◆ NCCL_VERSION

#define NCCL_VERSION(X, Y, Z)    (((X) <= 2 && (Y) <= 8) ? (X) *1000 + (Y) *100 + (Z) : (X) *10000 + (Y) *100 + (Z))

Definition at line 21 of file rccl.h.

Value: (((X) <= 2 && (Y) <= 8) ? (X) *1000 + (Y) *100 + (Z) : (X) *10000 + (Y) *100 + (Z))

◆ NCCL_VERSION_CODE

#define NCCL_VERSION_CODE   22005

Definition at line 20 of file rccl.h.

◆ RCCL_ALLTOALLV

#define RCCL_ALLTOALLV   1

Definition at line 27 of file rccl.h.

◆ RCCL_BFLOAT16

#define RCCL_BFLOAT16   1

Definition at line 24 of file rccl.h.

◆ RCCL_FLOAT8

#define RCCL_FLOAT8   1

Definition at line 25 of file rccl.h.

◆ RCCL_GATHER_SCATTER

#define RCCL_GATHER_SCATTER   1

Definition at line 26 of file rccl.h.

Typedef Documentation

◆ ncclComm_t

typedef const struct ncclComm* ncclComm_t

Opaque handle to communicator.

A communicator contains information required to facilitate collective communications calls.

Definition at line 38 of file rccl.h.

Function Documentation

◆ ncclCommDeregister()

ncclResult_t ncclCommDeregister ( const ncclComm_t  comm,
void *  handle 
)

◆ ncclCommRegister()

ncclResult_t ncclCommRegister ( const ncclComm_t  comm,
void *  buff,
unsigned long  size,
void **  handle 
)

◆ ncclMemAlloc()

ncclResult_t ncclMemAlloc ( void **  ptr,
unsigned long  size 
)

◆ ncclMemFree()

ncclResult_t ncclMemFree ( void *  ptr)

◆ pncclMemAlloc()

ncclResult_t pncclMemAlloc ( void **  ptr,
unsigned long  size 
)

◆ pncclMemFree()

ncclResult_t pncclMemFree ( void *  ptr)