rocprofiler-sdk/rccl/details/rccl.h File Reference#
Rocprofiler SDK Developer API 0.5.0
ROCm Profiling API and tools
|
rccl.h File Reference
#include <hip/hip_fp16.h>
#include <hip/hip_runtime.h>
#include <limits.h>
Include dependency graph for rccl.h:
This graph shows which files directly or indirectly include this file:
Go to the source code of this file.
Data Structures | |
struct | ncclUniqueId |
Opaque unique id used to initialize communicators. More... | |
struct | ncclConfig_t |
Macros | |
#define | NCCL_MAJOR 2 |
#define | NCCL_MINOR 20 |
#define | NCCL_PATCH 5 |
#define | NCCL_SUFFIX "" |
#define | NCCL_VERSION_CODE 22005 |
#define | NCCL_VERSION(X, Y, Z) (((X) <= 2 && (Y) <= 8) ? (X) *1000 + (Y) *100 + (Z) : (X) *10000 + (Y) *100 + (Z)) |
#define | RCCL_BFLOAT16 1 |
#define | RCCL_FLOAT8 1 |
#define | RCCL_GATHER_SCATTER 1 |
#define | RCCL_ALLTOALLV 1 |
#define | NCCL_COMM_NULL NULL |
#define | NCCL_UNIQUE_ID_BYTES 128 |
#define | NCCL_CONFIG_UNDEF_INT INT_MIN |
#define | NCCL_CONFIG_UNDEF_PTR NULL |
#define | NCCL_SPLIT_NOCOLOR -1 |
#define | NCCL_CONFIG_INITIALIZER |
Typedefs | |
typedef const struct ncclComm * | ncclComm_t |
Opaque handle to communicator. | |
typedef int | mscclAlgoHandle_t |
Opaque handle to MSCCL algorithm. | |
Enumerations | |
enum | ncclResult_t { ncclSuccess = 0 , ncclUnhandledCudaError = 1 , ncclSystemError = 2 , ncclInternalError = 3 , ncclInvalidArgument = 4 , ncclInvalidUsage = 5 , ncclRemoteError = 6 , ncclInProgress = 7 , ncclNumResults = 8 } |
Result type. More... | |
enum | ncclRedOp_dummy_t { ncclNumOps_dummy = 5 } |
Dummy reduction enumeration. More... | |
enum | ncclRedOp_t { ncclSum = 0 , ncclProd = 1 , ncclMax = 2 , ncclMin = 3 , ncclAvg = 4 , ncclNumOps = 5 , ncclMaxRedOp } |
Reduction operation selector. More... | |
enum | ncclDataType_t { ncclInt8 = 0 , ncclChar = 0 , ncclUint8 = 1 , ncclInt32 = 2 , ncclInt = 2 , ncclUint32 = 3 , ncclInt64 = 4 , ncclUint64 = 5 , ncclFloat16 = 6 , ncclHalf = 6 , ncclFloat32 = 7 , ncclFloat = 7 , ncclFloat64 = 8 , ncclDouble = 8 , ncclBfloat16 = 9 , ncclFp8E4M3 = 10 , ncclFp8E5M2 = 11 , ncclNumTypes = 12 } |
Data types. More... | |
enum | ncclScalarResidence_t { ncclScalarDevice = 0 , ncclScalarHostImmediate = 1 } |
Location and dereferencing logic for scalar arguments. More... | |
Functions | |
ncclResult_t | ncclMemAlloc (void **ptr, unsigned long size) |
ncclResult_t | pncclMemAlloc (void **ptr, unsigned long size) |
ncclResult_t | ncclMemFree (void *ptr) |
ncclResult_t | pncclMemFree (void *ptr) |
ncclResult_t | ncclGetVersion (int *version) |
Return the NCCL_VERSION_CODE of RCCL in the supplied integer. | |
ncclResult_t | ncclGetUniqueId (ncclUniqueId *uniqueId) |
Generates an ID for ncclCommInitRank. | |
ncclResult_t | ncclCommInitRankConfig (ncclComm_t *comm, int nranks, ncclUniqueId commId, int rank, ncclConfig_t *config) |
Create a new communicator with config. | |
ncclResult_t | ncclCommInitRank (ncclComm_t *comm, int nranks, ncclUniqueId commId, int rank) |
Creates a new communicator (multi thread/process version). | |
ncclResult_t | ncclCommInitAll (ncclComm_t *comm, int ndev, const int *devlist) |
Creates a clique of communicators (single process version). | |
ncclResult_t | ncclCommFinalize (ncclComm_t comm) |
Finalize a communicator. | |
ncclResult_t | ncclCommDestroy (ncclComm_t comm) |
Frees local resources associated with communicator object. | |
ncclResult_t | ncclCommAbort (ncclComm_t comm) |
Abort any in-progress calls and destroy the communicator object. | |
ncclResult_t | ncclCommSplit (ncclComm_t comm, int color, int key, ncclComm_t *newcomm, ncclConfig_t *config) |
Create one or more communicators from an existing one. | |
const char * | ncclGetErrorString (ncclResult_t result) |
Returns a string for each result code. | |
const char * | ncclGetLastError (ncclComm_t comm) |
ncclResult_t | ncclCommGetAsyncError (ncclComm_t comm, ncclResult_t *asyncError) |
Checks whether the comm has encountered any asynchronous errors. | |
ncclResult_t | ncclCommCount (const ncclComm_t comm, int *count) |
Gets the number of ranks in the communicator clique. | |
ncclResult_t | ncclCommCuDevice (const ncclComm_t comm, int *device) |
Get the ROCm device index associated with a communicator. | |
ncclResult_t | ncclCommUserRank (const ncclComm_t comm, int *rank) |
Get the rank associated with a communicator. | |
ncclResult_t | ncclCommRegister (const ncclComm_t comm, void *buff, unsigned long size, void **handle) |
ncclResult_t | ncclCommDeregister (const ncclComm_t comm, void *handle) |
ncclResult_t | ncclRedOpCreatePreMulSum (ncclRedOp_t *op, void *scalar, ncclDataType_t datatype, ncclScalarResidence_t residence, ncclComm_t comm) |
Create a custom pre-multiplier reduction operator. | |
ncclResult_t | ncclRedOpDestroy (ncclRedOp_t op, ncclComm_t comm) |
Destroy custom reduction operator. | |
ncclResult_t | ncclReduce (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclRedOp_t op, int root, ncclComm_t comm, hipStream_t stream) |
Reduce. | |
ncclResult_t | ncclBcast (void *buff, unsigned long count, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream) |
(Deprecated) Broadcast (in-place). | |
ncclResult_t | ncclBroadcast (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream) |
Broadcast. | |
ncclResult_t | ncclAllReduce (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclRedOp_t op, ncclComm_t comm, hipStream_t stream) |
All-Reduce. | |
ncclResult_t | ncclReduceScatter (const void *sendbuff, void *recvbuff, unsigned long recvcount, ncclDataType_t datatype, ncclRedOp_t op, ncclComm_t comm, hipStream_t stream) |
Reduce-Scatter. | |
ncclResult_t | ncclAllGather (const void *sendbuff, void *recvbuff, unsigned long sendcount, ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream) |
All-Gather. | |
ncclResult_t | ncclSend (const void *sendbuff, unsigned long count, ncclDataType_t datatype, int peer, ncclComm_t comm, hipStream_t stream) |
Send. | |
ncclResult_t | ncclRecv (void *recvbuff, unsigned long count, ncclDataType_t datatype, int peer, ncclComm_t comm, hipStream_t stream) |
Receive. | |
ncclResult_t | ncclGather (const void *sendbuff, void *recvbuff, unsigned long sendcount, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream) |
Gather. | |
ncclResult_t | ncclScatter (const void *sendbuff, void *recvbuff, unsigned long recvcount, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream) |
Scatter. | |
ncclResult_t | ncclAllToAll (const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream) |
All-To-All. | |
ncclResult_t | ncclAllToAllv (const void *sendbuff, const unsigned long sendcounts[], const unsigned long sdispls[], void *recvbuff, const unsigned long recvcounts[], const unsigned long rdispls[], ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream) |
All-To-Allv. | |
ncclResult_t | mscclLoadAlgo (const char *mscclAlgoFilePath, mscclAlgoHandle_t *mscclAlgoHandle, int rank) |
MSCCL Load Algorithm. | |
ncclResult_t | mscclRunAlgo (const void *sendBuff, const unsigned long sendCounts[], const unsigned long sDisPls[], void *recvBuff, const unsigned long recvCounts[], const unsigned long rDisPls[], unsigned long count, ncclDataType_t dataType, int root, int peer, ncclRedOp_t op, mscclAlgoHandle_t mscclAlgoHandle, ncclComm_t comm, hipStream_t stream) |
MSCCL Run Algorithm. | |
ncclResult_t | mscclUnloadAlgo (mscclAlgoHandle_t mscclAlgoHandle) |
MSCCL Unload Algorithm. | |
ncclResult_t | ncclGroupStart () |
Group Start. | |
ncclResult_t | ncclGroupEnd () |
Group End. | |
Data Structure Documentation
◆ ncclUniqueId
struct ncclUniqueId |
Opaque unique id used to initialize communicators.
The ncclUniqueId must be passed to all participating ranks.
Collaboration diagram for ncclUniqueId:
Data Fields | ||
---|---|---|
char | internal[128] |
Opaque array. |
Macro Definition Documentation
◆ NCCL_COMM_NULL
◆ NCCL_CONFIG_UNDEF_INT
◆ NCCL_CONFIG_UNDEF_PTR
◆ NCCL_MAJOR
◆ NCCL_MINOR
◆ NCCL_PATCH
◆ NCCL_SPLIT_NOCOLOR
◆ NCCL_SUFFIX
◆ NCCL_UNIQUE_ID_BYTES
◆ NCCL_VERSION
#define NCCL_VERSION(X, Y, Z) (((X) <= 2 && (Y) <= 8) ? (X) *1000 + (Y) *100 + (Z) : (X) *10000 + (Y) *100 + (Z))
◆ NCCL_VERSION_CODE
◆ RCCL_ALLTOALLV
◆ RCCL_BFLOAT16
◆ RCCL_FLOAT8
◆ RCCL_GATHER_SCATTER
Typedef Documentation
◆ ncclComm_t
typedef const struct ncclComm* ncclComm_t |
Function Documentation
◆ ncclCommDeregister()
ncclResult_t ncclCommDeregister(const ncclComm_t comm, void *handle)
◆ ncclCommRegister()
ncclResult_t ncclCommRegister(const ncclComm_t comm, void *buff, unsigned long size, void **handle)
◆ ncclMemAlloc()
ncclResult_t ncclMemAlloc(void **ptr, unsigned long size)
◆ ncclMemFree()
ncclResult_t ncclMemFree(void *ptr)
◆ pncclMemAlloc()
ncclResult_t pncclMemAlloc(void **ptr, unsigned long size)
◆ pncclMemFree()
ncclResult_t pncclMemFree(void *ptr)
Generated by Doxygen 1.9.8