rocprofiler-sdk/rccl/details/api_trace.h Source File

rocprofiler-sdk/rccl/details/api_trace.h Source File

Rocprofiler SDK Developer API: rocprofiler-sdk/rccl/details/api_trace.h Source File
Rocprofiler SDK Developer API 0.5.0
ROCm Profiling API and tools
api_trace.h
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#pragma once
24
// Select the RCCL header to build against.
//
// Unless the build already defined ROCPROFILER_SDK_USE_SYSTEM_RCCL, it is set
// to 1 when the preprocessor supports __has_include and <rccl/rccl.h> is
// found, and to 0 otherwise (including when __has_include is unavailable).
#if !defined(ROCPROFILER_SDK_USE_SYSTEM_RCCL)
#    if defined __has_include
#        if __has_include(<rccl/rccl.h>)
#            define ROCPROFILER_SDK_USE_SYSTEM_RCCL 1
#        else
#            define ROCPROFILER_SDK_USE_SYSTEM_RCCL 0
#        endif
#    else
#        define ROCPROFILER_SDK_USE_SYSTEM_RCCL 0
#    endif
#endif

#if ROCPROFILER_SDK_USE_SYSTEM_RCCL > 0
#    include <rccl/rccl.h>
#else
// NOTE(review): the fallback include (line 40 of the original header) is
// missing from this listing. Presumably it pulls in an RCCL header shipped
// with the SDK; restore it from the upstream file before using this text.
#endif
42
43#include <stddef.h>
44#include <stdint.h>
45
// Major version of the RCCL API trace interface.
// Should only be increased when the dispatch table(s) change fundamentally.
#define RCCL_API_TRACE_VERSION_MAJOR 0

// Patch version of the RCCL API trace interface.
// Should be increased every time new members are added to an existing
// dispatch table.
#define RCCL_API_TRACE_VERSION_PATCH 0
51
// Opens an `extern "C"` linkage block when compiled as C++; expands to
// nothing when compiled as C. A definition supplied earlier is left as-is.
#if !defined(RCCL_EXTERN_C_INIT)
#    ifdef __cplusplus
#        define RCCL_EXTERN_C_INIT extern "C" {
#    else
#        define RCCL_EXTERN_C_INIT
#    endif
#endif
59
// Closes the `extern "C"` linkage block opened by RCCL_EXTERN_C_INIT when
// compiled as C++; expands to nothing when compiled as C. A definition
// supplied earlier is left as-is.
#if !defined(RCCL_EXTERN_C_FINI)
#    ifdef __cplusplus
#        define RCCL_EXTERN_C_FINI }
#    else
#        define RCCL_EXTERN_C_FINI
#    endif
#endif
67
69
// 64-bit unsigned identifier type used for RCCL ranges.
typedef uint64_t rccl_range_id_t;
71typedef ncclResult_t (*ncclAllGather_fn_t)(const void* sendbuff,
72 void* recvbuff,
73 size_t sendcount,
74 ncclDataType_t datatype,
75 ncclComm_t comm,
76 hipStream_t stream);
77typedef ncclResult_t (*ncclAllReduce_fn_t)(const void* sendbuff,
78 void* recvbuff,
79 size_t count,
80 ncclDataType_t datatype,
81 ncclRedOp_t op,
82 struct ncclComm* comm,
83 hipStream_t stream);
84typedef ncclResult_t (*ncclAllToAll_fn_t)(const void* sendbuff,
85 void* recvbuff,
86 size_t count,
87 ncclDataType_t datatype,
88 ncclComm_t comm,
89 hipStream_t stream);
90typedef ncclResult_t (*ncclAllToAllv_fn_t)(const void* sendbuff,
91 const size_t sendcounts[],
92 const size_t sdispls[],
93 void* recvbuff,
94 const size_t recvcounts[],
95 const size_t rdispls[],
96 ncclDataType_t datatype,
97 ncclComm_t comm,
98 hipStream_t stream);
99typedef ncclResult_t (*ncclBroadcast_fn_t)(const void* sendbuff,
100 void* recvbuff,
101 size_t count,
102 ncclDataType_t datatype,
103 int root,
104 ncclComm_t comm,
105 hipStream_t stream);
106typedef ncclResult_t (*ncclGather_fn_t)(const void* sendbuff,
107 void* recvbuff,
108 size_t sendcount,
109 ncclDataType_t datatype,
110 int root,
111 ncclComm_t comm,
112 hipStream_t stream);
113typedef ncclResult_t (*ncclReduce_fn_t)(const void* sendbuff,
114 void* recvbuff,
115 size_t count,
116 ncclDataType_t datatype,
117 ncclRedOp_t op,
118 int root,
119 ncclComm_t comm,
120 hipStream_t stream);
121typedef ncclResult_t (*ncclReduceScatter_fn_t)(const void* sendbuff,
122 void* recvbuff,
123 size_t recvcount,
124 ncclDataType_t datatype,
125 ncclRedOp_t op,
126 struct ncclComm* comm,
127 hipStream_t stream);
128typedef ncclResult_t (*ncclScatter_fn_t)(const void* sendbuff,
129 void* recvbuff,
130 size_t recvcount,
131 ncclDataType_t datatype,
132 int root,
133 ncclComm_t comm,
134 hipStream_t stream);
135typedef ncclResult_t (*ncclSend_fn_t)(const void* sendbuff,
136 size_t count,
137 ncclDataType_t datatype,
138 int peer,
139 ncclComm_t comm,
140 hipStream_t stream);
141typedef ncclResult_t (*ncclRecv_fn_t)(void* recvbuff,
142 size_t count,
143 ncclDataType_t datatype,
144 int peer,
145 ncclComm_t comm,
146 hipStream_t stream);
148 void* scalar,
149 ncclDataType_t datatype,
150 ncclScalarResidence_t residence,
151 ncclComm_t comm);
155typedef ncclResult_t (*ncclGetVersion_fn_t)(int* version);
157
159 int nranks,
160 ncclUniqueId commId,
161 int myrank);
162
163typedef ncclResult_t (*ncclCommInitAll_fn_t)(ncclComm_t* comms, int ndev, const int* devlist);
164
166 int nranks,
167 ncclUniqueId commId,
168 int myrank,
169 ncclConfig_t* config);
170
172
174
176
178 int color,
179 int key,
180 ncclComm_t* newcomm,
181 ncclConfig_t* config);
182
183typedef const char* (*ncclGetErrorString_fn_t)(ncclResult_t code);
184
185typedef const char* (*ncclGetLastError_fn_t)(const ncclComm_t comm);
186
188
189typedef ncclResult_t (*ncclCommCount_fn_t)(const ncclComm_t comm, int* count);
190
191typedef ncclResult_t (*ncclCommCuDevice_fn_t)(const ncclComm_t comm, int* devid);
192
193typedef ncclResult_t (*ncclCommUserRank_fn_t)(const ncclComm_t comm, int* rank);
194
195typedef ncclResult_t (*ncclMemAlloc_fn_t)(void** ptr, size_t size);
196
197typedef ncclResult_t (*ncclMemFree_fn_t)(void* ptr);
198
199typedef ncclResult_t (*mscclLoadAlgo_fn_t)(const char* mscclAlgoFilePath,
200 mscclAlgoHandle_t* mscclAlgoHandle,
201 int rank);
202
203typedef ncclResult_t (*mscclRunAlgo_fn_t)(const void* sendBuff,
204 const size_t sendCounts[],
205 const size_t sDisPls[],
206 void* recvBuff,
207 const size_t recvCounts[],
208 const size_t rDisPls[],
209 size_t count,
210 ncclDataType_t dataType,
211 int root,
212 int peer,
213 ncclRedOp_t op,
214 mscclAlgoHandle_t mscclAlgoHandle,
215 ncclComm_t comm,
216 hipStream_t stream);
217
219
221 void* buff,
222 size_t size,
223 void** handle);
224
225typedef ncclResult_t (*ncclCommDeregister_fn_t)(const ncclComm_t comm, void* handle);
226
227typedef struct rcclApiFuncTable
228{
229 uint64_t size;
267
269
ncclRecv_fn_t ncclRecv_fn
Definition api_trace.h:240
ncclCommUserRank_fn_t ncclCommUserRank_fn
Definition api_trace.h:259
ncclResult_t(* ncclRecv_fn_t)(void *recvbuff, unsigned long count, ncclDataType_t datatype, int peer, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:141
ncclResult_t(* ncclCommInitRankConfig_fn_t)(ncclComm_t *comm, int nranks, ncclUniqueId commId, int myrank, ncclConfig_t *config)
Definition api_trace.h:165
ncclGroupEnd_fn_t ncclGroupEnd_fn
Definition api_trace.h:244
ncclResult_t(* ncclCommDestroy_fn_t)(ncclComm_t comm)
Definition api_trace.h:173
ncclAllToAll_fn_t ncclAllToAll_fn
Definition api_trace.h:232
mscclLoadAlgo_fn_t mscclLoadAlgo_fn
Definition api_trace.h:262
uint64_t rccl_range_id_t
Definition api_trace.h:70
const char *(* ncclGetErrorString_fn_t)(ncclResult_t code)
Definition api_trace.h:183
ncclResult_t(* ncclSend_fn_t)(const void *sendbuff, unsigned long count, ncclDataType_t datatype, int peer, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:135
#define RCCL_EXTERN_C_FINI
Definition api_trace.h:64
ncclGetLastError_fn_t ncclGetLastError_fn
Definition api_trace.h:255
ncclBroadcast_fn_t ncclBroadcast_fn
Definition api_trace.h:234
ncclResult_t(* mscclRunAlgo_fn_t)(const void *sendBuff, const unsigned long sendCounts[], const unsigned long sDisPls[], void *recvBuff, const unsigned long recvCounts[], const unsigned long rDisPls[], unsigned long count, ncclDataType_t dataType, int root, int peer, ncclRedOp_t op, mscclAlgoHandle_t mscclAlgoHandle, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:203
ncclResult_t(* ncclBroadcast_fn_t)(const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:99
ncclResult_t(* ncclCommDeregister_fn_t)(const ncclComm_t comm, void *handle)
Definition api_trace.h:225
mscclRunAlgo_fn_t mscclRunAlgo_fn
Definition api_trace.h:263
ncclResult_t(* ncclCommSplit_fn_t)(ncclComm_t comm, int color, int key, ncclComm_t *newcomm, ncclConfig_t *config)
Definition api_trace.h:177
ncclCommRegister_fn_t ncclCommRegister_fn
Definition api_trace.h:265
ncclReduce_fn_t ncclReduce_fn
Definition api_trace.h:236
ncclResult_t(* ncclAllToAll_fn_t)(const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:84
ncclResult_t(* ncclCommUserRank_fn_t)(const ncclComm_t comm, int *rank)
Definition api_trace.h:193
ncclResult_t(* ncclCommCuDevice_fn_t)(const ncclComm_t comm, int *devid)
Definition api_trace.h:191
ncclResult_t(* ncclRedOpDestroy_fn_t)(ncclRedOp_t op, ncclComm_t comm)
Definition api_trace.h:152
ncclResult_t(* ncclGather_fn_t)(const void *sendbuff, void *recvbuff, unsigned long sendcount, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:106
ncclGetVersion_fn_t ncclGetVersion_fn
Definition api_trace.h:245
ncclCommInitRank_fn_t ncclCommInitRank_fn
Definition api_trace.h:247
ncclResult_t(* ncclCommCount_fn_t)(const ncclComm_t comm, int *count)
Definition api_trace.h:189
ncclResult_t(* ncclMemFree_fn_t)(void *ptr)
Definition api_trace.h:197
ncclResult_t(* ncclGetVersion_fn_t)(int *version)
Definition api_trace.h:155
ncclResult_t(* ncclReduce_fn_t)(const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclRedOp_t op, int root, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:113
ncclAllReduce_fn_t ncclAllReduce_fn
Definition api_trace.h:231
#define RCCL_EXTERN_C_INIT
Definition api_trace.h:56
ncclResult_t(* ncclAllReduce_fn_t)(const void *sendbuff, void *recvbuff, unsigned long count, ncclDataType_t datatype, ncclRedOp_t op, struct ncclComm *comm, hipStream_t stream)
Definition api_trace.h:77
ncclCommInitRankConfig_fn_t ncclCommInitRankConfig_fn
Definition api_trace.h:249
ncclSend_fn_t ncclSend_fn
Definition api_trace.h:239
ncclResult_t(* ncclCommGetAsyncError_fn_t)(ncclComm_t comm, ncclResult_t *asyncError)
Definition api_trace.h:187
ncclCommDeregister_fn_t ncclCommDeregister_fn
Definition api_trace.h:266
ncclResult_t(* ncclMemAlloc_fn_t)(void **ptr, unsigned long size)
Definition api_trace.h:195
ncclCommCount_fn_t ncclCommCount_fn
Definition api_trace.h:257
ncclResult_t(* ncclGroupStart_fn_t)()
Definition api_trace.h:153
ncclReduceScatter_fn_t ncclReduceScatter_fn
Definition api_trace.h:237
ncclResult_t(* mscclUnloadAlgo_fn_t)(mscclAlgoHandle_t mscclAlgoHandle)
Definition api_trace.h:218
ncclResult_t(* ncclCommAbort_fn_t)(ncclComm_t comm)
Definition api_trace.h:175
ncclCommCuDevice_fn_t ncclCommCuDevice_fn
Definition api_trace.h:258
ncclMemFree_fn_t ncclMemFree_fn
Definition api_trace.h:261
ncclCommInitAll_fn_t ncclCommInitAll_fn
Definition api_trace.h:248
ncclGetErrorString_fn_t ncclGetErrorString_fn
Definition api_trace.h:254
ncclResult_t(* ncclReduceScatter_fn_t)(const void *sendbuff, void *recvbuff, unsigned long recvcount, ncclDataType_t datatype, ncclRedOp_t op, struct ncclComm *comm, hipStream_t stream)
Definition api_trace.h:121
ncclGetUniqueId_fn_t ncclGetUniqueId_fn
Definition api_trace.h:246
ncclGather_fn_t ncclGather_fn
Definition api_trace.h:235
ncclGroupStart_fn_t ncclGroupStart_fn
Definition api_trace.h:243
ncclRedOpCreatePreMulSum_fn_t ncclRedOpCreatePreMulSum_fn
Definition api_trace.h:241
ncclResult_t(* ncclScatter_fn_t)(const void *sendbuff, void *recvbuff, unsigned long recvcount, ncclDataType_t datatype, int root, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:128
ncclRedOpDestroy_fn_t ncclRedOpDestroy_fn
Definition api_trace.h:242
ncclResult_t(* ncclAllGather_fn_t)(const void *sendbuff, void *recvbuff, unsigned long sendcount, ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:71
ncclCommAbort_fn_t ncclCommAbort_fn
Definition api_trace.h:252
ncclAllGather_fn_t ncclAllGather_fn
Definition api_trace.h:230
ncclMemAlloc_fn_t ncclMemAlloc_fn
Definition api_trace.h:260
ncclCommSplit_fn_t ncclCommSplit_fn
Definition api_trace.h:253
ncclScatter_fn_t ncclScatter_fn
Definition api_trace.h:238
ncclResult_t(* ncclGroupEnd_fn_t)()
Definition api_trace.h:154
ncclResult_t(* ncclAllToAllv_fn_t)(const void *sendbuff, const unsigned long sendcounts[], const unsigned long sdispls[], void *recvbuff, const unsigned long recvcounts[], const unsigned long rdispls[], ncclDataType_t datatype, ncclComm_t comm, hipStream_t stream)
Definition api_trace.h:90
ncclCommGetAsyncError_fn_t ncclCommGetAsyncError_fn
Definition api_trace.h:256
ncclResult_t(* ncclCommRegister_fn_t)(const ncclComm_t comm, void *buff, unsigned long size, void **handle)
Definition api_trace.h:220
ncclCommDestroy_fn_t ncclCommDestroy_fn
Definition api_trace.h:251
ncclResult_t(* ncclCommInitRank_fn_t)(ncclComm_t *newcomm, int nranks, ncclUniqueId commId, int myrank)
Definition api_trace.h:158
ncclResult_t(* ncclCommInitAll_fn_t)(ncclComm_t *comms, int ndev, const int *devlist)
Definition api_trace.h:163
ncclResult_t(* ncclGetUniqueId_fn_t)(ncclUniqueId *out)
Definition api_trace.h:156
ncclResult_t(* ncclCommFinalize_fn_t)(ncclComm_t comm)
Definition api_trace.h:171
mscclUnloadAlgo_fn_t mscclUnloadAlgo_fn
Definition api_trace.h:264
ncclAllToAllv_fn_t ncclAllToAllv_fn
Definition api_trace.h:233
ncclCommFinalize_fn_t ncclCommFinalize_fn
Definition api_trace.h:250
ncclResult_t(* mscclLoadAlgo_fn_t)(const char *mscclAlgoFilePath, mscclAlgoHandle_t *mscclAlgoHandle, int rank)
Definition api_trace.h:199
ncclResult_t(* ncclRedOpCreatePreMulSum_fn_t)(ncclRedOp_t *op, void *scalar, ncclDataType_t datatype, ncclScalarResidence_t residence, ncclComm_t comm)
Definition api_trace.h:147
const char *(* ncclGetLastError_fn_t)(const ncclComm_t comm)
Definition api_trace.h:185
int mscclAlgoHandle_t
Opaque handle to MSCCL algorithm.
Definition rccl.h:944
ncclScalarResidence_t
Location and dereferencing logic for scalar arguments.
Definition rccl.h:469
ncclRedOp_t
Reduction operation selector.
Definition rccl.h:417
ncclDataType_t
Data types.
Definition rccl.h:431
ncclResult_t
Result type.
Definition rccl.h:56
const struct ncclComm * ncclComm_t
Opaque handle to communicator.
Definition rccl.h:38
ncclUniqueId
Opaque unique id used to initialize communicators.
Definition rccl.h:45