rocprofiler-sdk/callback_tracing.h Source File

rocprofiler-sdk/callback_tracing.h Source File#

Rocprofiler SDK Developer API: rocprofiler-sdk/callback_tracing.h Source File
Rocprofiler SDK Developer API 0.5.0
ROCm Profiling API and tools
callback_tracing.h
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#pragma once
24
26#include <rocprofiler-sdk/fwd.h>
27#include <rocprofiler-sdk/hip.h>
28#include <rocprofiler-sdk/hsa.h>
31
32#include <hsa/hsa.h>
33#include <hsa/hsa_amd_tool.h>
34#include <hsa/hsa_ext_amd.h>
35#include <hsa/hsa_ven_amd_loader.h>
36
37#include <stdint.h>
38
39ROCPROFILER_EXTERN_C_INIT
40
41/**
42 * @defgroup CALLBACK_TRACING_SERVICE Synchronous Tracing Services
43 * @brief Receive immediate callbacks on the calling thread
44 *
45 * @{
46 */
47
48/**
49 * @brief ROCProfiler Enumeration for code object storage types (identical values to
50 * `hsa_ven_amd_loader_code_object_storage_type_t` enumeration)
51 */
52typedef enum
53{
54 ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_NONE = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE,
55 ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE,
57 HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY,
60
61/**
62 * @brief ROCProfiler HSA API Callback Data.
63 */
70
71/**
72 * @brief ROCProfiler HIP runtime and compiler API Tracer Callback Data.
73 */
80
81/**
82 * @brief ROCProfiler Marker Tracer Callback Data.
83 */
90
91/**
92 * @brief ROCProfiler RCCL API Callback Data.
93 */
100
101/**
102 * @brief ROCProfiler Code Object Load Tracer Callback Record.
103 */
104typedef struct
105{
106 uint64_t size; ///< size of this struct
107 uint64_t code_object_id; ///< unique code object identifier
108 rocprofiler_agent_id_t rocp_agent; ///< The agent on which this loaded code object is loaded
109 hsa_agent_t hsa_agent; ///< The agent on which this loaded code object is loaded
110 const char* uri; ///< The URI name from which the code object was loaded
111 uint64_t load_base; ///< The base memory address at which the code object is loaded. This is
112 ///< the base address of the allocation for the lowest addressed segment of
113 ///< the code object that is loaded. Note that any non-loaded segments
114 ///< before the first loaded segment are ignored.
115 uint64_t load_size; ///< The byte size of the loaded code object's contiguous memory allocation.
116 int64_t load_delta; ///< The signed byte address difference of the memory address at which the
117 ///< code object is loaded minus the virtual address specified in the code
118 ///< object that is loaded.
120 storage_type; ///< storage type of the code object reader used to load the loaded code
121 ///< object
122 union
123 {
124 struct
125 {
126 int storage_file; ///< file descriptor of the code object that was loaded. Access this
127 ///< field if @ref rocprofiler_code_object_storage_type_t is
128 ///< @ref ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE
129 };
130 struct
131 {
132 uint64_t memory_base; ///< The memory address of the first byte of the code object that
133 ///< was loaded. Access this
134 ///< field if @ref rocprofiler_code_object_storage_type_t is
135 ///< @ref ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY
136 uint64_t memory_size; ///< The memory size in bytes of the code object that was loaded.
137 ///< Access this field if @ref
138 ///< rocprofiler_code_object_storage_type_t is
139 ///< @ref ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY
140 };
141 };
143
144/**
145 * @brief The NULL value of a code object id. Used when code object is unknown.
146 */
147#define ROCPROFILER_CODE_OBJECT_ID_NONE ROCPROFILER_UINT64_C(0)
148
149/**
150 * @brief ROCProfiler Code Object Kernel Symbol Tracer Callback Record.
151 *
152 */
153typedef struct
154{
155 uint64_t size; ///< size of this struct
156 uint64_t kernel_id; ///< unique symbol identifier value
157 uint64_t code_object_id; ///< parent unique code object identifier
158 const char* kernel_name; ///< name of the kernel
159 uint64_t kernel_object; ///< kernel object handle, used in the kernel dispatch packet
160 uint32_t kernarg_segment_size; ///< size of memory (in bytes) allocated for kernel arguments.
161 ///< Will be multiple of 16
162 uint32_t kernarg_segment_alignment; ///< Alignment (in bytes) of the buffer used to pass
163 ///< arguments to the kernel
164 uint32_t group_segment_size; ///< Size of static group segment memory required by the kernel
165 ///< (per work-group), in bytes. AKA: LDS size
166 uint32_t private_segment_size; ///< Size of static private, spill, and arg segment memory
167 ///< required by this kernel (per work-item), in bytes. AKA:
168 ///< scratch size
169 uint32_t sgpr_count; ///< Scalar general purpose register count
170 uint32_t arch_vgpr_count; ///< Architecture vector general purpose register count
171 uint32_t accum_vgpr_count; ///< Accum vector general purpose register count
172
174
175/**
176 * @brief ROCProfiler Kernel Dispatch Callback Tracer Record.
177 *
178 */
186
187/**
188 * @brief ROCProfiler Memory Copy Callback Tracer Record.
189 *
190 * The timestamps in this record will only be non-zero in the ::ROCPROFILER_CALLBACK_PHASE_EXIT
191 * callback
192 */
193typedef struct
194{
195 uint64_t size; ///< size of this struct
196 rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
197 rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
198 rocprofiler_agent_id_t dst_agent_id; ///< destination agent of copy
199 rocprofiler_agent_id_t src_agent_id; ///< source agent of copy
200 uint64_t bytes; ///< bytes copied
202
203/**
204 * @brief ROCProfiler Scratch Memory Callback Data.
205 */
215
216/**
217 * @brief API Tracing callback function. This function is invoked twice per API function: once
218 * before the function is invoked and once after the function is invoked. The external correlation
219 * id value within the record is assigned the value at the top of the external correlation id stack.
220 * It is permissible to invoke @ref rocprofiler_push_external_correlation_id within the enter phase;
221 * when a new external correlation id is pushed during the enter phase, rocprofiler will use that
222 * external correlation id for any async events and provide the new external correlation id during
223 * the exit callback... In other words, pushing a new external correlation id within the enter
224 * callback will result in that external correlation id value in the exit callback (which may or may
225 * not be different from the external correlation id value in the enter callback). If a tool pushes
226 * new external correlation ids in the enter phase, it is recommended to pop the external
227 * correlation id in the exit callback.
228 *
229 * @param [in] record Callback record data
230 * @param [in,out] user_data This parameter can be used to retain information in between the enter
231 * and exit phases.
232 * @param [in] callback_data User data provided when configuring the callback tracing service
233 */
235 rocprofiler_user_data_t* user_data,
236 void* callback_data) ROCPROFILER_NONNULL(2);
237
238/**
239 * @brief Callback function for mapping @ref rocprofiler_callback_tracing_kind_t ids to
240 * string names. @see rocprofiler_iterate_callback_tracing_kind_names.
241 */
243 void* data);
244
245/**
246 * @brief Callback function for mapping the operations of a given @ref
247 * rocprofiler_callback_tracing_kind_t to string names. @see
248 * rocprofiler_iterate_callback_tracing_kind_operation_names.
249 */
253 void* data);
254
255/**
256 * @brief Callback function for iterating over the function arguments to a traced function.
257 * This function will be invoked for each argument.
258 * @see rocprofiler_iterate_callback_tracing_operation_args
259 *
260 * @param [in] kind domain
261 * @param [in] operation associated domain operation
262 * @param [in] arg_number the argument number, starting at zero
263 * @param [in] arg_value_addr the address of the argument stored by rocprofiler.
264 * @param [in] arg_indirection_count the total number of indirection levels for the argument, e.g.
265 * int == 0, int* == 1, int** == 2
266 * @param [in] arg_type the typeid name of the argument
267 * @param [in] arg_name the name of the argument in the prototype (or rocprofiler union)
268 * @param [in] arg_value_str conversion of the argument to a string, e.g. operator<< overload
269 * @param [in] arg_dereference_count the number of times the argument was dereferenced when it was
270 * converted to a string
271 * @param [in] data user data
272 */
276 uint32_t arg_number,
277 const void* const arg_value_addr,
278 int32_t arg_indirection_count,
279 const char* arg_type,
280 const char* arg_name,
281 const char* arg_value_str,
282 int32_t arg_dereference_count,
283 void* data);
284
285/**
286 * @brief Configure Callback Tracing Service. The callback tracing service provides two synchronous
287 * callbacks around an API function on the same thread as the application which is invoking the API
288 * function. This function can only be invoked once per @ref
289 * rocprofiler_callback_tracing_kind_t value, i.e. it can be invoked once for the HSA API,
290 * once for the HIP API, and so on but it will fail if it is invoked for the HSA API twice. Please
291 * note, the callback API does have the potentially non-trivial overhead of copying the function
292 * arguments into the record. If you are willing to let rocprofiler record the timestamps, do not
293 * require synchronous notifications of the API calls, and want the lowest possible overhead, use the
294 * @see BUFFER_TRACING_SERVICE.
295 *
296 * @param [in] context_id Context to associate the service with
297 * @param [in] kind The domain of the callback tracing service
298 * @param [in] operations Array of operations in the domain (i.e. enum values which identify
299 * specific API functions). If this is null, all API functions in the domain will be traced
300 * @param [in] operations_count If the operations array is non-null, set this to the size of the
301 * array.
302 * @param [in] callback The function to invoke before and after an API function
303 * @param [in] callback_args Data provided to every invocation of the callback function
304 * @return ::rocprofiler_status_t
305 * @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Invoked outside of the initialization
306 * function in @ref rocprofiler_tool_configure_result_t provided to rocprofiler via @ref
307 * rocprofiler_configure function
308 * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND The provided context is not valid/registered
309 * @retval ::ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED if the same @ref
310 * rocprofiler_callback_tracing_kind_t value is provided more than once (per context) -- in
311 * other words, we do not support overriding or combining the operations in separate function calls.
312 *
313 */
317 const rocprofiler_tracing_operation_t* operations,
318 size_t operations_count,
320 void* callback_args) ROCPROFILER_API;
321
322/**
323 * @brief Query the name of the callback tracing kind. The name retrieved from this function is a
324 * string literal that is encoded in the read-only section of the binary (i.e. it is always
325 * "allocated" and never "deallocated").
326 *
327 * @param [in] kind Callback tracing domain
328 * @param [out] name If non-null and the name is a constant string that does not require dynamic
329 * allocation, this parameter will be set to the address of the string literal, otherwise it will
330 * be set to nullptr
331 * @param [out] name_len If non-null, this will be assigned the length of the name (regardless of
332 * whether the name is a constant string or requires dynamic allocation)
333 * @return ::rocprofiler_status_t
334 */
337 const char** name,
338 uint64_t* name_len) ROCPROFILER_API;
339
340/**
341 * @brief Query the name of an operation within a callback tracing kind. The name retrieved from this function is a
342 * string literal that is encoded in the read-only section of the binary (i.e. it is always
343 * "allocated" and never "deallocated").
344 *
345 * @param [in] kind Callback tracing domain
346 * @param [in] operation Enumeration id value which maps to a specific API function or event type
347 * @param [out] name If non-null and the name is a constant string that does not require dynamic
348 * allocation, this parameter will be set to the address of the string literal, otherwise it will
349 * be set to nullptr
350 * @param [out] name_len If non-null, this will be assigned the length of the name (regardless of
351 * whether the name is a constant string or requires dynamic allocation)
352 * @return ::rocprofiler_status_t
353 * @retval ::ROCPROFILER_STATUS_ERROR_KIND_NOT_FOUND Domain id is not valid
354 * @retval ::ROCPROFILER_STATUS_SUCCESS Valid domain provided, regardless if there is a constant
355 * string or not.
356 */
360 const char** name,
361 uint64_t* name_len) ROCPROFILER_API;
362
363/**
364 * @brief Iterate over all the mappings of the callback tracing kinds and get a callback for each
365 * kind.
366 *
367 * @param [in] callback Callback function invoked for each enumeration value in @ref
368 * rocprofiler_callback_tracing_kind_t with the exception of the `NONE` and `LAST` values.
369 * @param [in] data User data passed back into the callback
370 * @return ::rocprofiler_status_t
371 */
374 void* data) ROCPROFILER_API ROCPROFILER_NONNULL(1);
375
376/**
377 * @brief Iterates over all the mappings of the operations for a given @ref
378 * rocprofiler_callback_tracing_kind_t and invokes the callback with the kind id, operation
379 * id, and user-provided data.
380 *
381 * @param [in] kind which tracing callback kind operations to iterate over
382 * @param [in] callback Callback function invoked for each operation associated with @ref
383 * rocprofiler_callback_tracing_kind_t with the exception of the `NONE` and `LAST` values.
384 * @param [in] data User data passed back into the callback
385 * @return ::rocprofiler_status_t
386 * @retval ::ROCPROFILER_STATUS_ERROR_KIND_NOT_FOUND Invalid domain id
387 * @retval ::ROCPROFILER_STATUS_SUCCESS Valid domain
388 */
393 void* data) ROCPROFILER_API ROCPROFILER_NONNULL(2);
394
395/**
396 * @brief Iterates over all the arguments for the traced function (when available). This is
397 * particularly useful when tools want to annotate traces with the function arguments. See
398 * @example samples/api_callback_tracing/client.cpp for a usage example.
399 *
400 * It is recommended to use this function when the record phase is ::ROCPROFILER_CALLBACK_PHASE_EXIT
401 * or ::ROCPROFILER_CALLBACK_PHASE_NONE. When the phase is ::ROCPROFILER_CALLBACK_PHASE_ENTER, the
402 * function may have output parameters which have not been set. In the case of an output parameter with
403 * one level of indirection, e.g. `int* output_len`, this is considered safe since the output
404 * parameter is either null or, in the worst case scenario, pointing to an uninitialized value which
405 * will result in garbage values to be stringified. However, if the output parameter has more than
406 * one level of indirection, e.g. `const char** output_name`, this can result in a segmentation
407 * fault because the dereferenced output parameter may be uninitialized and point to an invalid
408 * address. E.g.:
409 *
410 * @code{.cpp}
411 * struct dim3
412 * {
413 * int x;
414 * int y;
415 * int z;
416 * };
417 *
418 * static dim3 default_dims = {.x = 1, .y = 1, .z = 1};
419 *
420 * void set_dim_x(int val, dim3* output_dims) { output_dims->x = val; }
421 *
422 * void get_default_dims(dim3** output_dims) { *output_dims = &default_dims; }
423 *
424 * int main()
425 * {
426 * dim3 my_dims; // uninitialized value. x, y, and z may be set to random values
427 * dim3* current_dims; // uninitialized pointer. May be set to invalid address
428 *
429 * set_dim_x(3, &my_dims); // if rocprofiler-sdk wrapped this function and tried to stringify
430 * // in the enter phase, dereferencing my_dims is not problematic
431 * // since there is an actual dim3 allocation
432 *
433 * get_default_dims(&current_dims); // if rocprofiler-sdk wrapped this function,
434 * // and tried to stringify in the enter phase,
435 * // current_dims may point to an address outside
436 * // of the address space of this process and
437 * // cause a segfault
438 * }
439 * @endcode
440 *
441 *
442 * @param[in] record Record provided by service callback
443 * @param[in] callback The callback function which will be invoked for each argument
444 * @param[in] max_dereference_count In the callback enter phase, certain arguments may be output
445 * parameters which have not been set. When the output parameter has multiple levels of indirection,
446 * it may be invalid to dereference the output parameter more than once and doing so may result in a
447 * segmentation fault. Thus, it is recommended to set this parameter to a maximum value of 1 when
448 * the phase is ::ROCPROFILER_CALLBACK_PHASE_ENTER to ensure that output parameters which point to
449 * uninitialized pointers do not cause segmentation faults.
450 * @param[in] user_data Data to be passed to each invocation of the callback
451 */
456 int32_t max_dereference_count,
457 void* user_data) ROCPROFILER_API ROCPROFILER_NONNULL(2);
458
459/** @} */
460
461ROCPROFILER_EXTERN_C_FINI
int32_t rocprofiler_tracing_operation_t
Tracing Operation ID. Depending on the kind, operations can be determined. If the value is equal to z...
Definition fwd.h:452
rocprofiler_scratch_alloc_flag_t
Allocation flags for.
Definition fwd.h:338
rocprofiler_status_t
Status codes.
Definition fwd.h:55
uint64_t rocprofiler_timestamp_t
ROCProfiler Timestamp.
Definition fwd.h:433
rocprofiler_callback_tracing_kind_t
Service Callback Tracing Kind.
Definition fwd.h:157
Agent Identifier.
Definition fwd.h:545
Context ID.
Definition fwd.h:506
ROCProfiler kernel dispatch information.
Definition fwd.h:657
User-assignable data type.
Definition fwd.h:491
uint32_t group_segment_size
Size of static group segment memory required by the kernel (per work-group), in bytes....
rocprofiler_agent_id_t rocp_agent
The agent on which this loaded code object is loaded.
rocprofiler_timestamp_t start_timestamp
start time in nanoseconds
uint64_t code_object_id
unique code object identifier
uint32_t arch_vgpr_count
Architecture vector general purpose register count.
rocprofiler_timestamp_t end_timestamp
end time in nanoseconds
rocprofiler_kernel_dispatch_info_t dispatch_info
Dispatch info.
uint32_t kernarg_segment_alignment
Alignment (in bytes) of the buffer used to pass arguments to the kernel.
rocprofiler_timestamp_t start_timestamp
start time in nanoseconds
uint32_t accum_vgpr_count
Accum vector general purpose register count.
rocprofiler_code_object_storage_type_t storage_type
storage type of the code object reader used to load the loaded code object
hsa_agent_t hsa_agent
The agent on which this loaded code object is loaded.
uint64_t load_base
The base memory address at which the code object is loaded. This is the base address of the allocatio...
uint32_t kernarg_segment_size
size of memory (in bytes) allocated for kernel arguments. Will be multiple of 16
uint64_t load_size
The byte size of the loaded code objects contiguous memory allocation.
uint32_t private_segment_size
Size of static private, spill, and arg segment memory required by this kernel (per work-item),...
const char * uri
The URI name from which the code object was loaded.
rocprofiler_timestamp_t end_timestamp
end time in nanoseconds
uint64_t kernel_object
kernel object handle, used in the kernel dispatch packet
rocprofiler_agent_id_t dst_agent_id
destination agent of copy
int64_t load_delta
The signed byte address difference of the memory address at which the code object is loaded minus the...
rocprofiler_agent_id_t src_agent_id
source agent of copy
rocprofiler_status_t rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_tracing_kind_t kind, rocprofiler_tracing_operation_t operation, const char **name, uint64_t *name_len)
Query the name of the callback tracing kind. The name retrieved from this function is a string litera...
rocprofiler_status_t rocprofiler_iterate_callback_tracing_kind_operation_args(rocprofiler_callback_tracing_record_t record, rocprofiler_callback_tracing_operation_args_cb_t callback, int32_t max_dereference_count, void *user_data)
int(* rocprofiler_callback_tracing_kind_cb_t)(rocprofiler_callback_tracing_kind_t kind, void *data)
Callback function for mapping rocprofiler_callback_tracing_kind_t ids to string names.
int(* rocprofiler_callback_tracing_kind_operation_cb_t)(rocprofiler_callback_tracing_kind_t kind, rocprofiler_tracing_operation_t operation, void *data)
Callback function for mapping the operations of a given rocprofiler_callback_tracing_kind_t to string...
int(* rocprofiler_callback_tracing_operation_args_cb_t)(rocprofiler_callback_tracing_kind_t kind, rocprofiler_tracing_operation_t operation, uint32_t arg_number, const void *const arg_value_addr, int32_t arg_indirection_count, const char *arg_type, const char *arg_name, const char *arg_value_str, int32_t arg_dereference_count, void *data)
Callback function for iterating over the function arguments to a traced function. This function will ...
rocprofiler_status_t rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t context_id, rocprofiler_callback_tracing_kind_t kind, const rocprofiler_tracing_operation_t *operations, unsigned long operations_count, rocprofiler_callback_tracing_cb_t callback, void *callback_args)
Configure Callback Tracing Service. The callback tracing service provides two synchronous callbacks a...
rocprofiler_status_t rocprofiler_query_callback_tracing_kind_name(rocprofiler_callback_tracing_kind_t kind, const char **name, uint64_t *name_len)
Query the name of the callback tracing kind. The name retrieved from this function is a string litera...
rocprofiler_code_object_storage_type_t
ROCProfiler Enumeration for code object storage types (identical values to hsa_ven_amd_loader_code_ob...
rocprofiler_status_t rocprofiler_iterate_callback_tracing_kind_operations(rocprofiler_callback_tracing_kind_t kind, rocprofiler_callback_tracing_kind_operation_cb_t callback, void *data)
Iterates over all the mappings of the operations for a given rocprofiler_callback_tracing_kind_t and ...
rocprofiler_status_t rocprofiler_iterate_callback_tracing_kinds(rocprofiler_callback_tracing_kind_cb_t callback, void *data)
Iterate over all the mappings of the callback tracing kinds and get a callback for each kind.
void(* rocprofiler_callback_tracing_cb_t)(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t *user_data, void *callback_data)
API Tracing callback function. This function is invoked twice per API function: once before the funct...
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_NONE
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_LAST
ROCProfiler Code Object Kernel Symbol Tracer Callback Record.
ROCProfiler Code Object Load Tracer Callback Record.
ROCProfiler HIP runtime and compiler API Tracer Callback Data.
ROCProfiler HSA API Callback Data.
ROCProfiler Kernel Dispatch Callback Tracer Record.
ROCProfiler Marker Tracer Callback Data.
ROCProfiler Memory Copy Callback Tracer Record.
ROCProfiler RCCL API Callback Data.
ROCProfiler Scratch Memory Callback Data.