rocprofiler-sdk/callback_tracing.h Source File

rocprofiler-sdk/callback_tracing.h Source File#

Rocprofiler SDK Developer API: rocprofiler-sdk/callback_tracing.h Source File
Rocprofiler SDK Developer API 0.4.0
ROCm Profiling API and tools
callback_tracing.h
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#pragma once
24
26#include <rocprofiler-sdk/fwd.h>
27#include <rocprofiler-sdk/hip.h>
28#include <rocprofiler-sdk/hsa.h>
30
31#include <hsa/hsa.h>
32#include <hsa/hsa_amd_tool.h>
33#include <hsa/hsa_ext_amd.h>
34#include <hsa/hsa_ven_amd_loader.h>
35
36#include <stdint.h>
37
38ROCPROFILER_EXTERN_C_INIT
39
40/**
41 * @defgroup CALLBACK_TRACING_SERVICE Synchronous Tracing Services
42 * @brief Receive immediate callbacks on the calling thread
43 *
44 * @{
45 */
46
47/**
48 * @brief ROCProfiler Enumeration for code object storage types (identical values to
49 * `hsa_ven_amd_loader_code_object_storage_type_t` enumeration)
50 */
51typedef enum
52{
53 ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_NONE = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE,
54 ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE,
56 HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY,
59
60/**
61 * @brief ROCProfiler HSA API Callback Data.
62 */
69
70/**
71 * @brief ROCProfiler HIP runtime and compiler API Tracer Callback Data.
72 */
79
80/**
81 * @brief ROCProfiler Marker Tracer Callback Data.
82 */
89
90/**
91 * @brief ROCProfiler Code Object Load Tracer Callback Record.
92 */
93typedef struct
94{
95 uint64_t size; ///< size of this struct
96 uint64_t code_object_id; ///< unique code object identifier
97 rocprofiler_agent_id_t rocp_agent; ///< The agent on which this loaded code object is loaded
98 hsa_agent_t hsa_agent; ///< The agent on which this loaded code object is loaded
99 const char* uri; ///< The URI name from which the code object was loaded
100 uint64_t load_base; ///< The base memory address at which the code object is loaded. This is
101 ///< the base address of the allocation for the lowest addressed segment of
102 ///< the code object that is loaded. Note that any non-loaded segments
103 ///< before the first loaded segment are ignored.
104 uint64_t load_size; ///< The byte size of the loaded code object's contiguous memory allocation.
105 int64_t load_delta; ///< The signed byte address difference of the memory address at which the
106 ///< code object is loaded minus the virtual address specified in the code
107 ///< object that is loaded.
109 storage_type; ///< storage type of the code object reader used to load the loaded code
110 ///< object
111 union
112 {
113 struct
114 {
115 int storage_file; ///< file descriptor of the code object that was loaded. Access this
116 ///< field if @ref rocprofiler_code_object_storage_type_t is
117 ///< @ref ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE
118 };
119 struct
120 {
121 uint64_t memory_base; ///< The memory address of the first byte of the code object that
122 ///< was loaded. Access this
123 ///< field if @ref rocprofiler_code_object_storage_type_t is
124 ///< @ref ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY
125 uint64_t memory_size; ///< The memory size in bytes of the code object that was loaded.
126 ///< Access this field if @ref
127 ///< rocprofiler_code_object_storage_type_t is
128 ///< @ref ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY
129 };
130 };
132
133/**
134 * @brief ROCProfiler Code Object Kernel Symbol Tracer Callback Record.
135 *
136 */
137typedef struct
138{
139 uint64_t size; ///< size of this struct
140 uint64_t kernel_id; ///< unique symbol identifier value
141 uint64_t code_object_id; ///< parent unique code object identifier
142 const char* kernel_name; ///< name of the kernel
143 uint64_t kernel_object; ///< kernel object handle, used in the kernel dispatch packet
144 uint32_t kernarg_segment_size; ///< size of memory (in bytes) allocated for kernel arguments.
145 ///< Will be multiple of 16
146 uint32_t kernarg_segment_alignment; ///< Alignment (in bytes) of the buffer used to pass
147 ///< arguments to the kernel
148 uint32_t group_segment_size; ///< Size of static group segment memory required by the kernel
149 ///< (per work-group), in bytes. AKA: LDS size
150 uint32_t private_segment_size; ///< Size of static private, spill, and arg segment memory
151 ///< required by this kernel (per work-item), in bytes. AKA:
152 ///< scratch size
153 uint32_t sgpr_count; ///< Scalar general purpose register count
154 uint32_t arch_vgpr_count; ///< Architecture vector general purpose register count
155 uint32_t accum_vgpr_count; ///< Accum vector general purpose register count
156
158
159/**
160 * @brief ROCProfiler Kernel Dispatch Callback Tracer Record.
161 *
162 */
170
171/**
172 * @brief ROCProfiler Memory Copy Callback Tracer Record.
173 *
174 * The timestamps in this record will only be non-zero in the ::ROCPROFILER_CALLBACK_PHASE_EXIT
175 * callback
176 */
177typedef struct
178{
179 uint64_t size; ///< size of this struct
180 rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
181 rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
182 rocprofiler_agent_id_t dst_agent_id; ///< destination agent of copy
183 rocprofiler_agent_id_t src_agent_id; ///< source agent of copy
184 uint64_t bytes; ///< bytes copied
186
187/**
188 * @brief ROCProfiler Scratch Memory Callback Data.
189 */
199
200/**
201 * @brief API Tracing callback function. This function is invoked twice per API function: once
202 * before the function is invoked and once after the function is invoked. The external correlation
203 * id value within the record is assigned the value at the top of the external correlation id stack.
204 * It is permissible to invoke @ref rocprofiler_push_external_correlation_id within the enter phase;
205 * when a new external correlation id is pushed during the enter phase, rocprofiler will use that
206 * external correlation id for any async events and provide the new external correlation id during
207 * the exit callback. In other words, pushing a new external correlation id within the enter
208 * callback will result in that external correlation id value in the exit callback (which may or may
209 * not be different from the external correlation id value in the enter callback). If a tool pushes
210 * new external correlation ids in the enter phase, it is recommended to pop the external
211 * correlation id in the exit callback.
212 *
213 * @param [in] record Callback record data
214 * @param [in,out] user_data This parameter can be used to retain information in between the enter
215 * and exit phases.
216 * @param [in] callback_data User data provided when configuring the callback tracing service
217 */
219 rocprofiler_user_data_t* user_data,
220 void* callback_data) ROCPROFILER_NONNULL(2);
221
222/**
223 * @brief Callback function for mapping @ref rocprofiler_callback_tracing_kind_t ids to
224 * string names. @see rocprofiler_iterate_callback_tracing_kind_names.
225 */
227 void* data);
228
229/**
230 * @brief Callback function for mapping the operations of a given @ref
231 * rocprofiler_callback_tracing_kind_t to string names. @see
232 * rocprofiler_iterate_callback_tracing_kind_operation_names.
233 */
236 uint32_t operation,
237 void* data);
238
239/**
240 * @brief Callback function for iterating over the function arguments to a traced function.
241 * This function will be invoked for each argument.
242 * @see rocprofiler_iterate_callback_tracing_operation_args
243 *
244 * @param [in] kind domain
245 * @param [in] operation associated domain operation
246 * @param [in] arg_number the argument number, starting at zero
247 * @param [in] arg_value_addr the address of the argument stored by rocprofiler.
248 * @param [in] arg_indirection_count the total number of indirection levels for the argument, e.g.
249 * int == 0, int* == 1, int** == 2
250 * @param [in] arg_type the typeid name of the argument
251 * @param [in] arg_name the name of the argument in the prototype (or rocprofiler union)
252 * @param [in] arg_value_str conversion of the argument to a string, e.g. operator<< overload
253 * @param [in] arg_dereference_count the number of times the argument was dereferenced when it was
254 * converted to a string
255 * @param [in] data user data
256 */
259 uint32_t operation,
260 uint32_t arg_number,
261 const void* const arg_value_addr,
262 int32_t arg_indirection_count,
263 const char* arg_type,
264 const char* arg_name,
265 const char* arg_value_str,
266 int32_t arg_dereference_count,
267 void* data);
268
269/**
270 * @brief Configure Callback Tracing Service. The callback tracing service provides two synchronous
271 * callbacks around an API function on the same thread as the application which is invoking the API
272 * function. This function can only be invoked once per @ref
273 * rocprofiler_callback_tracing_kind_t value, i.e. it can be invoked once for the HSA API,
274 * once for the HIP API, and so on but it will fail if it is invoked for the HSA API twice. Please
275 * note, the callback API does have the potentially non-trivial overhead of copying the function
276 * arguments into the record. If you are willing to let rocprofiler record the timestamps, do not
277 * require synchronous notifications of the API calls, and want the lowest possible overhead, use the
278 * @see BUFFER_TRACING_SERVICE.
279 *
280 * @param [in] context_id Context to associate the service with
281 * @param [in] kind The domain of the callback tracing service
282 * @param [in] operations Array of operations in the domain (i.e. enum values which identify
283 * specific API functions). If this is null, all API functions in the domain will be traced
284 * @param [in] operations_count If the operations array is non-null, set this to the size of the
285 * array.
286 * @param [in] callback The function to invoke before and after an API function
287 * @param [in] callback_args Data provided to every invocation of the callback function
288 * @return ::rocprofiler_status_t
289 * @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Invoked outside of the initialization
290 * function in @ref rocprofiler_tool_configure_result_t provided to rocprofiler via @ref
291 * rocprofiler_configure function
292 * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND The provided context is not valid/registered
293 * @retval ::ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED if the same @ref
294 * rocprofiler_callback_tracing_kind_t value is provided more than once (per context) -- in
295 * other words, we do not support overriding or combining the operations in separate function calls.
296 *
297 */
298rocprofiler_status_t ROCPROFILER_API
302 size_t operations_count,
304 void* callback_args);
305
306/**
307 * @brief Query the name of the callback tracing kind. The name retrieved from this function is a
308 * string literal that is encoded in the read-only section of the binary (i.e. it is always
309 * "allocated" and never "deallocated").
310 *
311 * @param [in] kind Callback tracing domain
312 * @param [out] name If non-null and the name is a constant string that does not require dynamic
313 * allocation, this parameter will be set to the address of the string literal, otherwise it will
314 * be set to nullptr
315 * @param [out] name_len If non-null, this will be assigned the length of the name (regardless of
316 * whether the name is a constant string or requires dynamic allocation)
317 * @return ::rocprofiler_status_t
318 */
321 const char** name,
322 uint64_t* name_len) ROCPROFILER_API;
323
324/**
325 * @brief Query the name of a callback tracing kind operation. The name retrieved from this function is a
326 * string literal that is encoded in the read-only section of the binary (i.e. it is always
327 * "allocated" and never "deallocated").
328 *
329 * @param [in] kind Callback tracing domain
330 * @param [in] operation Enumeration id value which maps to a specific API function or event type
331 * @param [out] name If non-null and the name is a constant string that does not require dynamic
332 * allocation, this parameter will be set to the address of the string literal, otherwise it will
333 * be set to nullptr
334 * @param [out] name_len If non-null, this will be assigned the length of the name (regardless of
335 * whether the name is a constant string or requires dynamic allocation)
336 * @return ::rocprofiler_status_t
337 * @retval ::ROCPROFILER_STATUS_ERROR_KIND_NOT_FOUND Domain id is not valid
338 * @retval ::ROCPROFILER_STATUS_SUCCESS Valid domain provided, regardless if there is a constant
339 * string or not.
340 */
343 uint32_t operation,
344 const char** name,
345 uint64_t* name_len) ROCPROFILER_API;
346
347/**
348 * @brief Iterate over all the mappings of the callback tracing kinds and get a callback for each
349 * kind.
350 *
351 * @param [in] callback Callback function invoked for each enumeration value in @ref
352 * rocprofiler_callback_tracing_kind_t with the exception of the `NONE` and `LAST` values.
353 * @param [in] data User data passed back into the callback
354 * @return ::rocprofiler_status_t
355 */
356rocprofiler_status_t ROCPROFILER_API
358 void* data) ROCPROFILER_NONNULL(1);
359
360/**
361 * @brief Iterates over all the mappings of the operations for a given @ref
362 * rocprofiler_callback_tracing_kind_t and invokes the callback with the kind id, operation
363 * id, and user-provided data.
364 *
365 * @param [in] kind which tracing callback kind operations to iterate over
366 * @param [in] callback Callback function invoked for each operation associated with @ref
367 * rocprofiler_callback_tracing_kind_t with the exception of the `NONE` and `LAST` values.
368 * @param [in] data User data passed back into the callback
369 * @return ::rocprofiler_status_t
370 * @retval ::ROCPROFILER_STATUS_ERROR_KIND_NOT_FOUND Invalid domain id
371 * @retval ::ROCPROFILER_STATUS_SUCCESS Valid domain
372 */
373rocprofiler_status_t ROCPROFILER_API
377 void* data) ROCPROFILER_NONNULL(2);
378
379/**
380 * @brief Iterates over all the arguments for the traced function (when available). This is
381 * particularly useful when tools want to annotate traces with the function arguments. See
382 * @example samples/api_callback_tracing/client.cpp for a usage example.
383 *
384 * It is recommended to use this function when the record phase is ::ROCPROFILER_CALLBACK_PHASE_EXIT
385 * or ::ROCPROFILER_CALLBACK_PHASE_NONE. When the phase is ::ROCPROFILER_CALLBACK_PHASE_ENTER, the
386 * function may have output parameters which have not been set. In the case of an output parameter with
387 * one level of indirection, e.g. `int* output_len`, this is considered safe since the output
388 * parameter is either null or, in the worst case scenario, pointing to an uninitialized value which
389 * will result in garbage values to be stringified. However, if the output parameter has more than
390 * one level of indirection, e.g. `const char** output_name`, this can result in a segmentation
391 * fault because the dereferenced output parameter may be uninitialized and point to an invalid
392 * address. E.g.:
393 *
394 * @code{.cpp}
395 * struct dim3
396 * {
397 * int x;
398 * int y;
399 * int z;
400 * };
401 *
402 * static dim3 default_dims = {.x = 1, .y = 1, .z = 1};
403 *
404 * void set_dim_x(int val, dim3* output_dims) { output_dims->x = val; }
405 *
406 * void get_default_dims(dim3** output_dims) { *output_dims = &default_dims; }
407 *
408 * int main()
409 * {
410 * dim3 my_dims; // uninitialized value. x, y, and z may be set to random values
411 * dim3* current_dims; // uninitialized pointer. May be set to invalid address
412 *
413 * set_dim_x(3, &my_dims); // if rocprofiler-sdk wrapped this function and tried to stringify
414 * // in the enter phase, dereferencing my_dims is not problematic
415 * // since there is an actual dim3 allocation
416 *
417 * get_default_dims(&current_dims); // if rocprofiler-sdk wrapped this function,
418 * // and tried to stringify in the enter phase,
419 * // current_dims may point to an address outside
420 * // of the address space of this process and
421 * // cause a segfault
422 * }
423 * @endcode
424 *
425 *
426 * @param[in] record Record provided by service callback
427 * @param[in] callback The callback function which will be invoked for each argument
428 * @param[in] max_dereference_count In the callback enter phase, certain arguments may be output
429 * parameters which have not been set. When the output parameter has multiple levels of indirection,
430 * it may be invalid to dereference the output parameter more than once and doing so may result in a
431 * segmentation fault. Thus, it is recommended to set this parameter to a maximum value of 1 when
432 * the phase is ::ROCPROFILER_CALLBACK_PHASE_ENTER to ensure that output parameters which point to
433 * uninitialized pointers do not cause segmentation faults.
434 * @param[in] user_data Data to be passed to each invocation of the callback
435 */
436rocprofiler_status_t ROCPROFILER_API
440 int32_t max_dereference_count,
441 void* user_data) ROCPROFILER_NONNULL(2);
442
443/** @} */
444
445ROCPROFILER_EXTERN_C_FINI
uint32_t rocprofiler_tracing_operation_t
Tracing Operation ID. Depending on the kind, operations can be determined. If the value is equal to z...
Definition fwd.h:448
rocprofiler_scratch_alloc_flag_t
Allocation flags for scratch memory.
Definition fwd.h:334
rocprofiler_status_t
Status codes.
Definition fwd.h:55
uint64_t rocprofiler_timestamp_t
ROCProfiler Timestamp.
Definition fwd.h:429
rocprofiler_callback_tracing_kind_t
Service Callback Tracing Kind.
Definition fwd.h:155
Agent Identifier.
Definition fwd.h:541
Context ID.
Definition fwd.h:502
ROCProfiler kernel dispatch information.
Definition fwd.h:653
User-assignable data type.
Definition fwd.h:487
uint32_t group_segment_size
Size of static group segment memory required by the kernel (per work-group), in bytes....
rocprofiler_agent_id_t rocp_agent
The agent on which this loaded code object is loaded.
rocprofiler_timestamp_t start_timestamp
start time in nanoseconds
uint64_t code_object_id
unique code object identifier
uint32_t arch_vgpr_count
Architecture vector general purpose register count.
rocprofiler_timestamp_t end_timestamp
end time in nanoseconds
rocprofiler_kernel_dispatch_info_t dispatch_info
Dispatch info.
uint32_t kernarg_segment_alignment
Alignment (in bytes) of the buffer used to pass arguments to the kernel.
rocprofiler_timestamp_t start_timestamp
start time in nanoseconds
uint32_t accum_vgpr_count
Accum vector general purpose register count.
rocprofiler_code_object_storage_type_t storage_type
storage type of the code object reader used to load the loaded code object
hsa_agent_t hsa_agent
The agent on which this loaded code object is loaded.
uint64_t load_base
The base memory address at which the code object is loaded. This is the base address of the allocatio...
uint32_t kernarg_segment_size
size of memory (in bytes) allocated for kernel arguments. Will be multiple of 16
uint64_t load_size
The byte size of the loaded code objects contiguous memory allocation.
uint32_t private_segment_size
Size of static private, spill, and arg segment memory required by this kernel (per work-item),...
const char * uri
The URI name from which the code object was loaded.
rocprofiler_timestamp_t end_timestamp
end time in nanoseconds
uint64_t kernel_object
kernel object handle, used in the kernel dispatch packet
rocprofiler_agent_id_t dst_agent_id
destination agent of copy
int64_t load_delta
The signed byte address difference of the memory address at which the code object is loaded minus the...
rocprofiler_agent_id_t src_agent_id
source agent of copy
int(* rocprofiler_callback_tracing_operation_args_cb_t)(rocprofiler_callback_tracing_kind_t kind, uint32_t operation, uint32_t arg_number, const void *const arg_value_addr, int32_t arg_indirection_count, const char *arg_type, const char *arg_name, const char *arg_value_str, int32_t arg_dereference_count, void *data)
Callback function for iterating over the function arguments to a traced function. This function will ...
rocprofiler_status_t rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t context_id, rocprofiler_callback_tracing_kind_t kind, rocprofiler_tracing_operation_t *operations, unsigned long operations_count, rocprofiler_callback_tracing_cb_t callback, void *callback_args)
Configure Callback Tracing Service. The callback tracing service provides two synchronous callbacks a...
rocprofiler_status_t rocprofiler_iterate_callback_tracing_kind_operation_args(rocprofiler_callback_tracing_record_t record, rocprofiler_callback_tracing_operation_args_cb_t callback, int32_t max_dereference_count, void *user_data)
rocprofiler_status_t rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_tracing_kind_t kind, uint32_t operation, const char **name, uint64_t *name_len)
Query the name of the callback tracing kind. The name retrieved from this function is a string litera...
int(* rocprofiler_callback_tracing_kind_cb_t)(rocprofiler_callback_tracing_kind_t kind, void *data)
Callback function for mapping rocprofiler_callback_tracing_kind_t ids to string names.
rocprofiler_status_t rocprofiler_query_callback_tracing_kind_name(rocprofiler_callback_tracing_kind_t kind, const char **name, uint64_t *name_len)
Query the name of the callback tracing kind. The name retrieved from this function is a string litera...
rocprofiler_code_object_storage_type_t
ROCProfiler Enumeration for code object storage types (identical values to hsa_ven_amd_loader_code_ob...
rocprofiler_status_t rocprofiler_iterate_callback_tracing_kind_operations(rocprofiler_callback_tracing_kind_t kind, rocprofiler_callback_tracing_kind_operation_cb_t callback, void *data)
Iterates over all the mappings of the operations for a given rocprofiler_callback_tracing_kind_t and ...
int(* rocprofiler_callback_tracing_kind_operation_cb_t)(rocprofiler_callback_tracing_kind_t kind, uint32_t operation, void *data)
Callback function for mapping the operations of a given rocprofiler_callback_tracing_kind_t to string...
rocprofiler_status_t rocprofiler_iterate_callback_tracing_kinds(rocprofiler_callback_tracing_kind_cb_t callback, void *data)
Iterate over all the mappings of the callback tracing kinds and get a callback for each kind.
void(* rocprofiler_callback_tracing_cb_t)(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t *user_data, void *callback_data)
API Tracing callback function. This function is invoked twice per API function: once before the funct...
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_NONE
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE
@ ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_LAST
ROCProfiler Code Object Kernel Symbol Tracer Callback Record.
ROCProfiler Code Object Load Tracer Callback Record.
ROCProfiler HIP runtime and compiler API Tracer Callback Data.
ROCProfiler HSA API Callback Data.
ROCProfiler Kernel Dispatch Callback Tracer Record.
ROCProfiler Marker Tracer Callback Data.
ROCProfiler Memory Copy Callback Tracer Record.
ROCProfiler Scratch Memory Callback Data.