This page contains proposed changes for a future release of ROCm. Read the latest Linux release of ROCm documentation for your production environments.

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocprofiler-docs/checkouts/amd-master/include/rocprofiler/rocprofiler.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocprofiler-docs/checkouts/amd-master/include/rocprofiler/rocprofiler.h Source File#

rocprofiler: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocprofiler-docs/checkouts/amd-master/include/rocprofiler/rocprofiler.h Source File
rocprofiler.h
Go to the documentation of this file.
1 /******************************************************************************
2 Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 *******************************************************************************/
22 
24 //
25 // ROC Profiler API
26 //
27 // The goal of the implementation is to provide a HW specific low-level
28 // performance analysis interface for profiling of GPU compute applications.
29 // The profiling includes HW performance counters (PMC) with complex
30 // performance metrics and traces.
31 //
32 // The library can be used by a tool library loaded by HSA runtime or by
33 // higher level HW independent performance analysis API like PAPI.
34 //
35 // The library is written in C and will be based on the AMD-specific AQLprofile
36 // HSA extension. The library implementation requires HSA API intercepting and
37 // a profiling queue supporting a submit callback interface.
38 //
39 //
40 
41 #ifndef INC_ROCPROFILER_H_
42 #define INC_ROCPROFILER_H_
43 
44 /* Placeholder for calling convention and import/export macros */
45 #if !defined(ROCPROFILER_CALL)
46 #define ROCPROFILER_CALL
47 #endif /* !defined (ROCPROFILER_CALL) */
48 
49 #if !defined(ROCPROFILER_EXPORT_DECORATOR)
50 #if defined(__GNUC__)
51 #define ROCPROFILER_EXPORT_DECORATOR __attribute__((visibility("default")))
52 #elif defined(_MSC_VER)
53 #define ROCPROFILER_EXPORT_DECORATOR __declspec(dllexport)
54 #endif /* defined (_MSC_VER) */
55 #endif /* !defined (ROCPROFILER_EXPORT_DECORATOR) */
56 
57 #if !defined(ROCPROFILER_IMPORT_DECORATOR)
58 #if defined(__GNUC__)
59 #define ROCPROFILER_IMPORT_DECORATOR
60 #elif defined(_MSC_VER)
61 #define ROCPROFILER_IMPORT_DECORATOR __declspec(dllimport)
62 #endif /* defined (_MSC_VER) */
63 #endif /* !defined (ROCPROFILER_IMPORT_DECORATOR) */
64 
65 #define ROCPROFILER_EXPORT ROCPROFILER_EXPORT_DECORATOR ROCPROFILER_CALL
66 #define ROCPROFILER_IMPORT ROCPROFILER_IMPORT_DECORATOR ROCPROFILER_CALL
67 
68 #if !defined(ROCPROFILER)
69 #if defined(ROCPROFILER_EXPORTS)
70 #define ROCPROFILER_API ROCPROFILER_EXPORT
71 #else /* !defined (ROCPROFILER_EXPORTS) */
72 #define ROCPROFILER_API ROCPROFILER_IMPORT
73 #endif /* !defined (ROCPROFILER_EXPORTS) */
74 #endif /* !defined (ROCPROFILER) */
75 
76 #include <stddef.h>
77 #include <stdint.h>
78 
79 #ifdef __cplusplus
80 extern "C" {
81 #endif /* __cplusplus */
82 
83 #include <hsa/amd_hsa_kernel_code.h>
84 #include <hsa/hsa.h>
85 #include <hsa/hsa_ext_amd.h>
86 #include <hsa/hsa_ven_amd_aqlprofile.h>
87 #include <stdint.h>
88 
89 
90 #define ROCPROFILER_VERSION_MAJOR 8
91 #define ROCPROFILER_VERSION_MINOR 0
92 
94 // Returning library version
97 
99 // Global properties structure
100 
101 typedef struct {
102  uint32_t intercept_mode;
105  uint32_t trace_size;
106  uint32_t trace_local;
107  uint64_t timeout;
108  uint32_t timestamp_on;
110  uint32_t k_concurrent;
111  uint32_t opt_mode;
112  uint32_t obj_dumping;
114 
116 // Returning the error string method
117 
119  const char** str); // [out] the API error string pointer returning
120 
122 // Profiling features and data
123 //
124 // Profiling features objects have profiling feature info, type, parameters and data
125 // Also profiling data samples can be iterated using a callback
126 
127 // Profiling feature kind
128 typedef enum {
134 
135 // Profiling feature parameter
136 typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t;
137 
138 // Profiling data kind
139 typedef enum {
147 
148 // Profiling data type
149 typedef struct {
151  union {
152  uint32_t result_int32; // 32bit integer result
153  uint64_t result_int64; // 64bit integer result
154  float result_float; // float single-precision result
155  double result_double; // float double-precision result
156  struct {
157  void* ptr;
158  uint32_t size;
159  uint32_t instance_count;
160  bool copy;
161  } result_bytes; // data by ptr and byte size
162  };
164 
165 // Profiling feature type
166 typedef struct {
168  union {
169  const char* name; // feature name
170  struct {
171  const char* block; // counter block name
172  uint32_t event; // counter event id
173  } counter;
174  };
175  const rocprofiler_parameter_t* parameters; // feature parameters array
176  uint32_t parameter_count; // feature parameters count
177  rocprofiler_data_t data; // profiling data
179 
180 // Profiling features set type
182 
184 // Profiling context
185 //
186 // Profiling context object accumulates all profiling information
187 
188 // Profiling context object
189 typedef void rocprofiler_t;
190 
191 // Profiling group object
192 typedef struct {
193  unsigned index; // group index
194  rocprofiler_feature_t** features; // profiling info array
195  uint32_t feature_count; // profiling info count
196  rocprofiler_t* context; // context object
198 
199 // Profiling mode mask
200 typedef enum {
201  ROCPROFILER_MODE_STANDALONE = 1, // standalone mode when ROC profiler supports a queue
202  ROCPROFILER_MODE_CREATEQUEUE = 2, // ROC profiler creates queue in standalone mode
203  ROCPROFILER_MODE_SINGLEGROUP = 4 // only one group is allowed, failed otherwise
205 
206 // Profiling handler, calling on profiling completion
207 typedef bool (*rocprofiler_handler_t)(rocprofiler_group_t group, void* arg);
208 
209 // Profiling properties
210 typedef struct {
211  hsa_queue_t* queue; // queue for STANDALONE mode
212  // the queue is created and returned in CREATEQUEUE mode
213  uint32_t queue_depth; // created queue depth
214  rocprofiler_handler_t handler; // handler on completion
215  void* handler_arg; // the handler arg
217 
218 // Create new profiling context
219 hsa_status_t rocprofiler_open(hsa_agent_t agent, // GPU handle
220  rocprofiler_feature_t* features, // [in] profiling features array
221  uint32_t feature_count, // profiling info count
222  rocprofiler_t** context, // [out] context object
223  uint32_t mode, // profiling mode mask
224  rocprofiler_properties_t* properties); // profiling properties
225 
226 // Add feature to a features set
228  const rocprofiler_feature_t* feature, // [in]
229  rocprofiler_feature_set_t* features_set); // [in/out] profiling features set
230 
231 // Create new profiling context
233  hsa_agent_t agent, // GPU handle
234  rocprofiler_feature_set_t* features_set, // [in] profiling features set
235  rocprofiler_t** context, // [out] context object
236  uint32_t mode, // profiling mode mask
237  rocprofiler_properties_t* properties); // profiling properties
238 
239 // Delete profiling info
240 hsa_status_t rocprofiler_close(rocprofiler_t* context); // [in] profiling context
241 
242 // Context reset before reusing
243 hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling context
244  uint32_t group_index); // group index
245 
246 // Return context agent
247 hsa_status_t rocprofiler_get_agent(rocprofiler_t* context, // [in] profiling context
248  hsa_agent_t* agent); // [out] GPU handle
249 
250 // Supported time value ID
251 typedef enum {
252  ROCPROFILER_TIME_ID_CLOCK_REALTIME = 0, // Linux realtime clock time
253  ROCPROFILER_TIME_ID_CLOCK_REALTIME_COARSE = 1, // Linux realtime-coarse clock time
254  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC = 2, // Linux monotonic clock time
255  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_COARSE = 3, // Linux monotonic-coarse clock time
256  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_RAW = 4, // Linux monotonic-raw clock time
258 
259 // Return time value for a given time ID and profiling timestamp
260 hsa_status_t rocprofiler_get_time(
261  rocprofiler_time_id_t time_id, // identifier of the particular time to convert the timestamp
262  uint64_t timestamp, // profiling timestamp
263  uint64_t* value_ns, // [out] returned time 'ns' value, ignored if NULL
264  uint64_t* error_ns); // [out] returned time error 'ns' value, ignored if NULL
265 
267 // Queue callbacks
268 //
269 // Queue callbacks for initiating profiling per kernel dispatch and for waiting
270 // for the profiling data when the queue is destroyed.
271 
272 // Dispatch record
273 typedef struct {
274  uint64_t dispatch; // dispatch timestamp, ns
275  uint64_t begin; // kernel begin timestamp, ns
276  uint64_t end; // kernel end timestamp, ns
277  uint64_t complete; // completion signal timestamp, ns
279 
280 // Profiling callback data
281 typedef struct {
282  hsa_agent_t agent; // GPU agent handle
283  uint32_t agent_index; // GPU index (GPU Driver Node ID as reported in the sysfs topology)
284  const hsa_queue_t* queue; // HSA queue
285  uint64_t queue_index; // Index in the queue
286  uint32_t queue_id; // Queue id
287  hsa_signal_t completion_signal; // Completion signal
288  const hsa_kernel_dispatch_packet_t* packet; // HSA dispatch packet
289  const char* kernel_name; // Kernel name
290  uint64_t kernel_object; // Kernel object address
291  const amd_kernel_code_t* kernel_code; // Kernel code pointer
292  uint32_t thread_id; // Thread id
293  const rocprofiler_dispatch_record_t* record; // Dispatch record
295 
296 // Profiling callback type
297 typedef hsa_status_t (*rocprofiler_callback_t)(
298  const rocprofiler_callback_data_t* callback_data, // [in] callback data
299  void* user_data, // [in/out] user data passed to the callback
300  rocprofiler_group_t* group); // [out] returned profiling group
301 
302 // Queue callbacks
303 typedef struct {
304  rocprofiler_callback_t dispatch; // dispatch callback
305  hsa_status_t (*create)(hsa_queue_t* queue, void* data); // create callback
306  hsa_status_t (*destroy)(hsa_queue_t* queue, void* data); // destroy callback
308 
309 // Set queue callbacks
311  void* data); // [in/out] passed callbacks data
312 
313 // Remove queue callbacks
315 
316 // Start/stop queue callbacks
319 
321 // Start/stop profiling
322 //
323 // Start/stop the context profiling invocation; these have to be called as many
324 // times as 'context.invocations' to collect all profiling data
325 
326 // Start profiling
327 hsa_status_t rocprofiler_start(rocprofiler_t* context, // [in/out] profiling context
328  uint32_t group_index); // group index
329 
330 // Stop profiling
331 hsa_status_t rocprofiler_stop(rocprofiler_t* context, // [in/out] profiling context
332  uint32_t group_index); // group index
333 
334 // Read profiling
335 hsa_status_t rocprofiler_read(rocprofiler_t* context, // [in/out] profiling context
336  uint32_t group_index); // group index
337 
338 // Read profiling data
339 hsa_status_t rocprofiler_get_data(rocprofiler_t* context, // [in/out] profiling context
340  uint32_t group_index); // group index
341 
342 // Get profiling groups count
343 hsa_status_t rocprofiler_group_count(const rocprofiler_t* context, // [in] profiling context
344  uint32_t* group_count); // [out] profiling groups count
345 
346 // Get profiling group for a given index
347 hsa_status_t rocprofiler_get_group(rocprofiler_t* context, // [in] profiling context
348  uint32_t group_index, // profiling group index
349  rocprofiler_group_t* group); // [out] profiling group
350 
351 // Start profiling
352 hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group); // [in/out] profiling group
353 
354 // Stop profiling
355 hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group); // [in/out] profiling group
356 
357 // Read profiling
358 hsa_status_t rocprofiler_group_read(rocprofiler_group_t* group); // [in/out] profiling group
359 
360 // Get profiling data
361 hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t* group); // [in/out] profiling group
362 
363 // Get metrics data
364 hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* context); // [in] profiling context
365 
366 // Definition of output data iterator callback
367 typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t;
368 
369 // Method for iterating the events output data
371  rocprofiler_t* context, // [in] profiling context
372  rocprofiler_trace_data_callback_t callback, // callback to iterate the output data
373  void* data); // [in/out] callback data
374 
376 // Profiling features and data
377 //
378 // Profiling features objects have profiling feature info, type, parameters and data
379 // Also profiling data samples can be iterated using a callback
380 
381 // Profiling info kind
382 typedef enum {
383  ROCPROFILER_INFO_KIND_METRIC = 0, // metric info
384  ROCPROFILER_INFO_KIND_METRIC_COUNT = 1, // metric features count, int32
385  ROCPROFILER_INFO_KIND_TRACE = 2, // trace info
386  ROCPROFILER_INFO_KIND_TRACE_COUNT = 3, // trace features count, int32
387  ROCPROFILER_INFO_KIND_TRACE_PARAMETER = 4, // trace parameter info
388  ROCPROFILER_INFO_KIND_TRACE_PARAMETER_COUNT = 5 // trace parameter count, int32
390 
391 // Profiling info query
392 typedef union {
393  rocprofiler_info_kind_t info_kind; // queried profiling info kind
394  struct {
395  const char* trace_name; // queried info trace name
396  } trace_parameter;
398 
399 // Profiling info data
400 typedef struct {
401  uint32_t
402  agent_index; // GPU HSA agent index (GPU Driver Node ID as reported in the sysfs topology)
403  rocprofiler_info_kind_t kind; // info data kind
404  union {
405  struct {
406  const char* name; // metric name
407  uint32_t instances; // instances number
408  const char* expr; // metric expression, NULL for basic counters
409  const char* description; // metric description
410  const char* block_name; // block name
411  uint32_t block_counters; // number of block counters
412  } metric;
413  struct {
414  const char* name; // trace name
415  const char* description; // trace description
416  uint32_t parameter_count; // supported by the trace number parameters
417  } trace;
418  struct {
419  uint32_t code; // parameter code
420  const char* trace_name; // trace name
421  const char* parameter_name; // parameter name
422  const char* description; // trace parameter description
423  } trace_parameter;
424  };
426 
427 // Return the info for a given info kind
428 hsa_status_t rocprofiler_get_info(const hsa_agent_t* agent, // [in] GFXIP handle
429  rocprofiler_info_kind_t kind, // kind of iterated info
430  void* data); // [in/out] returned data
431 
432 // Iterate over the info for a given info kind, and invoke an application-defined callback on every
433 // iteration
434 hsa_status_t rocprofiler_iterate_info(const hsa_agent_t* agent, // [in] GFXIP handle
435  rocprofiler_info_kind_t kind, // kind of iterated info
436  hsa_status_t (*callback)(const rocprofiler_info_data_t info,
437  void* data), // callback
438  void* data); // [in/out] data passed to callback
439 
440 // Iterate over the info for a given info query, and invoke an application-defined callback on every
441 // iteration
442 hsa_status_t rocprofiler_query_info(const hsa_agent_t* agent, // [in] GFXIP handle
443  rocprofiler_info_query_t query, // iterated info query
444  hsa_status_t (*callback)(const rocprofiler_info_data_t info,
445  void* data), // callback
446  void* data); // [in/out] data passed to callback
447 
448 // Create a profiled queue. All dispatches on this queue will be profiled
450  hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
451  void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
452  uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
453 
455 // Profiling pool
456 //
457 // Support for profiling contexts pool
458 // The API provides the capability to create a contexts pool for a given agent and a set of features,
459 // to fetch/release a context entry, and to register a callback for the contexts completion.
460 
461 // Profiling pool handle
462 typedef void rocprofiler_pool_t;
463 
464 // Profiling pool entry
465 typedef struct {
466  rocprofiler_t* context; // context object
467  void* payload; // payload data object
469 
470 // Profiling handler, calling on profiling completion
471 typedef bool (*rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t* entry, void* arg);
472 
473 // Profiling pool properties
474 typedef struct {
475  uint32_t num_entries; // pool size entries
476  uint32_t payload_bytes; // payload size bytes
477  rocprofiler_pool_handler_t handler; // handler on context completion
478  void* handler_arg; // the handler arg
480 
481 // Open profiling pool
483  hsa_agent_t agent, // GPU handle
484  rocprofiler_feature_t* features, // [in] profiling features array
485  uint32_t feature_count, // profiling info count
486  rocprofiler_pool_t** pool, // [out] pool object
487  uint32_t mode, // profiling mode mask
488  rocprofiler_pool_properties_t*); // pool properties
489 
490 // Close profiling pool
491 hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool); // profiling pool handle
492 
493 // Fetch profiling pool entry
495  rocprofiler_pool_t* pool, // profiling pool handle
496  rocprofiler_pool_entry_t* entry); // [out] empty profiling pool entry
497 
498 // Release profiling pool entry
500  rocprofiler_pool_entry_t* entry); // released profiling pool entry
501 
502 // Iterate fetched profiling pool entries
503 hsa_status_t rocprofiler_pool_iterate(rocprofiler_pool_t* pool, // profiling pool handle
504  hsa_status_t (*callback)(rocprofiler_pool_entry_t* entry,
505  void* data), // callback
506  void* data); // [in/out] data passed to callback
507 
508 // Flush completed entries in profiling pool
509 hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool); // profiling pool handle
510 
512 // HSA intercepting API
513 
514 // HSA callbacks ID enumeration
515 typedef enum {
516  ROCPROFILER_HSA_CB_ID_ALLOCATE = 0, // Memory allocate callback
517  ROCPROFILER_HSA_CB_ID_DEVICE = 1, // Device assign callback
518  ROCPROFILER_HSA_CB_ID_MEMCOPY = 2, // Memcopy callback
519  ROCPROFILER_HSA_CB_ID_SUBMIT = 3, // Packet submit callback
520  ROCPROFILER_HSA_CB_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol
521  ROCPROFILER_HSA_CB_ID_CODEOBJ = 5 // Loading/unloading of code object
523 
524 // HSA callback data type
525 typedef struct {
526  union {
527  struct {
528  const void* ptr; // allocated area ptr
529  size_t size; // allocated area size, zero size means 'free' callback
530  hsa_amd_segment_t segment; // allocated area's memory segment type
531  hsa_amd_memory_pool_global_flag_t global_flag; // allocated area's memory global flag
532  int is_code; // equal to 1 if code is allocated
533  } allocate;
534  struct {
535  hsa_device_type_t type; // type of assigned device
536  uint32_t id; // id of assigned device
537  hsa_agent_t agent; // device HSA agent handle
538  const void* ptr; // ptr the device is assigned to
539  } device;
540  struct {
541  const void* dst; // memcopy dst ptr
542  const void* src; // memcopy src ptr
543  size_t size; // memcopy size bytes
544  } memcopy;
545  struct {
546  const void* packet; // submitted to GPU packet
547  const char* kernel_name; // kernel name, not NULL if dispatch
548  hsa_queue_t* queue; // HSA queue the kernel was submitted to
549  uint32_t device_type; // type of device the packet is submitted to
550  uint32_t device_id; // id of device the packet is submitted to
551  } submit;
552  struct {
553  uint64_t object; // kernel symbol object
554  const char* name; // kernel symbol name
555  uint32_t name_length; // kernel symbol name length
556  int unload; // symbol executable destroy
557  } ksymbol;
558  struct {
559  uint32_t storage_type; // code object storage type
560  int storage_file; // origin file descriptor
561  uint64_t memory_base; // origin memory base
562  uint64_t memory_size; // origin memory size
563  uint64_t load_base; // codeobj load base
564  uint64_t load_size; // codeobj load size
565  uint64_t load_delta; // codeobj load delta
566  uint32_t uri_length; // URI string length
567  char* uri; // URI string
568  int unload; // unload flag
569  } codeobj;
570  };
572 
573 // HSA callback function type
574 typedef hsa_status_t (*rocprofiler_hsa_callback_fun_t)(
575  rocprofiler_hsa_cb_id_t id, // callback id
576  const rocprofiler_hsa_callback_data_t* data, // [in] callback data
577  void* arg); // [in/out] user passed data
578 
579 // HSA callbacks structure
580 typedef struct {
581  rocprofiler_hsa_callback_fun_t allocate; // memory allocate callback
582  rocprofiler_hsa_callback_fun_t device; // agent assign callback
583  rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback
584  rocprofiler_hsa_callback_fun_t submit; // packet submit callback
585  rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback
586  rocprofiler_hsa_callback_fun_t codeobj; // codeobject load/unload callback
588 
589 // Set callbacks. If the callback is NULL then it is disabled.
590 // If callback returns a value that is not HSA_STATUS_SUCCESS the callback
591 // will be unregistered.
593  const rocprofiler_hsa_callbacks_t callbacks, // HSA callback function
594  void* arg); // callback user data
595 
596 #ifdef __cplusplus
597 } // extern "C" block
598 #endif // __cplusplus
599 
600 #endif // INC_ROCPROFILER_H_
rocprofiler_data_kind_t
Definition: rocprofiler.h:139
@ ROCPROFILER_DATA_KIND_DOUBLE
Definition: rocprofiler.h:144
@ ROCPROFILER_DATA_KIND_BYTES
Definition: rocprofiler.h:145
@ ROCPROFILER_DATA_KIND_INT32
Definition: rocprofiler.h:141
@ ROCPROFILER_DATA_KIND_INT64
Definition: rocprofiler.h:142
@ ROCPROFILER_DATA_KIND_FLOAT
Definition: rocprofiler.h:143
@ ROCPROFILER_DATA_KIND_UNINIT
Definition: rocprofiler.h:140
hsa_status_t rocprofiler_get_metrics(const rocprofiler_t *context)
hsa_status_t rocprofiler_query_info(const hsa_agent_t *agent, rocprofiler_info_query_t query, hsa_status_t(*callback)(const rocprofiler_info_data_t info, void *data), void *data)
hsa_status_t rocprofiler_iterate_trace_data(rocprofiler_t *context, rocprofiler_trace_data_callback_t callback, void *data)
hsa_status_t rocprofiler_get_info(const hsa_agent_t *agent, rocprofiler_info_kind_t kind, void *data)
hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t *group)
hsa_status_t(* rocprofiler_callback_t)(const rocprofiler_callback_data_t *callback_data, void *user_data, rocprofiler_group_t *group)
Definition: rocprofiler.h:297
hsa_status_t rocprofiler_pool_iterate(rocprofiler_pool_t *pool, hsa_status_t(*callback)(rocprofiler_pool_entry_t *entry, void *data), void *data)
rocprofiler_hsa_cb_id_t
Definition: rocprofiler.h:515
@ ROCPROFILER_HSA_CB_ID_ALLOCATE
Definition: rocprofiler.h:516
@ ROCPROFILER_HSA_CB_ID_DEVICE
Definition: rocprofiler.h:517
@ ROCPROFILER_HSA_CB_ID_KSYMBOL
Definition: rocprofiler.h:520
@ ROCPROFILER_HSA_CB_ID_SUBMIT
Definition: rocprofiler.h:519
@ ROCPROFILER_HSA_CB_ID_CODEOBJ
Definition: rocprofiler.h:521
@ ROCPROFILER_HSA_CB_ID_MEMCOPY
Definition: rocprofiler.h:518
hsa_status_t rocprofiler_open(hsa_agent_t agent, rocprofiler_feature_t *features, uint32_t feature_count, rocprofiler_t **context, uint32_t mode, rocprofiler_properties_t *properties)
hsa_status_t rocprofiler_start_queue_callbacks()
hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t *pool)
uint32_t rocprofiler_version_minor()
hsa_status_t rocprofiler_add_feature(const rocprofiler_feature_t *feature, rocprofiler_feature_set_t *features_set)
rocprofiler_info_kind_t
Definition: rocprofiler.h:382
@ ROCPROFILER_INFO_KIND_METRIC_COUNT
Definition: rocprofiler.h:384
@ ROCPROFILER_INFO_KIND_METRIC
Definition: rocprofiler.h:383
@ ROCPROFILER_INFO_KIND_TRACE_PARAMETER
Definition: rocprofiler.h:387
@ ROCPROFILER_INFO_KIND_TRACE_COUNT
Definition: rocprofiler.h:386
@ ROCPROFILER_INFO_KIND_TRACE
Definition: rocprofiler.h:385
@ ROCPROFILER_INFO_KIND_TRACE_PARAMETER_COUNT
Definition: rocprofiler.h:388
hsa_status_t rocprofiler_group_stop(rocprofiler_group_t *group)
hsa_status_t rocprofiler_group_start(rocprofiler_group_t *group)
hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, rocprofiler_feature_t *features, uint32_t feature_count, rocprofiler_pool_t **pool, uint32_t mode, rocprofiler_pool_properties_t *)
hsa_status_t rocprofiler_stop_queue_callbacks()
hsa_status_t rocprofiler_features_set_open(hsa_agent_t agent, rocprofiler_feature_set_t *features_set, rocprofiler_t **context, uint32_t mode, rocprofiler_properties_t *properties)
void rocprofiler_t
Definition: rocprofiler.h:189
rocprofiler_mode_t
Definition: rocprofiler.h:200
@ ROCPROFILER_MODE_CREATEQUEUE
Definition: rocprofiler.h:202
@ ROCPROFILER_MODE_SINGLEGROUP
Definition: rocprofiler.h:203
@ ROCPROFILER_MODE_STANDALONE
Definition: rocprofiler.h:201
hsa_status_t rocprofiler_get_group(rocprofiler_t *context, uint32_t group_index, rocprofiler_group_t *group)
void rocprofiler_feature_set_t
Definition: rocprofiler.h:181
hsa_status_t rocprofiler_group_count(const rocprofiler_t *context, uint32_t *group_count)
hsa_status_t rocprofiler_reset(rocprofiler_t *context, uint32_t group_index)
hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t
Definition: rocprofiler.h:136
bool(* rocprofiler_handler_t)(rocprofiler_group_t group, void *arg)
Definition: rocprofiler.h:207
hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t
Definition: rocprofiler.h:367
bool(* rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t *entry, void *arg)
Definition: rocprofiler.h:471
hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t *pool, rocprofiler_pool_entry_t *entry)
hsa_status_t rocprofiler_get_data(rocprofiler_t *context, uint32_t group_index)
hsa_status_t(* rocprofiler_hsa_callback_fun_t)(rocprofiler_hsa_cb_id_t id, const rocprofiler_hsa_callback_data_t *data, void *arg)
Definition: rocprofiler.h:574
hsa_status_t rocprofiler_iterate_info(const hsa_agent_t *agent, rocprofiler_info_kind_t kind, hsa_status_t(*callback)(const rocprofiler_info_data_t info, void *data), void *data)
hsa_status_t rocprofiler_get_agent(rocprofiler_t *context, hsa_agent_t *agent)
hsa_status_t rocprofiler_queue_create_profiled(hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type, void(*callback)(hsa_status_t status, hsa_queue_t *source, void *data), void *data, uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t **queue)
hsa_status_t rocprofiler_close(rocprofiler_t *context)
hsa_status_t rocprofiler_pool_release(rocprofiler_pool_entry_t *entry)
rocprofiler_feature_kind_t
Definition: rocprofiler.h:128
@ ROCPROFILER_FEATURE_KIND_METRIC
Definition: rocprofiler.h:129
@ ROCPROFILER_FEATURE_KIND_PCSMP_MOD
Definition: rocprofiler.h:132
@ ROCPROFILER_FEATURE_KIND_TRACE
Definition: rocprofiler.h:130
@ ROCPROFILER_FEATURE_KIND_SPM_MOD
Definition: rocprofiler.h:131
hsa_status_t rocprofiler_start(rocprofiler_t *context, uint32_t group_index)
hsa_status_t rocprofiler_remove_queue_callbacks()
hsa_status_t rocprofiler_error_string(const char **str)
uint32_t rocprofiler_version_major()
rocprofiler_time_id_t
Definition: rocprofiler.h:251
@ ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_RAW
Definition: rocprofiler.h:256
@ ROCPROFILER_TIME_ID_CLOCK_REALTIME_COARSE
Definition: rocprofiler.h:253
@ ROCPROFILER_TIME_ID_CLOCK_REALTIME
Definition: rocprofiler.h:252
@ ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_COARSE
Definition: rocprofiler.h:255
@ ROCPROFILER_TIME_ID_CLOCK_MONOTONIC
Definition: rocprofiler.h:254
hsa_status_t rocprofiler_get_time(rocprofiler_time_id_t time_id, uint64_t timestamp, uint64_t *value_ns, uint64_t *error_ns)
hsa_status_t rocprofiler_read(rocprofiler_t *context, uint32_t group_index)
void rocprofiler_pool_t
Definition: rocprofiler.h:462
hsa_status_t rocprofiler_stop(rocprofiler_t *context, uint32_t group_index)
hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t *pool)
hsa_status_t rocprofiler_set_hsa_callbacks(const rocprofiler_hsa_callbacks_t callbacks, void *arg)
hsa_status_t rocprofiler_group_read(rocprofiler_group_t *group)
hsa_status_t rocprofiler_set_queue_callbacks(rocprofiler_queue_callbacks_t callbacks, void *data)
Definition: rocprofiler.h:281
hsa_agent_t agent
Definition: rocprofiler.h:282
uint32_t queue_id
Definition: rocprofiler.h:286
uint64_t kernel_object
Definition: rocprofiler.h:290
hsa_signal_t completion_signal
Definition: rocprofiler.h:287
const rocprofiler_dispatch_record_t * record
Definition: rocprofiler.h:293
uint64_t queue_index
Definition: rocprofiler.h:285
const amd_kernel_code_t * kernel_code
Definition: rocprofiler.h:291
const hsa_queue_t * queue
Definition: rocprofiler.h:284
uint32_t agent_index
Definition: rocprofiler.h:283
const hsa_kernel_dispatch_packet_t * packet
Definition: rocprofiler.h:288
const char * kernel_name
Definition: rocprofiler.h:289
uint32_t thread_id
Definition: rocprofiler.h:292
Definition: rocprofiler.h:149
rocprofiler_data_kind_t kind
Definition: rocprofiler.h:150
uint32_t instance_count
Definition: rocprofiler.h:159
uint32_t result_int32
Definition: rocprofiler.h:152
void * ptr
Definition: rocprofiler.h:157
bool copy
Definition: rocprofiler.h:160
double result_double
Definition: rocprofiler.h:155
float result_float
Definition: rocprofiler.h:154
uint32_t size
Definition: rocprofiler.h:158
uint64_t result_int64
Definition: rocprofiler.h:153
Definition: rocprofiler.h:273
uint64_t end
Definition: rocprofiler.h:276
uint64_t complete
Definition: rocprofiler.h:277
uint64_t begin
Definition: rocprofiler.h:275
uint64_t dispatch
Definition: rocprofiler.h:274
Definition: rocprofiler.h:166
const rocprofiler_parameter_t * parameters
Definition: rocprofiler.h:175
rocprofiler_feature_kind_t kind
Definition: rocprofiler.h:167
uint32_t parameter_count
Definition: rocprofiler.h:176
const char * block
Definition: rocprofiler.h:171
const char * name
Definition: rocprofiler.h:169
uint32_t event
Definition: rocprofiler.h:172
rocprofiler_data_t data
Definition: rocprofiler.h:177
Definition: rocprofiler.h:192
uint32_t feature_count
Definition: rocprofiler.h:195
rocprofiler_feature_t ** features
Definition: rocprofiler.h:194
rocprofiler_t * context
Definition: rocprofiler.h:196
unsigned index
Definition: rocprofiler.h:193
Definition: rocprofiler.h:525
hsa_agent_t agent
Definition: rocprofiler.h:537
uint32_t uri_length
Definition: rocprofiler.h:566
char * uri
Definition: rocprofiler.h:567
hsa_amd_segment_t segment
Definition: rocprofiler.h:530
hsa_queue_t * queue
Definition: rocprofiler.h:548
uint32_t device_id
Definition: rocprofiler.h:550
const void * packet
Definition: rocprofiler.h:546
const void * src
Definition: rocprofiler.h:542
uint64_t load_base
Definition: rocprofiler.h:563
uint32_t name_length
Definition: rocprofiler.h:555
int is_code
Definition: rocprofiler.h:532
uint64_t load_size
Definition: rocprofiler.h:564
int storage_file
Definition: rocprofiler.h:560
uint32_t id
Definition: rocprofiler.h:536
const char * name
Definition: rocprofiler.h:554
uint64_t load_delta
Definition: rocprofiler.h:565
uint64_t memory_size
Definition: rocprofiler.h:562
size_t size
Definition: rocprofiler.h:529
hsa_device_type_t type
Definition: rocprofiler.h:535
uint32_t storage_type
Definition: rocprofiler.h:559
const char * kernel_name
Definition: rocprofiler.h:547
uint32_t device_type
Definition: rocprofiler.h:549
uint64_t object
Definition: rocprofiler.h:553
int unload
Definition: rocprofiler.h:556
const void * dst
Definition: rocprofiler.h:541
const void * ptr
Definition: rocprofiler.h:528
uint64_t memory_base
Definition: rocprofiler.h:561
hsa_amd_memory_pool_global_flag_t global_flag
Definition: rocprofiler.h:531
Definition: rocprofiler.h:580
rocprofiler_hsa_callback_fun_t allocate
Definition: rocprofiler.h:581
rocprofiler_hsa_callback_fun_t device
Definition: rocprofiler.h:582
rocprofiler_hsa_callback_fun_t codeobj
Definition: rocprofiler.h:586
rocprofiler_hsa_callback_fun_t submit
Definition: rocprofiler.h:584
rocprofiler_hsa_callback_fun_t ksymbol
Definition: rocprofiler.h:585
rocprofiler_hsa_callback_fun_t memcopy
Definition: rocprofiler.h:583
Definition: rocprofiler.h:400
const char * block_name
Definition: rocprofiler.h:410
uint32_t agent_index
Definition: rocprofiler.h:402
const char * description
Definition: rocprofiler.h:409
uint32_t code
Definition: rocprofiler.h:419
rocprofiler_info_kind_t kind
Definition: rocprofiler.h:403
const char * name
Definition: rocprofiler.h:406
const char * expr
Definition: rocprofiler.h:408
uint32_t parameter_count
Definition: rocprofiler.h:416
const char * trace_name
Definition: rocprofiler.h:420
const char * parameter_name
Definition: rocprofiler.h:421
uint32_t block_counters
Definition: rocprofiler.h:411
uint32_t instances
Definition: rocprofiler.h:407
Definition: rocprofiler.h:465
rocprofiler_t * context
Definition: rocprofiler.h:466
void * payload
Definition: rocprofiler.h:467
Definition: rocprofiler.h:474
uint32_t payload_bytes
Definition: rocprofiler.h:476
void * handler_arg
Definition: rocprofiler.h:478
uint32_t num_entries
Definition: rocprofiler.h:475
rocprofiler_pool_handler_t handler
Definition: rocprofiler.h:477
Definition: rocprofiler.h:210
hsa_queue_t * queue
Definition: rocprofiler.h:211
void * handler_arg
Definition: rocprofiler.h:215
uint32_t queue_depth
Definition: rocprofiler.h:213
rocprofiler_handler_t handler
Definition: rocprofiler.h:214
Definition: rocprofiler.h:303
rocprofiler_callback_t dispatch
Definition: rocprofiler.h:304
Definition: rocprofiler.h:101
uint32_t trace_size
Definition: rocprofiler.h:105
uint32_t obj_dumping
Definition: rocprofiler.h:112
uint32_t k_concurrent
Definition: rocprofiler.h:110
uint32_t hsa_intercepting
Definition: rocprofiler.h:109
uint32_t code_obj_tracking
Definition: rocprofiler.h:103
uint32_t timestamp_on
Definition: rocprofiler.h:108
uint32_t opt_mode
Definition: rocprofiler.h:111
uint32_t intercept_mode
Definition: rocprofiler.h:102
uint32_t memcopy_tracking
Definition: rocprofiler.h:104
uint64_t timeout
Definition: rocprofiler.h:107
uint32_t trace_local
Definition: rocprofiler.h:106
Definition: rocprofiler.h:392
const char * trace_name
Definition: rocprofiler.h:395
rocprofiler_info_kind_t info_kind
Definition: rocprofiler.h:393