Thread Trace Service

Thread Trace Service#

ROCprofiler-SDK developer API: Thread Trace Service
ROCprofiler-SDK developer API 1.0.0
ROCm Profiling API and tools
Thread Trace Service

Provides API calls to enable and handle thread trace data. More...

Data Structures

struct  rocprofiler_thread_trace_parameter_t
 Thread Trace parameter specification. More...
 
struct  rocprofiler_thread_trace_decoder_id_t
 Handle containing a loaded rocprof-trace-decoder and a decoder state. More...
 
struct  rocprofiler_thread_trace_decoder_pc_t
 Describes a PC address. More...
 
struct  rocprofiler_thread_trace_decoder_perfevent_t
 Describes four performance counter values. More...
 
struct  rocprofiler_thread_trace_decoder_occupancy_t
 Describes an occupancy event (wave started or wave ended). More...
 
struct  rocprofiler_thread_trace_decoder_wave_state_t
 A wave state change event. More...
 
struct  rocprofiler_thread_trace_decoder_inst_t
 Describes an instruction execution event. More...
 
struct  rocprofiler_thread_trace_decoder_wave_t
 Struct describing a wave during its lifetime. This record is only generated for waves executing in the target_cu and target_simd, selected by ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU and ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT. More...
 
struct  rocprofiler_thread_trace_decoder_realtime_t
 Matches the reference (realtime) clock with the shader clock. Added in rocprof-trace-decoder 0.1.3. Requires aqlprofile for ROCm 7.1+. clock_in_seconds = realtime_clock / ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY; gfx_frequency = delta(shader_clock) / delta(clock_in_seconds). For best average, use gfx_frequency[n] = (shader_clock[n] - shader_clock[0]) / (clock_in_seconds[n] - clock_in_seconds[0]). More...
 
struct  rocprofiler_thread_trace_decoder_shaderdata_t
 Record created by s_ttracedata and s_ttracedata_imm. Added in rocprof-trace-decoder 0.1.3. More...
 

Typedefs

typedef void(* rocprofiler_thread_trace_shader_data_callback_t) (rocprofiler_agent_id_t agent, int64_t shader_engine_id, void *data, unsigned long data_size, rocprofiler_user_data_t userdata)
 Callback to be triggered every time some ATT data is generated by the device.
 
typedef rocprofiler_thread_trace_control_flags_t(* rocprofiler_thread_trace_dispatch_callback_t) (rocprofiler_agent_id_t agent_id, rocprofiler_queue_id_t queue_id, rocprofiler_async_correlation_id_t correlation_id, rocprofiler_kernel_id_t kernel_id, rocprofiler_dispatch_id_t dispatch_id, void *userdata_config, rocprofiler_user_data_t *userdata_shader)
 Callback to be triggered every kernel dispatch, indicating to start and/or stop ATT.
 
typedef void(* rocprofiler_thread_trace_decoder_callback_t) (rocprofiler_thread_trace_decoder_record_type_t record_type_id, void *trace_events, uint64_t trace_size, void *userdata)
 Callback for rocprof-trace-decoder to return decoder traces back to user.
 

Enumerations

enum  rocprofiler_thread_trace_parameter_type_t {
  ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU = 0 ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER_EXCLUDE_MASK ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_NO_DETAIL ,
  ROCPROFILER_THREAD_TRACE_PARAMETER_LAST
}
 Types of Thread Trace parameters. More...
 
enum  rocprofiler_thread_trace_control_flags_t {
  ROCPROFILER_THREAD_TRACE_CONTROL_NONE = 0 ,
  ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP = 3
}
 
enum  rocprofiler_thread_trace_decoder_info_t {
  ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = 0 ,
  ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST ,
  ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE ,
  ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE ,
  ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST
}
 Describes the type of info received. More...
 
enum  rocprofiler_thread_trace_decoder_wstate_type_t {
  ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = 0 ,
  ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE ,
  ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC ,
  ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT ,
  ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL ,
  ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST
}
 Wave state type. More...
 
enum  rocprofiler_thread_trace_decoder_inst_category_t {
  ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE = 0 ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH ,
  ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST
}
 Instruction type. More...
 
enum  rocprofiler_thread_trace_decoder_shaderdata_flags_t { ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM = 0 }
 Bitmask of additional information for shaderdata_t. Added in rocprof-trace-decoder 0.1.3. More...
 
enum  rocprofiler_thread_trace_decoder_record_type_t {
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0 ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME ,
  ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY
}
 Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t. More...
 

Functions

rocprofiler_status_t rocprofiler_configure_device_thread_trace_service (rocprofiler_context_id_t context_id, rocprofiler_agent_id_t agent_id, rocprofiler_thread_trace_parameter_t *parameters, unsigned long num_parameters, rocprofiler_thread_trace_shader_data_callback_t shader_callback, rocprofiler_user_data_t callback_userdata)
 Configure Thread Trace Service for agent. Only one agent profile may be configured per context, and only one active context may profile a given agent at a time. Multiple agent contexts can be started at the same time if they are profiling different agents.
 
rocprofiler_status_t rocprofiler_configure_dispatch_thread_trace_service (rocprofiler_context_id_t context_id, rocprofiler_agent_id_t agent_id, rocprofiler_thread_trace_parameter_t *parameters, unsigned long num_parameters, rocprofiler_thread_trace_dispatch_callback_t dispatch_callback, rocprofiler_thread_trace_shader_data_callback_t shader_callback, void *callback_userdata)
 Enables the thread trace service for dispatch-based tracing. The tool has an option to enable/disable thread trace on every dispatch callback. This service serializes all traced kernels, and optionally all non-traced kernels.
 
rocprofiler_status_t rocprofiler_thread_trace_decoder_create (rocprofiler_thread_trace_decoder_id_t *handle, const char *path)
 Initializes Trace Decoder library with a library search path.
 
void rocprofiler_thread_trace_decoder_destroy (rocprofiler_thread_trace_decoder_id_t handle)
 Deletes handle created by rocprofiler_thread_trace_decoder_create.
 
rocprofiler_status_t rocprofiler_thread_trace_decoder_codeobj_load (rocprofiler_thread_trace_decoder_id_t handle, uint64_t load_id, uint64_t load_addr, uint64_t load_size, const void *data, uint64_t size)
 Loads a code object binary to match with Thread Trace. The size, data and load_* are reported by rocprofiler-sdk's code object tracing service. Used for the decoder library to know what code objects to look into when decoding shader data. Not all application code objects are required to be reported here, only the ones containing code executed at the time the shader data was collected by thread_trace services. If a code object not reported here is encountered while decoding shader data, a record of type INFO_STITCH_INCOMPLETE will be generated and instructions will not be reported with a PC address.
 
rocprofiler_status_t rocprofiler_thread_trace_decoder_codeobj_unload (rocprofiler_thread_trace_decoder_id_t handle, uint64_t load_id)
 Unloads a code object binary.
 
rocprofiler_status_t rocprofiler_trace_decode (rocprofiler_thread_trace_decoder_id_t handle, rocprofiler_thread_trace_decoder_callback_t callback, void *data, uint64_t size, void *userdata)
 Decodes shader data returned by rocprofiler_thread_trace_shader_data_callback_t. Use rocprofiler_thread_trace_decoder_codeobj_load to add references to loaded code objects during the trace. A rocprofiler_thread_trace_decoder_callback_t returns decoded data back to user. The first record is always of type ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP.
 
const char * rocprofiler_thread_trace_decoder_info_string (rocprofiler_thread_trace_decoder_id_t handle, rocprofiler_thread_trace_decoder_info_t info)
 Returns the string description of a rocprofiler_thread_trace_decoder_info_t record.
 

Detailed Description

Provides API calls to enable and handle thread trace data.

ROCprof-trace-decoder defined types. All timestamp values are in shader clock units.

ROCprof-trace-decoder wrapper. Provides API calls to decode thread trace shader data.


Data Structure Documentation

◆ rocprofiler_thread_trace_parameter_t

struct rocprofiler_thread_trace_parameter_t

Thread Trace parameter specification.

Definition at line 65 of file core.h.

+ Collaboration diagram for rocprofiler_thread_trace_parameter_t:
Data Fields
union rocprofiler_thread_trace_parameter_t.__unnamed12__ __unnamed__
rocprofiler_thread_trace_parameter_type_t type

◆ rocprofiler_thread_trace_decoder_id_t

struct rocprofiler_thread_trace_decoder_id_t

Handle containing a loaded rocprof-trace-decoder and a decoder state.

Definition at line 39 of file trace_decoder.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_id_t:
Data Fields
uint64_t handle

◆ rocprofiler_thread_trace_decoder_pc_t

struct rocprofiler_thread_trace_decoder_pc_t

Describes a PC address.

Definition at line 50 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_pc_t:
Data Fields
uint64_t address Address (code_object_id == 0), or ELF vaddr (code_object_id != 0)
uint64_t code_object_id Zero if no code object was found.

◆ rocprofiler_thread_trace_decoder_perfevent_t

struct rocprofiler_thread_trace_decoder_perfevent_t

Describes four performance counter values.

Definition at line 59 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_perfevent_t:
Data Fields
uint8_t bank Selects counter group [0,3] or [4,7].
uint8_t CU Shader compute unit ID these counters were collected from.
uint16_t events0 Counter0 (bank==0) or Counter4 (bank==1).
uint16_t events1 Counter1 (bank==0) or Counter5 (bank==1).
uint16_t events2 Counter2 (bank==0) or Counter6 (bank==1).
uint16_t events3 Counter3 (bank==0) or Counter7 (bank==1).
int64_t time Shader clock timestamp in which these counters were read.

◆ rocprofiler_thread_trace_decoder_occupancy_t

struct rocprofiler_thread_trace_decoder_occupancy_t

Describes an occupancy event (wave started or wave ended).

Definition at line 73 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_occupancy_t:
Data Fields
uint32_t _rsvd: 31
uint8_t cu Compute unit ID (gfx9) or WGP ID (gfx10+).
rocprofiler_thread_trace_decoder_pc_t pc Wave start address (kernel entry point)
uint8_t reserved Reserved.
uint8_t simd SIMD ID [0,3] within compute unit.
uint32_t start: 1 1 if wave_start, 0 if a wave_end
uint64_t time Timestamp of event.
uint8_t wave_id Wave slot ID within SIMD.

◆ rocprofiler_thread_trace_decoder_wave_state_t

struct rocprofiler_thread_trace_decoder_wave_state_t

A wave state change event.

Definition at line 101 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_wave_state_t:
Data Fields
int32_t duration state duration in cycles
int32_t type One of rocprofiler_thread_trace_decoder_wstate_type_t.

◆ rocprofiler_thread_trace_decoder_inst_t

struct rocprofiler_thread_trace_decoder_inst_t

Describes an instruction execution event.

The duration is measured as stall+issue time (gfx9) or stall+execution time (gfx10+). Time + duration marks the issue (gfx9) or execution (gfx10+) completion time. Time + stall marks the successful issue time. Duration - stall is the issue time (gfx9) or execution time (gfx10+).

Definition at line 136 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_inst_t:
Data Fields
uint32_t category: 8 One of rocprofiler_thread_trace_decoder_inst_category_t.
int32_t duration Total instruction duration, in clock cycles.
rocprofiler_thread_trace_decoder_pc_t pc
uint32_t stall: 24 Stall duration, in clock cycles.
int64_t time When the wave first attempted to execute this instruction.

◆ rocprofiler_thread_trace_decoder_wave_t

struct rocprofiler_thread_trace_decoder_wave_t

Struct describing a wave during its lifetime. This record is only generated for waves executing in the target_cu and target_simd, selected by ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU and ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT.

instructions_array contains a time-ordered list of all (traced) instructions by the wave.

Definition at line 152 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_wave_t:
Data Fields
uint32_t _rsvd1
uint32_t _rsvd2
uint32_t _rsvd3
int64_t begin_time Wave begin time. Should match occupancy event wave start.
uint8_t contexts Counts how many CWSR events have occurred during the wave lifetime.
uint8_t cu CU id (gfx9) or wgp id (gfx10+). This is always the target_cu.
int64_t end_time Wave end time. Should match occupancy event wave end.
rocprofiler_thread_trace_decoder_inst_t * instructions_array Instructions executed.
uint64_t instructions_size instructions_array size
uint8_t simd SIMD ID [0,3].
rocprofiler_thread_trace_decoder_wave_state_t * timeline_array wave state change events
uint64_t timeline_size timeline_array size
uint8_t wave_id Wave slot ID within SIMD.

◆ rocprofiler_thread_trace_decoder_realtime_t

struct rocprofiler_thread_trace_decoder_realtime_t

Matches the reference (realtime) clock with the shader clock. Added in rocprof-trace-decoder 0.1.3. Requires aqlprofile for ROCm 7.1+.
clock_in_seconds = realtime_clock / F, where F is the realtime clock frequency in Hz delivered by the ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY record.
gfx_frequency = delta(shader_clock) / delta(clock_in_seconds).
For best average, use gfx_frequency[n] = (shader_clock[n] - shader_clock[0]) / (clock_in_seconds[n] - clock_in_seconds[0]).

Definition at line 180 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_realtime_t:
Data Fields
uint64_t realtime_clock Clock timestamp in realtime units.
uint64_t reserved
int64_t shader_clock Clock timestamp in gfx clock units.

◆ rocprofiler_thread_trace_decoder_shaderdata_t

struct rocprofiler_thread_trace_decoder_shaderdata_t

Record created by s_ttracedata and s_ttracedata_imm. Added in rocprof-trace-decoder 0.1.3.

Definition at line 204 of file trace_decoder_types.h.

+ Collaboration diagram for rocprofiler_thread_trace_decoder_shaderdata_t:
Data Fields
uint8_t cu CU id (gfx9) or wgp id (gfx10+).
uint8_t flags bitmask of rocprofiler_thread_trace_decoder_shaderdata_flags_t
uint32_t reserved
uint8_t simd SIMD ID [0,3].
int64_t time
uint64_t value Value written from M0/IMM.
uint8_t wave_id Wave slot ID within SIMD.

Typedef Documentation

◆ rocprofiler_thread_trace_decoder_callback_t

typedef void(* rocprofiler_thread_trace_decoder_callback_t) (rocprofiler_thread_trace_decoder_record_type_t record_type_id, void *trace_events, uint64_t trace_size, void *userdata)

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Callback for rocprof-trace-decoder to return decoder traces back to user.

Parameters
[in]record_type_idOne of rocprofiler_thread_trace_decoder_record_type_t
[in]trace_eventsA pointer to sequence of events, of size trace_size.
[in]trace_sizeThe number of events in the trace.
[in]userdataArbitrary data pointer to be sent back to the user via callback.

Definition at line 113 of file trace_decoder.h.

◆ rocprofiler_thread_trace_dispatch_callback_t

typedef rocprofiler_thread_trace_control_flags_t(* rocprofiler_thread_trace_dispatch_callback_t) (rocprofiler_agent_id_t agent_id, rocprofiler_queue_id_t queue_id, rocprofiler_async_correlation_id_t correlation_id, rocprofiler_kernel_id_t kernel_id, rocprofiler_dispatch_id_t dispatch_id, void *userdata_config, rocprofiler_user_data_t *userdata_shader)

#include <rocprofiler-sdk/experimental/thread-trace/dispatch.h>

Callback to be triggered every kernel dispatch, indicating to start and/or stop ATT.

Parameters
[in]agent_idagent_id.
[in]queue_idqueue_id.
[in]correlation_idinternal correlation id.
[in]kernel_idkernel_id.
[in]dispatch_iddispatch_id.
[in]userdata_configUserdata passed back from rocprofiler_configure_dispatch_thread_trace_service.
[out]userdata_shaderUserdata to be passed in shader_callback

Definition at line 57 of file dispatch.h.

◆ rocprofiler_thread_trace_shader_data_callback_t

typedef void(* rocprofiler_thread_trace_shader_data_callback_t) (rocprofiler_agent_id_t agent, int64_t shader_engine_id, void *data, unsigned long data_size, rocprofiler_user_data_t userdata)

#include <rocprofiler-sdk/experimental/thread-trace/core.h>

Callback to be triggered every time some ATT data is generated by the device.

Parameters
[in]agentIdentifier for the target agent (see rocprofiler_agent_id_t)
[in]shader_engine_idID of shader engine, as enabled by SE_MASK
[in]dataPointer to the buffer containing the ATT data
[in]data_sizeNumber of bytes in "data"
[in]userdataPassed back to user from rocprofiler_thread_trace_dispatch_callback_t()

Definition at line 87 of file core.h.

Enumeration Type Documentation

◆ rocprofiler_thread_trace_control_flags_t

enum rocprofiler_thread_trace_control_flags_t

#include <rocprofiler-sdk/experimental/thread-trace/dispatch.h>

Definition at line 40 of file dispatch.h.

41{
42 ROCPROFILER_THREAD_TRACE_CONTROL_NONE = 0,
43 ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP = 3
44} rocprofiler_thread_trace_control_flags_t;

◆ rocprofiler_thread_trace_decoder_info_t

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Describes the type of info received.

Definition at line 38 of file trace_decoder_types.h.

39{
40 ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = 0,
41 ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST,
42 ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE,
43 ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE,
44 ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST
rocprofiler_thread_trace_decoder_info_t
Describes the type of info received.

◆ rocprofiler_thread_trace_decoder_inst_category_t

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Instruction type.

Enumerator
ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM 

Scalar memory op.

ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU 

Scalar ALU op.

ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM 

Vector memory op.

ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT 

Flat addressing vmem or lds.

ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS 

Local Data Share op.

ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU 

Vector ALU op.

ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP 

Branch taken.

ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT 

Branch not taken.

ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED 

Internal operation.

ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT 

Wave context switch.

ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE 

MSG types.

ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH 

Raytrace op.

Definition at line 110 of file trace_decoder_types.h.

111{
112 ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE = 0,
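113 ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM, ///< Scalar memory op
114 ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU, ///< Scalar ALU op
115 ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM, ///< Vector memory op
116 ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT, ///< Flat addressing vmem or lds
117 ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS, ///< Local Data Share op
118 ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU, ///< Vector ALU op
119 ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP, ///< Branch taken
120 ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT, ///< Branch not taken
121 ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED, ///< Internal operation
122 ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT, ///< Wave context switch
123 ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE, ///< MSG types
124 ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH, ///< Raytrace op
125 ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST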
rocprofiler_thread_trace_decoder_inst_category_t
Instruction type.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH
Raytrace op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU
Vector ALU op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT
Flat addressing vmem or lds.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED
Internal operation.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT
Wave context switch.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS
Local Data Share op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM
Scalar memory op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT
Branch not taken.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU
Scalar ALU op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE
MSG types.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM
Vector memory op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP
Branch taken.

◆ rocprofiler_thread_trace_decoder_record_type_t

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t.

Enumerator
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP 

Record is gfxip_major, type uint64_t.

ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY 

rocprofiler_thread_trace_decoder_occupancy_t*

ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT 

rocprofiler_thread_trace_decoder_perfevent_t*

ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE 

rocprofiler_thread_trace_decoder_wave_t*

ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO 

rocprofiler_thread_trace_decoder_info_t*

ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG 

Debug.

ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA 

rocprofiler_thread_trace_decoder_shaderdata_t*

ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME 

rocprofiler_thread_trace_decoder_realtime_t*

Definition at line 218 of file trace_decoder_types.h.

219{
220 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type uint64_t
221 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, ///< rocprofiler_thread_trace_decoder_occupancy_t*
222 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, ///< rocprofiler_thread_trace_decoder_perfevent_t*
223 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, ///< rocprofiler_thread_trace_decoder_wave_t*
224 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO, ///< rocprofiler_thread_trace_decoder_info_t*
225 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG, ///< Debug
226 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA, ///< rocprofiler_thread_trace_decoder_shaderdata_t*
227 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME, ///< rocprofiler_thread_trace_decoder_realtime_t*
228 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY,
229 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST
230
231 /// @var ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY
232 /// @brief uint64_t*. Realtime clock frequency in Hz.
rocprofiler_thread_trace_decoder_record_type_t
Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t.
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP
Record is gfxip_major, type uint64_t.
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA
rocprofiler_thread_trace_decoder_shaderdata_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG
Debug.
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO
rocprofiler_thread_trace_decoder_info_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY
rocprofiler_thread_trace_decoder_occupancy_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE
rocprofiler_thread_trace_decoder_wave_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME
rocprofiler_thread_trace_decoder_realtime_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT
rocprofiler_thread_trace_decoder_perfevent_t*

◆ rocprofiler_thread_trace_decoder_shaderdata_flags_t

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Bitmask of additional information for shaderdata_t. Added in rocprof-trace-decoder 0.1.3.

Definition at line 191 of file trace_decoder_types.h.

192{
193 ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM = 0,
194 ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_PRIV ///< Generated by the trap handler
195
196 /// @var ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM
197 /// @brief Value comes from s_ttracedata_imm.
rocprofiler_thread_trace_decoder_shaderdata_flags_t
Bitmask of additional information for shaderdata_t Added in rocprof-trace-decoder 0....

◆ rocprofiler_thread_trace_decoder_wstate_type_t

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Wave state type.

Definition at line 88 of file trace_decoder_types.h.

89{
90 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = 0,
91 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE,
92 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC,
93 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT,
94 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL,
95 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST,
rocprofiler_thread_trace_decoder_wstate_type_t
Wave state type.

◆ rocprofiler_thread_trace_parameter_type_t

#include <rocprofiler-sdk/experimental/thread-trace/core.h>

Types of Thread Trace parameters.

Enumerator
ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU 

Select the Target CU or WGP.

ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK 

Bitmask of shader engines.

ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE 

Size of combined GPU buffer for ATT.

ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT 

Bitmask (GFX9) or ID (Navi) of SIMDs.

ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL 

Period [1,32] or disable (0) perfmon.

ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER 

Perfmon ID and SIMD mask. gfx9 only.

ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL 

Also serializes kernels not under thread trace.

ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER_EXCLUDE_MASK 

Bitmask of which compute units to exclude from perfcounters. gfx9 only.

ROCPROFILER_THREAD_TRACE_PARAMETER_NO_DETAIL 

Don't collect instruction timing, only shader-wide information.

Definition at line 43 of file core.h.

44{
45 ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU = 0, ///< Select the Target CU or WGP
46 ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, ///< Bitmask of shader engines.
47 ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, ///< Size of combined GPU buffer for ATT
48 ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, ///< Bitmask (GFX9) or ID (Navi) of SIMDs
49 ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, ///< Period [1,32] or disable (0) perfmon
50 ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER, ///< Perfmon ID and SIMD mask. gfx9 only
51 ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL, ///< Serializes also kernels not under
52 ///< thread trace
53 ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER_EXCLUDE_MASK, ///< Bitmask of which compute
54 ///< units to exclude from
55 ///< perfcounters. gfx9 only
56 ROCPROFILER_THREAD_TRACE_PARAMETER_NO_DETAIL, ///< Dont collect instruction timing,
57 ///< only shader-wide information
58 ROCPROFILER_THREAD_TRACE_PARAMETER_LAST
rocprofiler_thread_trace_parameter_type_t
Types of Thread Trace parameters.
Definition core.h:44
@ ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER_EXCLUDE_MASK
Bitmask of which compute units to exclude from perfcounters. gfx9 only.
Definition core.h:53
@ ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK
Bitmask of shader engines.
Definition core.h:46
@ ROCPROFILER_THREAD_TRACE_PARAMETER_NO_DETAIL
Don't collect instruction timing, only shader-wide information.
Definition core.h:56
@ ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE
Size of combined GPU buffer for ATT.
Definition core.h:47
@ ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL
Period [1,32] or disable (0) perfmon.
Definition core.h:49
@ ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL
Serializes also kernels not under thread trace.
Definition core.h:51
@ ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER
Perfmon ID and SIMD mask. gfx9 only.
Definition core.h:50
@ ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT
Bitmask (GFX9) or ID (Navi) of SIMDs.
Definition core.h:48
@ ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU
Select the Target CU or WGP.
Definition core.h:45

Function Documentation

◆ rocprofiler_configure_device_thread_trace_service()

rocprofiler_status_t rocprofiler_configure_device_thread_trace_service ( rocprofiler_context_id_t  context_id,
rocprofiler_agent_id_t  agent_id,
rocprofiler_thread_trace_parameter_t *  parameters,
unsigned long  num_parameters,
rocprofiler_thread_trace_shader_data_callback_t  shader_callback,
rocprofiler_user_data_t  callback_userdata 
)

#include <rocprofiler-sdk/experimental/thread-trace/agent.h>

Configure Thread Trace Service for agent. Only one agent profile may be configured per context, and only one active context may profile a given agent at a time. Multiple agent contexts can be started at the same time if they are profiling different agents.

Parameters
[in]context_idcontext id
[in]parametersList of ATT-specific parameters.
[in]num_parametersNumber of parameters. Zero is allowed.
[in]agent_idagent to configure profiling on.
[in]shader_callbackCallback fn where the collected data will be sent to.
[in]callback_userdataPassed back to user in shader_callback.
Returns
rocprofiler_status_t
Return values
ROCPROFILER_STATUS_SUCCESSon success
ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKEDfor configuration locked
ROCPROFILER_STATUS_ERROR_CONTEXT_INVALIDfor conflicting configurations in the same ctx
ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUNDfor invalid context id
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENTfor invalid rocprofiler_thread_trace_parameter_t

◆ rocprofiler_configure_dispatch_thread_trace_service()

rocprofiler_status_t rocprofiler_configure_dispatch_thread_trace_service ( rocprofiler_context_id_t  context_id,
rocprofiler_agent_id_t  agent_id,
rocprofiler_thread_trace_parameter_t *  parameters,
unsigned long  num_parameters,
rocprofiler_thread_trace_dispatch_callback_t  dispatch_callback,
rocprofiler_thread_trace_shader_data_callback_t  shader_callback,
void *  callback_userdata 
)

#include <rocprofiler-sdk/experimental/thread-trace/dispatch.h>

Enables the thread trace service for dispatch-based tracing. The tool has an option to enable/disable thread trace on every dispatch callback. This service serializes all traced kernels, and optionally all non-traced kernels.

Parameters
[in]context_idid of the context used for start/stop thread_trace.
[in]agent_idrocprofiler_agent_id_t to configure thread trace.
[in]parametersList of ATT-specific parameters.
[in]num_parametersNumber of parameters. Zero is allowed.
[in]dispatch_callbackControl fn which decides when TT starts/stops collecting.
[in]shader_callbackCallback fn where the collected data will be sent to.
[in]callback_userdataPassed back to user in dispatch_callback.
Returns
rocprofiler_status_t
Return values
ROCPROFILER_STATUS_SUCCESSon success
ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKEDfor configuration locked
ROCPROFILER_STATUS_ERROR_CONTEXT_INVALIDfor conflicting configurations in the same ctx
ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUNDfor invalid context id
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENTfor invalid rocprofiler_thread_trace_parameter_t
ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGUREDif already configured

◆ rocprofiler_thread_trace_decoder_codeobj_load()

rocprofiler_status_t rocprofiler_thread_trace_decoder_codeobj_load ( rocprofiler_thread_trace_decoder_id_t  handle,
uint64_t  load_id,
uint64_t  load_addr,
uint64_t  load_size,
const void *  data,
uint64_t  size 
)

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Loads a code object binary to match with Thread Trace. The size, data and load_* are reported by rocprofiler-sdk's code object tracing service. Used by the decoder library to know which code objects to look into when decoding shader data. Not all application code objects are required to be reported here, only the ones containing code executed at the time the shader data was collected by thread_trace services. If a code object not reported here is encountered while decoding shader data, a record of type INFO_STITCH_INCOMPLETE will be generated and instructions will not be reported with a PC address.

Parameters
[in] handle Handle to decoder instance.
[in] load_id Code object load ID.
[in] load_addr Code object load address.
[in] load_size Code object load size.
[in] data Code object binary data.
[in] size Code object binary data size.
Returns
rocprofiler_status_t
Return values
ROCPROFILER_STATUS_ERROR Unable to load code object.
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle
ROCPROFILER_STATUS_SUCCESS Code object loaded

◆ rocprofiler_thread_trace_decoder_codeobj_unload()

rocprofiler_status_t rocprofiler_thread_trace_decoder_codeobj_unload ( rocprofiler_thread_trace_decoder_id_t  handle,
uint64_t  load_id 
)

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Unloads a code object binary.

Parameters
[in] handle Handle to decoder instance.
[in] load_id Code object load ID to remove.
Returns
rocprofiler_status_t
Return values
ROCPROFILER_STATUS_ERROR Code object not loaded.
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle
ROCPROFILER_STATUS_SUCCESS Code object unloaded

◆ rocprofiler_thread_trace_decoder_create()

rocprofiler_status_t rocprofiler_thread_trace_decoder_create ( rocprofiler_thread_trace_decoder_id_t *  handle,
const char *  path 
)

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Initializes Trace Decoder library with a library search path.

Parameters
[out] handle Handle to created decoder instance.
[in] path Path to trace decoder library location (e.g. /opt/rocm/lib).
Returns
rocprofiler_status_t
Return values
ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE Library not found
ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI Library found but version not supported
ROCPROFILER_STATUS_SUCCESS Handle created

◆ rocprofiler_thread_trace_decoder_destroy()

void rocprofiler_thread_trace_decoder_destroy ( rocprofiler_thread_trace_decoder_id_t  handle)

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Deletes handle created by rocprofiler_thread_trace_decoder_create.

Parameters
[in] handle Handle to destroy

◆ rocprofiler_thread_trace_decoder_info_string()

const char * rocprofiler_thread_trace_decoder_info_string ( rocprofiler_thread_trace_decoder_id_t  handle,
rocprofiler_thread_trace_decoder_info_t  info 
)

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Returns the string description of a rocprofiler_thread_trace_decoder_info_t record.

Parameters
[in] handle Decoder handle
[in] info The decoder info received
Return values
A null-terminated string describing "info".

◆ rocprofiler_trace_decode()

rocprofiler_status_t rocprofiler_trace_decode ( rocprofiler_thread_trace_decoder_id_t  handle,
rocprofiler_thread_trace_decoder_callback_t  callback,
void *  data,
uint64_t  size,
void *  userdata 
)

#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Decodes shader data returned by rocprofiler_thread_trace_shader_data_callback_t. Use rocprofiler_thread_trace_decoder_codeobj_load to add references to loaded code objects during the trace. A rocprofiler_thread_trace_decoder_callback_t returns decoded data back to user. The first record is always of type ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP.

Parameters
[in] handle Decoder handle
[in] callback Decoded trace data returned to user.
[in] data Thread trace binary data.
[in] size Thread trace binary size.
[in] userdata Userdata passed back to caller via callback.
Returns
rocprofiler_status_t
Return values
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT invalid argument
ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED arch not supported
ROCPROFILER_STATUS_ERROR generic error
ROCPROFILER_STATUS_SUCCESS on success