Thread trace

Contents

Thread trace#

enum rocprofiler_thread_trace_parameter_type_t#

Types of Thread Trace parameters.

Values:

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU#

Select the Target CU or WGP.

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK#

Bitmask of shader engines.

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE#

Size of combined GPU buffer for ATT.

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT#

Bitmask (GFX9) or ID (Navi) of SIMDs.

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL#

Period [1,32] or disable (0) perfmon.

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER#

Perfmon ID and SIMD mask.

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL#

Serializes kernels not under thread trace.

enumerator ROCPROFILER_THREAD_TRACE_PARAMETER_LAST#
enum rocprofiler_thread_trace_control_flags_t#

Values:

enumerator ROCPROFILER_THREAD_TRACE_CONTROL_NONE#
enumerator ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP#
enum rocprofiler_thread_trace_decoder_info_t#

Describes the type of info received.

Values:

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST#
enum rocprofiler_thread_trace_decoder_wstate_type_t#

Wave state type.

Values:

enumerator ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST#
enum rocprofiler_thread_trace_decoder_inst_category_t#

Instruction type.

Values:

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE#
enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM#

Scalar memory op.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU#

Scalar ALU op.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM#

Vector memory op.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT#

Flat addressing vmem or lds.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS#

Local Data Share op.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU#

Vector ALU op.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP#

Branch taken.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT#

Branch not taken.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED#

Internal operation.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT#

Wave context switch.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE#

MSG types.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH#

Raytrace op.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST#
enum rocprofiler_thread_trace_decoder_record_type_t#

Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t.

Values:

enumerator ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP#

Record is gfxip_major, type size_t.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY#

rocprofiler_thread_trace_decoder_occupancy_t*

enumerator ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT#

rocprofiler_thread_trace_decoder_perfevent_t*

enumerator ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE#

rocprofiler_thread_trace_decoder_wave_t*

enumerator ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO#

rocprofiler_thread_trace_decoder_info_t*

enumerator ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG#

Debug.

enumerator ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST#
typedef void (*rocprofiler_thread_trace_shader_data_callback_t)(rocprofiler_agent_id_t agent, int64_t shader_engine_id, void *data, unsigned long data_size, rocprofiler_user_data_t userdata)#

Callback to be triggered every time some ATT data is generated by the device.

Param agent:

[in] Identifier for the target agent (rocprofiler_agent_id_t).

Param shader_engine_id:

[in] ID of shader engine, as enabled by SE_MASK

Param data:

[in] Pointer to the buffer containing the ATT data

Param data_size:

[in] Number of bytes in “data”

Param userdata:

[in] Passed back to user from rocprofiler_thread_trace_dispatch_callback_t()

typedef rocprofiler_thread_trace_control_flags_t (*rocprofiler_thread_trace_dispatch_callback_t)(rocprofiler_agent_id_t agent_id, rocprofiler_queue_id_t queue_id, rocprofiler_async_correlation_id_t correlation_id, rocprofiler_kernel_id_t kernel_id, rocprofiler_dispatch_id_t dispatch_id, void *userdata_config, rocprofiler_user_data_t *userdata_shader)#

Callback to be triggered on every kernel dispatch, indicating whether to start and/or stop ATT.

Param agent_id:

[in] agent_id.

Param queue_id:

[in] queue_id.

Param correlation_id:

[in] internal correlation id.

Param kernel_id:

[in] kernel_id.

Param dispatch_id:

[in] dispatch_id.

Param userdata_config:

[in] Userdata passed back from rocprofiler_configure_dispatch_thread_trace_service.

Param userdata_shader:

[out] Userdata to be passed in shader_callback

typedef void (*rocprofiler_thread_trace_decoder_callback_t)(rocprofiler_thread_trace_decoder_record_type_t record_type_id, void *trace_events, uint64_t trace_size, void *userdata)#

Callback for rocprof-trace-decoder to return decoder traces back to user.

Param record_type_id:

[in] One of rocprofiler_thread_trace_decoder_record_type_t

Param trace_events:

[in] A pointer to a sequence of events, of size trace_size.

Param trace_size:

[in] The number of events in the trace.

Param userdata:

[in] Arbitrary data pointer to be sent back to the user via callback.

rocprofiler_status_t rocprofiler_configure_device_thread_trace_service(rocprofiler_context_id_t context_id, rocprofiler_agent_id_t agent_id, rocprofiler_thread_trace_parameter_t *parameters, unsigned long num_parameters, rocprofiler_thread_trace_shader_data_callback_t shader_callback, rocprofiler_user_data_t callback_userdata)#

Configure Thread Trace Service for agent. Only one agent profile may be configured per context, and only one active context may be profiling a given agent at a time. Multiple agent contexts can be started at the same time if they are profiling different agents.

Parameters:
  • context_id[in] context id

  • parameters[in] List of ATT-specific parameters.

  • num_parameters[in] Number of parameters. Zero is allowed.

  • agent_id[in] agent to configure profiling on.

  • shader_callback[in] Callback fn where the collected data will be sent to.

  • callback_userdata[in] Passed back to user in shader_callback.

Return values:
  • ROCPROFILER_STATUS_SUCCESS – on success

  • ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED – for configuration locked

  • ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID – for conflicting configurations in the same ctx

  • ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND – for invalid context id

  • ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT – for invalid rocprofiler_thread_trace_parameter_t

Returns:

rocprofiler_status_t

rocprofiler_status_t rocprofiler_configure_dispatch_thread_trace_service(rocprofiler_context_id_t context_id, rocprofiler_agent_id_t agent_id, rocprofiler_thread_trace_parameter_t *parameters, unsigned long num_parameters, rocprofiler_thread_trace_dispatch_callback_t dispatch_callback, rocprofiler_thread_trace_shader_data_callback_t shader_callback, void *callback_userdata)#

Enables the thread trace service for dispatch-based tracing. The tool has an option to enable/disable thread trace on every dispatch callback. This service serializes all traced kernels, and optionally all non-traced kernels.

Parameters:
  • context_id[in] id of the context used for start/stop thread_trace.

  • agent_id[in] rocprofiler_agent_id_t to configure thread trace.

  • parameters[in] List of ATT-specific parameters.

  • num_parameters[in] Number of parameters. Zero is allowed.

  • dispatch_callback[in] Control fn which decides when TT starts/stops collecting.

  • shader_callback[in] Callback fn where the collected data will be sent to.

  • callback_userdata[in] Passed back to user in dispatch_callback.

Return values:
  • ROCPROFILER_STATUS_SUCCESS – on success

  • ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED – for configuration locked

  • ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID – for conflicting configurations in the same ctx

  • ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND – for invalid context id

  • ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT – for invalid rocprofiler_thread_trace_parameter_t

  • ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED – if already configured

Returns:

rocprofiler_status_t

rocprofiler_status_t rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_t *handle, const char *path)#

Initializes Trace Decoder library with a library search path.

Parameters:
  • handle[out] Handle to created decoder instance.

  • path[in] Path to trace decoder library location (e.g. /opt/rocm/lib).

Return values:
  • ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE – Library not found

  • ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI – Library found but version not supported

  • ROCPROFILER_STATUS_SUCCESS – Handle created

Returns:

rocprofiler_status_t

void rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle_t handle)#

Deletes handle created by rocprofiler_thread_trace_decoder_create.

Parameters:

handle[in] Handle to destroy

rocprofiler_status_t rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_handle_t handle, uint64_t load_id, uint64_t load_addr, uint64_t load_size, const void *data, uint64_t size)#

Loads a code object binary to match with Thread Trace. The size, data and load_* are reported by rocprofiler-sdk’s code object tracing service. Used by the decoder library to know which code objects to look into when decoding shader data. Not all application code objects are required to be reported here, only the ones containing code executed at the time the shader data was collected by thread_trace services. If a code object not reported here is encountered while decoding shader data, a record of type INFO_STITCH_INCOMPLETE will be generated and instructions will not be reported with a PC address.

Parameters:
  • handle[in] Handle to decoder instance.

  • load_id[in] Code object load ID.

  • load_addr[in] Code object load address.

  • load_size[in] Code object load size.

  • data[in] Code object binary data.

  • size[in] Code object binary data size.

Return values:
  • ROCPROFILER_STATUS_ERROR – Unable to load code object.

  • ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT – Invalid handle

  • ROCPROFILER_STATUS_SUCCESS – Code object loaded

Returns:

rocprofiler_status_t

rocprofiler_status_t rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder_handle_t handle, uint64_t load_id)#

Unloads a code object binary.

Parameters:
  • handle[in] Handle to decoder instance.

  • load_id[in] Code object load ID to remove.

Return values:
  • ROCPROFILER_STATUS_ERROR – Code object not loaded.

  • ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT – Invalid handle

  • ROCPROFILER_STATUS_SUCCESS – Code object unloaded

Returns:

rocprofiler_status_t

rocprofiler_status_t rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle, rocprofiler_thread_trace_decoder_callback_t callback, void *data, uint64_t size, void *userdata)#

Decodes shader data returned by rocprofiler_thread_trace_shader_data_callback_t. Use rocprofiler_thread_trace_decoder_codeobj_load to add references to loaded code objects during the trace. A rocprofiler_thread_trace_decoder_callback_t returns decoded data back to user. The first record is always of type ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP.

Parameters:
  • handle[in] Decoder handle

  • callback[in] Decoded trace data returned to user.

  • data[in] Thread trace binary data.

  • size[in] Thread trace binary size.

  • userdata[in] Userdata passed back to caller via callback.

Return values:
  • ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT – invalid argument

  • ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED – arch not supported

  • ROCPROFILER_STATUS_ERROR – generic error

  • ROCPROFILER_STATUS_SUCCESS – on success

Returns:

rocprofiler_status_t

const char *rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_handle_t handle, rocprofiler_thread_trace_decoder_info_t info)#

Returns the string description of a rocprofiler_thread_trace_decoder_info_t record.

Parameters:
  • handle[in] Decoder handle

  • info[in] The decoder info received

Return values:

Null-terminated string as description of “info”.

struct rocprofiler_thread_trace_parameter_t#
#include <rocprofiler-sdk/experimental/thread-trace/core.h>

Thread Trace parameter specification.

struct rocprofiler_thread_trace_decoder_handle_t#
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>

Handle containing a loaded rocprof-trace-decoder and a decoder state.

struct rocprofiler_thread_trace_decoder_pc_t#
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Describes a PC address.

struct rocprofiler_thread_trace_decoder_perfevent_t#
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Describes four performance counter values.

struct rocprofiler_thread_trace_decoder_occupancy_t#
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Describes an occupancy event (wave started or wave ended).

struct rocprofiler_thread_trace_decoder_wave_state_t#
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

A wave state change event.

struct rocprofiler_thread_trace_decoder_inst_t#
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Describes an instruction execution event.

The duration is measured as stall+issue time (gfx9) or stall+execution time (gfx10+). Time + duration marks the issue (gfx9) or execution (gfx10+) completion time. Time + stall marks the successful issue time. Duration - stall is the issue time (gfx9) or execution time (gfx10+).

struct rocprofiler_thread_trace_decoder_wave_t#
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>

Struct describing a wave during its lifetime. This record is only generated for waves executing in the target_cu and target_simd, selected by ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU and ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT.

instructions_array contains a time-ordered list of all (traced) instructions by the wave.

rocprofiler_thread_trace_parameter_t.__unnamed12__

Public Members

uint64_t value#
struct rocprofiler_thread_trace_parameter_t
rocprofiler_thread_trace_parameter_t.__unnamed12__.__unnamed14__