rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h Source File

rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h Source File#

ROCprofiler-SDK developer API: rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h Source File
ROCprofiler-SDK developer API 1.0.0
ROCm Profiling API and tools
trace_decoder_types.h
1// MIT License
2//
3// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#pragma once
24
25#include <stddef.h>
26#include <stdint.h>
27
28/**
29 * @defgroup THREAD_TRACE Thread Trace Service
30 * @brief ROCprof-trace-decoder defined types. All timestamp values are in shader clock units.
31 *
32 * @{
33 */
34
35/**
36 * @brief Describes the type of info received.
37 */
39{
40 ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = 0,
41 ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST,
42 ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE,
43 ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE,
44 ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST
46
47/**
48 * @brief Describes a PC address.
49 */
51{
52 uint64_t address; ///< Address (code_object_id == 0), or ELF vaddr (code_object_id != 0)
53 uint64_t code_object_id; ///< Zero if no code object was found.
55
56/**
57 * @brief Describes four performance counter values.
58 */
60{
61 int64_t time; ///< Shader clock timestamp in which these counters were read.
62 uint16_t events0; ///< Counter0 (bank==0) or Counter4 (bank==1).
63 uint16_t events1; ///< Counter1 (bank==0) or Counter5 (bank==1).
64 uint16_t events2; ///< Counter2 (bank==0) or Counter6 (bank==1).
65 uint16_t events3; ///< Counter3 (bank==0) or Counter7 (bank==1).
66 uint8_t CU; ///< Shader compute unit ID these counters were collected from.
67 uint8_t bank; ///< Selects counter group [0,3] or [4,7]
69
70/**
71 * @brief Describes an occupancy event (wave started or wave ended).
72 */
74{
75 rocprofiler_thread_trace_decoder_pc_t pc; ///< Wave start address (kernel entry point)
76 uint64_t time; ///< Timestamp of event
77 uint8_t reserved; ///< Reserved
78 uint8_t cu; ///< Compute unit ID (gfx9) or WGP ID (gfx10+).
79 uint8_t simd; ///< SIMD ID [0,3] within compute unit
80 uint8_t wave_id; ///< Wave slot ID within SIMD
81 uint32_t start : 1; ///< 1 if wave_start, 0 if a wave_end
82 uint32_t _rsvd : 31;
84
85/**
86 * @brief Wave state type.
87 */
89{
90 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = 0,
91 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE,
92 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC,
93 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT,
94 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL,
95 ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST,
97
98/**
99 * @brief A wave state change event.
100 */
102{
103 int32_t type; ///< one of rocprofiler_thread_trace_decoder_waveslot_state_type_t
104 int32_t duration; ///< state duration in cycles
106
107/**
108 * @brief Instruction type.
109 */
127
128/**
129 * @brief Describes an instruction execution event.
130 *
131 * The duration is measured as stall+issue time (gfx9) or stall+execution time (gfx10+).
132 * Time + duration marks the issue (gfx9) or execution (gfx10+) completion time.
133 * Time + stall marks the successful issue time.
134 * Duration - stall is the issue time (gfx9) or execution time (gfx10+).
135 */
137{
138 uint32_t category : 8; ///< One of rocprofiler_thread_trace_decoder_inst_category_t
139 uint32_t stall : 24; ///< Stall duration, in clock cycles.
140 int32_t duration; ///< Total instruction duration, in clock cycles.
141 int64_t time; ///< When the wave first attempted to execute this instruction.
144
145/**
146 * @brief Struct describing a wave during its lifetime.
147 * This record is only generated for waves executing in the target_cu and target_simd, selected by
148 * ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU and ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT
149 *
150 * instructions_array contains a time-ordered list of all (traced) instructions by the wave.
151 */
153{
154 uint8_t cu; ///< CU id (gfx9) or wgp id (gfx10+). This is always the target_cu.
155 uint8_t simd; ///< SIMD ID [0,3].
156 uint8_t wave_id; ///< Wave slot ID within SIMD.
157 uint8_t contexts; ///< Counts how many CWSR events have occurred during the wave lifetime.
158
159 uint32_t _rsvd1;
160 uint32_t _rsvd2;
161 uint32_t _rsvd3;
162
163 int64_t begin_time; ///< Wave begin time. Should match occupancy event wave start.
164 int64_t end_time; ///< Wave end time. Should match occupancy event wave end.
165
166 uint64_t timeline_size; ///< timeline_array size
167 uint64_t instructions_size; ///< instructions_array size
171
172/**
173 * @brief Matches the reference (realtime) clock with the shader clock.
174 * Added in rocprof-trace-decoder 0.1.3. Requires aqlprofile for rocm 7.1+.
175 * clock_in_seconds = realtime_clock / ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY
176 * gfx_frequency = delta(shader_clock) / delta(clock_in_seconds)
177 * For best average, use
178 * gfx_frequency[n] = (shader_clock[n]-shader_clock[0]) / (clock_in_seconds[n]-clock_in_seconds[0])
179 */
181{
182 int64_t shader_clock; ///< Clock timestamp in gfx clock units
183 uint64_t realtime_clock; ///< Clock timestamp in realtime units
184 uint64_t reserved;
186
187/**
188 * @brief Bitmask of additional information for shaderdata_t.
189 * Added in rocprof-trace-decoder 0.1.3
190 */
192{
193 ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM = 0,
194 ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_PRIV ///< Generated by the trap handler
195
196 /// @var ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM
197 /// @brief Value comes from s_ttracedata_imm.
199
200/**
201 * @brief Record created by s_ttracedata and s_ttracedata_imm.
202 * Added in rocprof-trace-decoder 0.1.3
203 */
205{
206 int64_t time;
207 uint64_t value; ///< Value written from M0/IMM
208 uint8_t cu; ///< CU id (gfx9) or wgp id (gfx10+).
209 uint8_t simd; ///< SIMD ID [0,3].
210 uint8_t wave_id; ///< Wave slot ID within SIMD.
211 uint8_t flags; ///< bitmask of rocprofiler_thread_trace_decoder_shaderdata_flags_t
212 uint32_t reserved;
214
215/**
216 * @brief Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t
217 */
219{
220 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type uint64_t
221 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, ///< rocprofiler_thread_trace_decoder_occupancy_t*
222 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, ///< rocprofiler_thread_trace_decoder_perfevent_t*
223 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, ///< rocprofiler_thread_trace_decoder_wave_t*
224 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO, ///< rocprofiler_thread_trace_decoder_info_t*
226 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA, ///< rocprofiler_thread_trace_decoder_shaderdata_t*
227 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME, ///< rocprofiler_thread_trace_decoder_realtime_t*
228 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY,
229 ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST
230
231 /// @var ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY
232 /// @brief uint64_t*. Realtime clock frequency in Hz.
234
235/** @} */
uint8_t contexts
Counts how many CWSR events have occurred during the wave lifetime.
int32_t type
one of rocprofiler_thread_trace_decoder_waveslot_state_type_t
int64_t time
When the wave first attempted to execute this instruction.
uint16_t events0
Counter0 (bank==0) or Counter4 (bank==1).
rocprofiler_thread_trace_decoder_pc_t pc
Wave start address (kernel entry point)
rocprofiler_thread_trace_decoder_inst_t * instructions_array
Instructions executed.
uint8_t cu
CU id (gfx9) or wgp id (gfx10+).
uint16_t events2
Counter2 (bank==0) or Counter6 (bank==1).
uint32_t category
One of rocprofiler_thread_trace_decoder_inst_category_t.
int32_t duration
Total instruction duration, in clock cycles.
int64_t shader_clock
Clock timestamp in gfx clock units.
int64_t end_time
Wave end time. Should match occupancy event wave end.
int64_t begin_time
Wave begin time. Should match occupancy event wave start.
int64_t time
Shader clock timestamp in which these counters were read.
uint16_t events3
Counter3 (bank==0) or Counter7 (bank==1).
uint64_t address
Address (code_object_id == 0), or ELF vaddr (code_object_id != 0)
uint8_t cu
CU id (gfx9) or wgp id (gfx10+). This is always the target_cu.
uint8_t CU
Shader compute unit ID these counters were collected from.
uint8_t wave_id
Wave slot ID within SIMD.
uint64_t code_object_id
Zero if no code object was found.
uint8_t simd
SIMD ID [0,3] within compute unit.
uint8_t flags
bitmask of rocprofiler_thread_trace_decoder_shaderdata_flags_t
uint8_t cu
Compute unit ID (gfx9) or WGP ID (gfx10+).
uint32_t stall
Stall duration, in clock cycles.
uint64_t instructions_size
instructions_array size
rocprofiler_thread_trace_decoder_wave_state_t * timeline_array
wave state change events
uint32_t start
1 if wave_start, 0 if a wave_end
uint8_t bank
Selects counter group [0,3] or [4,7].
uint16_t events1
Counter1 (bank==0) or Counter5 (bank==1).
uint64_t realtime_clock
Clock timestamp in realtime units.
rocprofiler_thread_trace_decoder_shaderdata_flags_t
Bitmask of additional information for shaderdata_t Added in rocprof-trace-decoder 0....
rocprofiler_thread_trace_decoder_record_type_t
Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t.
rocprofiler_thread_trace_decoder_info_t
Describes the type of info received.
rocprofiler_thread_trace_decoder_wstate_type_t
Wave state type.
rocprofiler_thread_trace_decoder_inst_category_t
Instruction type.
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP
Record is gfxip_major, type uint64_t.
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA
rocprofiler_thread_trace_decoder_shaderdata_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG
Debug.
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO
rocprofiler_thread_trace_decoder_info_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY
rocprofiler_thread_trace_decoder_occupancy_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE
rocprofiler_thread_trace_decoder_wave_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME
rocprofiler_thread_trace_decoder_realtime_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT
rocprofiler_thread_trace_decoder_perfevent_t*
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH
Raytrace op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU
Vector ALU op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT
Flat addressing vmem or lds.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED
Internal operation.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT
Wave context switch.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS
Local Data Share op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM
Scalar memory op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT
Branch not taken.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU
Scalar ALU op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE
MSG types.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM
Vector memory op.
@ ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP
Branch taken.
Describes an instruction execution event.
Describes an occupancy event (wave started or wave ended).
Describes four performance counter values.
Matches the reference (realtime) clock with the shader clock Added in rocprof-trace-decoder 0....
Record created by s_ttracedata and s_ttracedata_imm Added in rocprof-trace-decoder 0....
Struct describing a wave during its lifetime. This record is only generated for waves executing in t...