rocprofiler-sdk/cxx/serialization/load.hpp Source File

rocprofiler-sdk/cxx/serialization/load.hpp Source File

ROCprofiler-SDK developer API: rocprofiler-sdk/cxx/serialization/load.hpp Source File
ROCprofiler-SDK developer API 1.0.0
ROCm Profiling API and tools
load.hpp
1// MIT License
2//
3// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22//
23
24#pragma once
25
26#include <rocprofiler-sdk/buffer.h>
27#include <rocprofiler-sdk/callback_tracing.h>
28#include <rocprofiler-sdk/external_correlation.h>
29#include <rocprofiler-sdk/fwd.h>
30#include <rocprofiler-sdk/internal_threading.h>
32#include <rocprofiler-sdk/cxx/name_info.hpp>
33#include <rocprofiler-sdk/cxx/utility.hpp>
34
35#include <cereal/archives/binary.hpp>
36#include <cereal/archives/json.hpp>
37#include <cereal/archives/portable_binary.hpp>
38#include <cereal/cereal.hpp>
39#include <cereal/types/array.hpp>
40#include <cereal/types/atomic.hpp>
41#include <cereal/types/bitset.hpp>
42#include <cereal/types/chrono.hpp>
43#include <cereal/types/common.hpp>
44#include <cereal/types/complex.hpp>
45#include <cereal/types/deque.hpp>
46#include <cereal/types/functional.hpp>
47#include <cereal/types/list.hpp>
48#include <cereal/types/map.hpp>
49#include <cereal/types/optional.hpp>
50#include <cereal/types/queue.hpp>
51#include <cereal/types/set.hpp>
52#include <cereal/types/stack.hpp>
53#include <cereal/types/string.hpp>
54#include <cereal/types/unordered_map.hpp>
55#include <cereal/types/unordered_set.hpp>
56#include <cereal/types/utility.hpp>
57#include <cereal/types/variant.hpp>
58#include <cereal/types/vector.hpp>
59
60#include <string>
61#include <string_view>
62#include <type_traits>
63#include <utility>
64#include <vector>
65
// Optional tracing of every field that is loaded. Enabled by defining
// ROCPROFILER_SDK_CXX_SERIALIZATION_LOAD_DEBUG to a value greater than zero
// before this header is included; otherwise the macro expands to nothing.
#if defined(ROCPROFILER_SDK_CXX_SERIALIZATION_LOAD_DEBUG) && \
    ROCPROFILER_SDK_CXX_SERIALIZATION_LOAD_DEBUG > 0
// std::clog is only needed by the debug variant of the macro
#    include <iostream>
#    define ROCP_SDK_LOAD_MESSAGE(NAME) \
        std::clog << "[" << __PRETTY_FUNCTION__ << "] loading JSON field " << NAME << "...\n" \
                  << std::flush
#else
#    define ROCP_SDK_LOAD_MESSAGE(NAME)
#endif

// All helpers below expect two names to be in scope at the point of use:
//   ar   - the archive being read from
//   data - the object being populated (not needed by ROCP_SDK_LOAD_VALUE)
// Each helper is wrapped in do { ... } while(0) so that it behaves as exactly
// one statement, e.g. as the body of an unbraced if/else.

// Load the member `data.FIELD` from the entry whose name is the member's name.
#define ROCP_SDK_LOAD_DATA_FIELD(FIELD) \
    do \
    { \
        ROCP_SDK_LOAD_MESSAGE(#FIELD); \
        ar(make_nvp(#FIELD, data.FIELD)); \
    } while(0)

// Load the member `data.VALUE` from the entry named NAME.
#define ROCP_SDK_LOAD_DATA_VALUE(NAME, VALUE) \
    do \
    { \
        ROCP_SDK_LOAD_MESSAGE(NAME); \
        ar(make_nvp(NAME, data.VALUE)); \
    } while(0)

// Load an arbitrary lvalue VALUE from the entry named NAME.
#define ROCP_SDK_LOAD_VALUE(NAME, VALUE) \
    do \
    { \
        ROCP_SDK_LOAD_MESSAGE(NAME); \
        ar(make_nvp(NAME, VALUE)); \
    } while(0)

// Load a C-string member. The text is read into a heap-allocated std::string
// and `data.FIELD` is pointed at its buffer. The std::string is never deleted
// by this macro, which keeps the pointer stored in `data.FIELD` valid after
// the macro's scope ends.
#define ROCP_SDK_LOAD_DATA_CSTR(FIELD) \
    do \
    { \
        ROCP_SDK_LOAD_MESSAGE(#FIELD); \
        auto _val = new std::string{}; \
        ar(make_nvp(#FIELD, *_val)); \
        data.FIELD = _val->c_str(); \
    } while(0)

// Load a member through a temporary copy: the current value is copied, the
// copy is handed to the archive, and the result is assigned back. Intended
// for members (e.g. bit-fields) that cannot be passed to make_nvp directly.
#define ROCP_SDK_LOAD_DATA_BITFIELD(NAME, VALUE) \
    do \
    { \
        ROCP_SDK_LOAD_MESSAGE(NAME); \
        auto _val = data.VALUE; \
        ar(make_nvp(NAME, _val)); \
        data.VALUE = _val; \
    } while(0)
98
// Opening and closing of the namespace that holds the load() overloads below.
// Both macros are only defined when the including translation unit has not
// already provided its own definition; the defaults open/close `namespace cereal`.
#ifndef ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN
#    define ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN \
        namespace cereal \
        {
#endif

#ifndef ROCPROFILER_SDK_CEREAL_NAMESPACE_END
#    define ROCPROFILER_SDK_CEREAL_NAMESPACE_END }  // namespace cereal
#endif
108
109ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN
110
111template <typename ArchiveT>
112void
113load(ArchiveT& ar, rocprofiler_agent_id_t& data)
114{
115 ROCP_SDK_LOAD_DATA_FIELD(handle);
116}
117
118template <typename ArchiveT>
119void
120load(ArchiveT& ar, HsaCacheType& data)
121{
122 ROCP_SDK_LOAD_DATA_BITFIELD("Data", ui32.Data);
123 ROCP_SDK_LOAD_DATA_BITFIELD("Instruction", ui32.Instruction);
124 ROCP_SDK_LOAD_DATA_BITFIELD("CPU", ui32.CPU);
125 ROCP_SDK_LOAD_DATA_BITFIELD("HSACU", ui32.HSACU);
126}
127
128template <typename ArchiveT>
129void
130load(ArchiveT& ar, HSA_LINKPROPERTY& data)
131{
132 ROCP_SDK_LOAD_DATA_BITFIELD("Override", ui32.Override);
133 ROCP_SDK_LOAD_DATA_BITFIELD("NonCoherent", ui32.NonCoherent);
134 ROCP_SDK_LOAD_DATA_BITFIELD("NoAtomics32bit", ui32.NoAtomics32bit);
135 ROCP_SDK_LOAD_DATA_BITFIELD("NoAtomics64bit", ui32.NoAtomics64bit);
136 ROCP_SDK_LOAD_DATA_BITFIELD("NoPeerToPeerDMA", ui32.NoPeerToPeerDMA);
137}
138
139template <typename ArchiveT>
140void
141load(ArchiveT& ar, HSA_CAPABILITY& data)
142{
143 ROCP_SDK_LOAD_DATA_BITFIELD("HotPluggable", ui32.HotPluggable);
144 ROCP_SDK_LOAD_DATA_BITFIELD("HSAMMUPresent", ui32.HSAMMUPresent);
145 ROCP_SDK_LOAD_DATA_BITFIELD("SharedWithGraphics", ui32.SharedWithGraphics);
146 ROCP_SDK_LOAD_DATA_BITFIELD("QueueSizePowerOfTwo", ui32.QueueSizePowerOfTwo);
147 ROCP_SDK_LOAD_DATA_BITFIELD("QueueSize32bit", ui32.QueueSize32bit);
148 ROCP_SDK_LOAD_DATA_BITFIELD("QueueIdleEvent", ui32.QueueIdleEvent);
149 ROCP_SDK_LOAD_DATA_BITFIELD("VALimit", ui32.VALimit);
150 ROCP_SDK_LOAD_DATA_BITFIELD("WatchPointsSupported", ui32.WatchPointsSupported);
151 ROCP_SDK_LOAD_DATA_BITFIELD("WatchPointsTotalBits", ui32.WatchPointsTotalBits);
152 ROCP_SDK_LOAD_DATA_BITFIELD("DoorbellType", ui32.DoorbellType);
153 ROCP_SDK_LOAD_DATA_BITFIELD("AQLQueueDoubleMap", ui32.AQLQueueDoubleMap);
154 ROCP_SDK_LOAD_DATA_BITFIELD("DebugTrapSupported", ui32.DebugTrapSupported);
155 ROCP_SDK_LOAD_DATA_BITFIELD("WaveLaunchTrapOverrideSupported",
156 ui32.WaveLaunchTrapOverrideSupported);
157 ROCP_SDK_LOAD_DATA_BITFIELD("WaveLaunchModeSupported", ui32.WaveLaunchModeSupported);
158 ROCP_SDK_LOAD_DATA_BITFIELD("PreciseMemoryOperationsSupported",
159 ui32.PreciseMemoryOperationsSupported);
160 ROCP_SDK_LOAD_DATA_BITFIELD("DEPRECATED_SRAM_EDCSupport", ui32.DEPRECATED_SRAM_EDCSupport);
161 ROCP_SDK_LOAD_DATA_BITFIELD("Mem_EDCSupport", ui32.Mem_EDCSupport);
162 ROCP_SDK_LOAD_DATA_BITFIELD("RASEventNotify", ui32.RASEventNotify);
163 ROCP_SDK_LOAD_DATA_BITFIELD("ASICRevision", ui32.ASICRevision);
164 ROCP_SDK_LOAD_DATA_BITFIELD("SRAM_EDCSupport", ui32.SRAM_EDCSupport);
165 ROCP_SDK_LOAD_DATA_BITFIELD("SVMAPISupported", ui32.SVMAPISupported);
166 ROCP_SDK_LOAD_DATA_BITFIELD("CoherentHostAccess", ui32.CoherentHostAccess);
167 ROCP_SDK_LOAD_DATA_BITFIELD("DebugSupportedFirmware", ui32.DebugSupportedFirmware);
168}
169
170template <typename ArchiveT>
171void
172load(ArchiveT& ar, rocprofiler_dim3_t& data)
173{
174 ROCP_SDK_LOAD_DATA_FIELD(x);
175 ROCP_SDK_LOAD_DATA_FIELD(y);
176 ROCP_SDK_LOAD_DATA_FIELD(z);
177}
178
179template <typename ArchiveT>
180void
181load(ArchiveT& ar, HSA_MEMORYPROPERTY& data)
182{
183 ROCP_SDK_LOAD_DATA_BITFIELD("HotPluggable", ui32.HotPluggable);
184 ROCP_SDK_LOAD_DATA_BITFIELD("NonVolatile", ui32.NonVolatile);
185}
186
187template <typename ArchiveT>
188void
189load(ArchiveT& ar, HSA_ENGINE_VERSION& data)
190{
191 ROCP_SDK_LOAD_DATA_BITFIELD("uCodeSDMA", uCodeSDMA);
192 ROCP_SDK_LOAD_DATA_BITFIELD("uCodeRes", uCodeRes);
193}
194
195template <typename ArchiveT>
196void
197load(ArchiveT& ar, HSA_ENGINE_ID& data)
198{
199 ROCP_SDK_LOAD_DATA_BITFIELD("uCode", ui32.uCode);
200 ROCP_SDK_LOAD_DATA_BITFIELD("Major", ui32.Major);
201 ROCP_SDK_LOAD_DATA_BITFIELD("Minor", ui32.Minor);
202 ROCP_SDK_LOAD_DATA_BITFIELD("Stepping", ui32.Stepping);
203}
204
205template <typename ArchiveT>
206void
207load(ArchiveT& ar, rocprofiler_agent_cache_t& data)
208{
209 ROCP_SDK_LOAD_DATA_FIELD(processor_id_low);
210 ROCP_SDK_LOAD_DATA_FIELD(size);
211 ROCP_SDK_LOAD_DATA_FIELD(level);
212 ROCP_SDK_LOAD_DATA_FIELD(cache_line_size);
213 ROCP_SDK_LOAD_DATA_FIELD(cache_lines_per_tag);
214 ROCP_SDK_LOAD_DATA_FIELD(association);
215 ROCP_SDK_LOAD_DATA_FIELD(latency);
216 ROCP_SDK_LOAD_DATA_FIELD(type);
217}
218
219template <typename ArchiveT>
220void
221load(ArchiveT& ar, rocprofiler_agent_io_link_t& data)
222{
223 ROCP_SDK_LOAD_DATA_FIELD(type);
224 ROCP_SDK_LOAD_DATA_FIELD(version_major);
225 ROCP_SDK_LOAD_DATA_FIELD(version_minor);
226 ROCP_SDK_LOAD_DATA_FIELD(node_from);
227 ROCP_SDK_LOAD_DATA_FIELD(node_to);
228 ROCP_SDK_LOAD_DATA_FIELD(weight);
229 ROCP_SDK_LOAD_DATA_FIELD(min_latency);
230 ROCP_SDK_LOAD_DATA_FIELD(max_latency);
231 ROCP_SDK_LOAD_DATA_FIELD(min_bandwidth);
232 ROCP_SDK_LOAD_DATA_FIELD(max_bandwidth);
233 ROCP_SDK_LOAD_DATA_FIELD(recommended_transfer_size);
234 ROCP_SDK_LOAD_DATA_FIELD(flags);
235}
236
237template <typename ArchiveT>
238void
239load(ArchiveT& ar, rocprofiler_agent_mem_bank_t& data)
240{
241 ROCP_SDK_LOAD_DATA_FIELD(heap_type);
242 ROCP_SDK_LOAD_DATA_FIELD(flags);
243 ROCP_SDK_LOAD_DATA_FIELD(width);
244 ROCP_SDK_LOAD_DATA_FIELD(mem_clk_max);
245 ROCP_SDK_LOAD_DATA_FIELD(size_in_bytes);
246}
247
248template <typename ArchiveT>
249void
250load(ArchiveT& ar, rocprofiler_agent_v0_t& data)
251{
252 ROCP_SDK_LOAD_DATA_FIELD(size);
253 ROCP_SDK_LOAD_DATA_FIELD(id);
254 ROCP_SDK_LOAD_DATA_FIELD(type);
255 ROCP_SDK_LOAD_DATA_FIELD(cpu_cores_count);
256 ROCP_SDK_LOAD_DATA_FIELD(simd_count);
257 ROCP_SDK_LOAD_DATA_FIELD(mem_banks_count);
258 ROCP_SDK_LOAD_DATA_FIELD(caches_count);
259 ROCP_SDK_LOAD_DATA_FIELD(io_links_count);
260 ROCP_SDK_LOAD_DATA_FIELD(cpu_core_id_base);
261 ROCP_SDK_LOAD_DATA_FIELD(simd_id_base);
262 ROCP_SDK_LOAD_DATA_FIELD(max_waves_per_simd);
263 ROCP_SDK_LOAD_DATA_FIELD(lds_size_in_kb);
264 ROCP_SDK_LOAD_DATA_FIELD(gds_size_in_kb);
265 ROCP_SDK_LOAD_DATA_FIELD(num_gws);
266 ROCP_SDK_LOAD_DATA_FIELD(wave_front_size);
267 ROCP_SDK_LOAD_DATA_FIELD(num_xcc);
268 ROCP_SDK_LOAD_DATA_FIELD(cu_count);
269 ROCP_SDK_LOAD_DATA_FIELD(array_count);
270 ROCP_SDK_LOAD_DATA_FIELD(num_shader_banks);
271 ROCP_SDK_LOAD_DATA_FIELD(simd_arrays_per_engine);
272 ROCP_SDK_LOAD_DATA_FIELD(cu_per_simd_array);
273 ROCP_SDK_LOAD_DATA_FIELD(simd_per_cu);
274 ROCP_SDK_LOAD_DATA_FIELD(max_slots_scratch_cu);
275 ROCP_SDK_LOAD_DATA_FIELD(gfx_target_version);
276 ROCP_SDK_LOAD_DATA_FIELD(vendor_id);
277 ROCP_SDK_LOAD_DATA_FIELD(device_id);
278 ROCP_SDK_LOAD_DATA_FIELD(location_id);
279 ROCP_SDK_LOAD_DATA_FIELD(domain);
280 ROCP_SDK_LOAD_DATA_FIELD(drm_render_minor);
281 ROCP_SDK_LOAD_DATA_FIELD(num_sdma_engines);
282 ROCP_SDK_LOAD_DATA_FIELD(num_sdma_xgmi_engines);
283 ROCP_SDK_LOAD_DATA_FIELD(num_sdma_queues_per_engine);
284 ROCP_SDK_LOAD_DATA_FIELD(num_cp_queues);
285 ROCP_SDK_LOAD_DATA_FIELD(max_engine_clk_ccompute);
286 ROCP_SDK_LOAD_DATA_FIELD(max_engine_clk_fcompute);
287 ROCP_SDK_LOAD_DATA_FIELD(sdma_fw_version);
288 ROCP_SDK_LOAD_DATA_FIELD(fw_version);
289 ROCP_SDK_LOAD_DATA_FIELD(capability);
290 ROCP_SDK_LOAD_DATA_FIELD(cu_per_engine);
291 ROCP_SDK_LOAD_DATA_FIELD(max_waves_per_cu);
292 ROCP_SDK_LOAD_DATA_FIELD(family_id);
293 ROCP_SDK_LOAD_DATA_FIELD(workgroup_max_size);
294 ROCP_SDK_LOAD_DATA_FIELD(grid_max_size);
295 ROCP_SDK_LOAD_DATA_FIELD(local_mem_size);
296 ROCP_SDK_LOAD_DATA_FIELD(hive_id);
297 ROCP_SDK_LOAD_DATA_FIELD(gpu_id);
298 ROCP_SDK_LOAD_DATA_FIELD(workgroup_max_dim);
299 ROCP_SDK_LOAD_DATA_FIELD(grid_max_dim);
300 ROCP_SDK_LOAD_DATA_CSTR(name);
301 ROCP_SDK_LOAD_DATA_CSTR(vendor_name);
302 ROCP_SDK_LOAD_DATA_CSTR(product_name);
303 ROCP_SDK_LOAD_DATA_CSTR(model_name);
304 ROCP_SDK_LOAD_DATA_FIELD(node_id);
305 ROCP_SDK_LOAD_DATA_FIELD(logical_node_id);
306 ROCP_SDK_LOAD_DATA_FIELD(logical_node_type_id);
307
308 auto generate = [&](auto name, const auto*& value, auto& size) {
309 using value_type =
310 std::remove_const_t<std::remove_pointer_t<std::remove_reference_t<decltype(value)>>>;
311 auto vec = std::vector<value_type>{};
312 ar(make_nvp(name, vec));
313 size = vec.size();
314 auto* value_m = new value_type[size];
315 for(uint64_t i = 0; i < size; ++i)
316 value_m[i] = vec.at(i);
317 value = value_m;
318 };
319
320 generate("mem_banks", data.mem_banks, data.mem_banks_count);
321 generate("caches", data.caches, data.caches_count);
322 generate("io_links", data.io_links, data.io_links_count);
323}
324
325template <typename ArchiveT>
326void
327load(ArchiveT& ar, rocprofiler_address_t& data)
328{
329 ROCP_SDK_LOAD_DATA_FIELD(handle);
330}
331
332template <typename ArchiveT>
333void
335{
336 ROCP_SDK_LOAD_DATA_FIELD(size);
337 ROCP_SDK_LOAD_DATA_FIELD(code_object_id);
338 ROCP_SDK_LOAD_DATA_FIELD(agent_id);
339 ROCP_SDK_LOAD_DATA_CSTR(uri);
340 ROCP_SDK_LOAD_DATA_FIELD(load_base);
341 ROCP_SDK_LOAD_DATA_FIELD(load_size);
342 ROCP_SDK_LOAD_DATA_FIELD(load_delta);
343 ROCP_SDK_LOAD_DATA_FIELD(storage_type);
344 if(data.storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE)
345 {
346 ROCP_SDK_LOAD_DATA_FIELD(storage_file);
347 }
348 else if(data.storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY)
349 {
350 ROCP_SDK_LOAD_DATA_FIELD(memory_base);
351 ROCP_SDK_LOAD_DATA_FIELD(memory_size);
352 }
353}
354
355template <typename ArchiveT>
356void
358{
359 ROCP_SDK_LOAD_DATA_FIELD(size);
360 ROCP_SDK_LOAD_DATA_FIELD(kernel_id);
361 ROCP_SDK_LOAD_DATA_FIELD(code_object_id);
362 ROCP_SDK_LOAD_DATA_CSTR(kernel_name);
363 ROCP_SDK_LOAD_DATA_FIELD(kernel_object);
364 ROCP_SDK_LOAD_DATA_FIELD(kernarg_segment_size);
365 ROCP_SDK_LOAD_DATA_FIELD(kernarg_segment_alignment);
366 ROCP_SDK_LOAD_DATA_FIELD(group_segment_size);
367 ROCP_SDK_LOAD_DATA_FIELD(private_segment_size);
368 ROCP_SDK_LOAD_DATA_FIELD(sgpr_count);
369 ROCP_SDK_LOAD_DATA_FIELD(arch_vgpr_count);
370 ROCP_SDK_LOAD_DATA_FIELD(accum_vgpr_count);
371 ROCP_SDK_LOAD_DATA_FIELD(kernel_code_entry_byte_offset);
372 ROCP_SDK_LOAD_DATA_FIELD(kernel_address);
373}
374
375ROCPROFILER_SDK_CEREAL_NAMESPACE_END
376
// Remove the helper macros that take the implicit `data` object so they do
// not leak into files that include this header.
// NOTE(review): ROCP_SDK_LOAD_VALUE and ROCP_SDK_LOAD_MESSAGE are not
// undefined here and therefore stay visible to includers - confirm whether
// that is intended before changing it.
#undef ROCP_SDK_LOAD_DATA_BITFIELD
#undef ROCP_SDK_LOAD_DATA_CSTR
#undef ROCP_SDK_LOAD_DATA_VALUE
#undef ROCP_SDK_LOAD_DATA_FIELD
const rocprofiler_agent_io_link_t * io_links
array of IO link info
Definition agent.h:203
uint32_t mem_banks_count
Definition agent.h:141
uint32_t caches_count
Definition agent.h:143
const rocprofiler_agent_mem_bank_t * mem_banks
array of memory bank info
Definition agent.h:201
uint32_t io_links_count
Definition agent.h:144
const rocprofiler_agent_cache_t * caches
array of cache info
Definition agent.h:202
Cache information for an agent.
Definition agent.h:56
Memory bank information for an agent.
Definition agent.h:91
Stores the properties of an agent (CPU, GPU, etc.)
Definition agent.h:131
Agent Identifier.
Definition fwd.h:677
Multi-dimensional struct of data used to describe GPU workgroup and grid sizes.
Definition fwd.h:702
Stores memory address for profiling.
Definition fwd.h:565
rocprofiler_code_object_storage_type_t storage_type
storage type of the code object reader used to load the loaded code object
ROCProfiler Code Object Kernel Symbol Tracer Callback Record.
ROCProfiler Code Object Load Tracer Callback Record.
ROCProfiler-SDK API interface.