26#include <rocprofiler-sdk/buffer.h>
27#include <rocprofiler-sdk/callback_tracing.h>
28#include <rocprofiler-sdk/external_correlation.h>
29#include <rocprofiler-sdk/fwd.h>
30#include <rocprofiler-sdk/internal_threading.h>
32#include <rocprofiler-sdk/cxx/name_info.hpp>
33#include <rocprofiler-sdk/cxx/utility.hpp>
35#include <cereal/archives/binary.hpp>
36#include <cereal/archives/json.hpp>
37#include <cereal/archives/portable_binary.hpp>
38#include <cereal/cereal.hpp>
39#include <cereal/types/array.hpp>
40#include <cereal/types/atomic.hpp>
41#include <cereal/types/bitset.hpp>
42#include <cereal/types/chrono.hpp>
43#include <cereal/types/common.hpp>
44#include <cereal/types/complex.hpp>
45#include <cereal/types/deque.hpp>
46#include <cereal/types/functional.hpp>
47#include <cereal/types/list.hpp>
48#include <cereal/types/map.hpp>
49#include <cereal/types/optional.hpp>
50#include <cereal/types/queue.hpp>
51#include <cereal/types/set.hpp>
52#include <cereal/types/stack.hpp>
53#include <cereal/types/string.hpp>
54#include <cereal/types/unordered_map.hpp>
55#include <cereal/types/unordered_set.hpp>
56#include <cereal/types/utility.hpp>
57#include <cereal/types/variant.hpp>
58#include <cereal/types/vector.hpp>
// Debug tracing hook. When ROCPROFILER_SDK_CXX_SERIALIZATION_LOAD_DEBUG is defined and
// greater than zero, ROCP_SDK_LOAD_MESSAGE(NAME) streams the enclosing function's name
// (__PRETTY_FUNCTION__) and the field name to std::clog.
// NOTE(review): the #else/#endif lines are not visible in this excerpt; the second, empty
// definition is presumably the fallback used when debugging is disabled -- confirm.
66#if defined(ROCPROFILER_SDK_CXX_SERIALIZATION_LOAD_DEBUG) && \
67 ROCPROFILER_SDK_CXX_SERIALIZATION_LOAD_DEBUG > 0
68# define ROCP_SDK_LOAD_MESSAGE(NAME) \
69 std::clog << "[" << __PRETTY_FUNCTION__ << "] loading JSON field " << NAME << "...\n" \
72# define ROCP_SDK_LOAD_MESSAGE(NAME)
// Loads data.FIELD from the archive `ar` under the name "FIELD" (the stringized member
// name), after emitting the optional debug message.
// Expects `ar`, `data` and `make_nvp` to be in scope at the expansion site.
75#define ROCP_SDK_LOAD_DATA_FIELD(FIELD) \
76 ROCP_SDK_LOAD_MESSAGE(#FIELD); \
77 ar(make_nvp(#FIELD, data.FIELD))
// Loads data.VALUE from the archive `ar` under the explicitly supplied NAME, for cases
// where the serialized name is not simply the stringized member expression.
// Expects `ar`, `data` and `make_nvp` to be in scope at the expansion site.
78#define ROCP_SDK_LOAD_DATA_VALUE(NAME, VALUE) \
79 ROCP_SDK_LOAD_MESSAGE(NAME); \
80 ar(make_nvp(NAME, data.VALUE))
// Loads an arbitrary lvalue VALUE (not prefixed with `data.`) from the archive `ar`
// under the name NAME. Expects `ar` and `make_nvp` to be in scope at the expansion site.
81#define ROCP_SDK_LOAD_VALUE(NAME, VALUE) \
82 ROCP_SDK_LOAD_MESSAGE(NAME); \
83 ar(make_nvp(NAME, VALUE))
// ROCP_SDK_LOAD_DATA_CSTR(FIELD): reads the value named "FIELD" into a std::string that is
// allocated with `new`, then points data.FIELD at that string's c_str() buffer.
// NOTE(review): no matching delete is visible in this excerpt -- presumably the string is
// deliberately kept alive so the stored pointer remains valid; TODO confirm ownership.
//
// ROCP_SDK_LOAD_DATA_BITFIELD(NAME, VALUE): copies data.VALUE into a local and loads the
// value named NAME into that local instead of into data.VALUE directly.
// NOTE(review): the remainder of this macro (including any write-back of the local into
// data.VALUE) and the enclosing braces of both macros are not visible in this excerpt.
84#define ROCP_SDK_LOAD_DATA_CSTR(FIELD) \
86 ROCP_SDK_LOAD_MESSAGE(#FIELD); \
87 auto _val = new std::string{}; \
88 ar(make_nvp(#FIELD, *_val)); \
89 data.FIELD = _val->c_str(); \
91#define ROCP_SDK_LOAD_DATA_BITFIELD(NAME, VALUE) \
93 ROCP_SDK_LOAD_MESSAGE(NAME); \
94 auto _val = data.VALUE; \
95 ar(make_nvp(NAME, _val)); \
99#if !defined(ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN)
// Overridable macros that open and close the scope the load() overloads below are
// declared in; each is only defined here if the includer has not already defined it.
// NOTE(review): the body of the BEGIN macro and the #endif lines are not visible in this
// excerpt. The END macro expands to a single closing brace.
100# define ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN \
105#if !defined(ROCPROFILER_SDK_CEREAL_NAMESPACE_END)
106# define ROCPROFILER_SDK_CEREAL_NAMESPACE_END }
109ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN
// Reads the `handle` member of `data` from the archive under the name "handle".
// NOTE(review): the function signature and braces are not visible in this excerpt.
111template <
typename ArchiveT>
115 ROCP_SDK_LOAD_DATA_FIELD(handle);
// Loads an HsaCacheType from the archive: the Data, Instruction, CPU and HSACU members of
// data.ui32 are each read under a name equal to the member name, using
// ROCP_SDK_LOAD_DATA_BITFIELD (which goes through a local copy of the member).
118template <
typename ArchiveT>
120load(ArchiveT& ar, HsaCacheType& data)
122 ROCP_SDK_LOAD_DATA_BITFIELD(
"Data", ui32.Data);
123 ROCP_SDK_LOAD_DATA_BITFIELD(
"Instruction", ui32.Instruction);
124 ROCP_SDK_LOAD_DATA_BITFIELD(
"CPU", ui32.CPU);
125 ROCP_SDK_LOAD_DATA_BITFIELD(
"HSACU", ui32.HSACU);
// Loads an HSA_LINKPROPERTY from the archive: each listed member of data.ui32 is read
// under a name equal to the member name, using ROCP_SDK_LOAD_DATA_BITFIELD.
128template <
typename ArchiveT>
130load(ArchiveT& ar, HSA_LINKPROPERTY& data)
132 ROCP_SDK_LOAD_DATA_BITFIELD(
"Override", ui32.Override);
133 ROCP_SDK_LOAD_DATA_BITFIELD(
"NonCoherent", ui32.NonCoherent);
134 ROCP_SDK_LOAD_DATA_BITFIELD(
"NoAtomics32bit", ui32.NoAtomics32bit);
135 ROCP_SDK_LOAD_DATA_BITFIELD(
"NoAtomics64bit", ui32.NoAtomics64bit);
136 ROCP_SDK_LOAD_DATA_BITFIELD(
"NoPeerToPeerDMA", ui32.NoPeerToPeerDMA);
// Loads an HSA_CAPABILITY from the archive: each listed member of data.ui32 is read under
// a name equal to the member name, using ROCP_SDK_LOAD_DATA_BITFIELD. The members are read
// in the order written below.
139template <
typename ArchiveT>
141load(ArchiveT& ar, HSA_CAPABILITY& data)
143 ROCP_SDK_LOAD_DATA_BITFIELD(
"HotPluggable", ui32.HotPluggable);
144 ROCP_SDK_LOAD_DATA_BITFIELD(
"HSAMMUPresent", ui32.HSAMMUPresent);
145 ROCP_SDK_LOAD_DATA_BITFIELD(
"SharedWithGraphics", ui32.SharedWithGraphics);
146 ROCP_SDK_LOAD_DATA_BITFIELD(
"QueueSizePowerOfTwo", ui32.QueueSizePowerOfTwo);
147 ROCP_SDK_LOAD_DATA_BITFIELD(
"QueueSize32bit", ui32.QueueSize32bit);
148 ROCP_SDK_LOAD_DATA_BITFIELD(
"QueueIdleEvent", ui32.QueueIdleEvent);
149 ROCP_SDK_LOAD_DATA_BITFIELD(
"VALimit", ui32.VALimit);
150 ROCP_SDK_LOAD_DATA_BITFIELD(
"WatchPointsSupported", ui32.WatchPointsSupported);
151 ROCP_SDK_LOAD_DATA_BITFIELD(
"WatchPointsTotalBits", ui32.WatchPointsTotalBits);
152 ROCP_SDK_LOAD_DATA_BITFIELD(
"DoorbellType", ui32.DoorbellType);
153 ROCP_SDK_LOAD_DATA_BITFIELD(
"AQLQueueDoubleMap", ui32.AQLQueueDoubleMap);
154 ROCP_SDK_LOAD_DATA_BITFIELD(
"DebugTrapSupported", ui32.DebugTrapSupported);
155 ROCP_SDK_LOAD_DATA_BITFIELD(
"WaveLaunchTrapOverrideSupported",
156 ui32.WaveLaunchTrapOverrideSupported);
157 ROCP_SDK_LOAD_DATA_BITFIELD(
"WaveLaunchModeSupported", ui32.WaveLaunchModeSupported);
158 ROCP_SDK_LOAD_DATA_BITFIELD(
"PreciseMemoryOperationsSupported",
159 ui32.PreciseMemoryOperationsSupported);
160 ROCP_SDK_LOAD_DATA_BITFIELD(
"DEPRECATED_SRAM_EDCSupport", ui32.DEPRECATED_SRAM_EDCSupport);
161 ROCP_SDK_LOAD_DATA_BITFIELD(
"Mem_EDCSupport", ui32.Mem_EDCSupport);
162 ROCP_SDK_LOAD_DATA_BITFIELD(
"RASEventNotify", ui32.RASEventNotify);
163 ROCP_SDK_LOAD_DATA_BITFIELD(
"ASICRevision", ui32.ASICRevision);
164 ROCP_SDK_LOAD_DATA_BITFIELD(
"SRAM_EDCSupport", ui32.SRAM_EDCSupport);
165 ROCP_SDK_LOAD_DATA_BITFIELD(
"SVMAPISupported", ui32.SVMAPISupported);
166 ROCP_SDK_LOAD_DATA_BITFIELD(
"CoherentHostAccess", ui32.CoherentHostAccess);
167 ROCP_SDK_LOAD_DATA_BITFIELD(
"DebugSupportedFirmware", ui32.DebugSupportedFirmware);
// Reads the `x`, `y` and `z` members of `data` from the archive, each under its own name.
// NOTE(review): the function signature and braces are not visible in this excerpt, so the
// concrete type of `data` cannot be confirmed from here.
170template <
typename ArchiveT>
174 ROCP_SDK_LOAD_DATA_FIELD(x);
175 ROCP_SDK_LOAD_DATA_FIELD(y);
176 ROCP_SDK_LOAD_DATA_FIELD(z);
// Loads an HSA_MEMORYPROPERTY from the archive: the HotPluggable and NonVolatile members
// of data.ui32 are read under names equal to the member names, using
// ROCP_SDK_LOAD_DATA_BITFIELD.
179template <
typename ArchiveT>
181load(ArchiveT& ar, HSA_MEMORYPROPERTY& data)
183 ROCP_SDK_LOAD_DATA_BITFIELD(
"HotPluggable", ui32.HotPluggable);
184 ROCP_SDK_LOAD_DATA_BITFIELD(
"NonVolatile", ui32.NonVolatile);
// Loads an HSA_ENGINE_VERSION from the archive: the uCodeSDMA and uCodeRes members are
// read directly from `data` (there is no `ui32` sub-object in these accesses), using
// ROCP_SDK_LOAD_DATA_BITFIELD.
187template <
typename ArchiveT>
189load(ArchiveT& ar, HSA_ENGINE_VERSION& data)
191 ROCP_SDK_LOAD_DATA_BITFIELD(
"uCodeSDMA", uCodeSDMA);
192 ROCP_SDK_LOAD_DATA_BITFIELD(
"uCodeRes", uCodeRes);
// Loads an HSA_ENGINE_ID from the archive: the uCode, Major, Minor and Stepping members of
// data.ui32 are read under names equal to the member names, using
// ROCP_SDK_LOAD_DATA_BITFIELD.
195template <
typename ArchiveT>
197load(ArchiveT& ar, HSA_ENGINE_ID& data)
199 ROCP_SDK_LOAD_DATA_BITFIELD(
"uCode", ui32.uCode);
200 ROCP_SDK_LOAD_DATA_BITFIELD(
"Major", ui32.Major);
201 ROCP_SDK_LOAD_DATA_BITFIELD(
"Minor", ui32.Minor);
202 ROCP_SDK_LOAD_DATA_BITFIELD(
"Stepping", ui32.Stepping);
// Reads the listed members of `data` from the archive, each under a name equal to the
// member name.
// NOTE(review): the function signature is not visible in this excerpt; judging by the
// member names this is presumably the agent cache-info record -- confirm.
205template <
typename ArchiveT>
209 ROCP_SDK_LOAD_DATA_FIELD(processor_id_low);
210 ROCP_SDK_LOAD_DATA_FIELD(size);
211 ROCP_SDK_LOAD_DATA_FIELD(level);
212 ROCP_SDK_LOAD_DATA_FIELD(cache_line_size);
213 ROCP_SDK_LOAD_DATA_FIELD(cache_lines_per_tag);
214 ROCP_SDK_LOAD_DATA_FIELD(association);
215 ROCP_SDK_LOAD_DATA_FIELD(latency);
216 ROCP_SDK_LOAD_DATA_FIELD(type);
// Reads the listed members of `data` from the archive, each under a name equal to the
// member name.
// NOTE(review): the function signature is not visible in this excerpt; judging by the
// member names this is presumably the agent IO-link record -- confirm.
219template <
typename ArchiveT>
223 ROCP_SDK_LOAD_DATA_FIELD(type);
224 ROCP_SDK_LOAD_DATA_FIELD(version_major);
225 ROCP_SDK_LOAD_DATA_FIELD(version_minor);
226 ROCP_SDK_LOAD_DATA_FIELD(node_from);
227 ROCP_SDK_LOAD_DATA_FIELD(node_to);
228 ROCP_SDK_LOAD_DATA_FIELD(weight);
229 ROCP_SDK_LOAD_DATA_FIELD(min_latency);
230 ROCP_SDK_LOAD_DATA_FIELD(max_latency);
231 ROCP_SDK_LOAD_DATA_FIELD(min_bandwidth);
232 ROCP_SDK_LOAD_DATA_FIELD(max_bandwidth);
233 ROCP_SDK_LOAD_DATA_FIELD(recommended_transfer_size);
234 ROCP_SDK_LOAD_DATA_FIELD(flags);
// Reads the listed members of `data` from the archive, each under a name equal to the
// member name.
// NOTE(review): the function signature is not visible in this excerpt; judging by the
// member names this is presumably the agent memory-bank record -- confirm.
237template <
typename ArchiveT>
241 ROCP_SDK_LOAD_DATA_FIELD(heap_type);
242 ROCP_SDK_LOAD_DATA_FIELD(flags);
243 ROCP_SDK_LOAD_DATA_FIELD(width);
244 ROCP_SDK_LOAD_DATA_FIELD(mem_clk_max);
245 ROCP_SDK_LOAD_DATA_FIELD(size_in_bytes);
// Reads the listed members of `data` from the archive, each under a name equal to the
// member name. The four string members (name, vendor_name, product_name, model_name) go
// through ROCP_SDK_LOAD_DATA_CSTR, which stores a pointer into a heap-allocated
// std::string rather than copying characters into `data`.
// NOTE(review): the function signature is not visible in this excerpt; judging by the
// member names this is presumably the agent record -- confirm.
248template <
typename ArchiveT>
252 ROCP_SDK_LOAD_DATA_FIELD(size);
253 ROCP_SDK_LOAD_DATA_FIELD(
id);
254 ROCP_SDK_LOAD_DATA_FIELD(type);
255 ROCP_SDK_LOAD_DATA_FIELD(cpu_cores_count);
256 ROCP_SDK_LOAD_DATA_FIELD(simd_count);
257 ROCP_SDK_LOAD_DATA_FIELD(mem_banks_count);
258 ROCP_SDK_LOAD_DATA_FIELD(caches_count);
259 ROCP_SDK_LOAD_DATA_FIELD(io_links_count);
260 ROCP_SDK_LOAD_DATA_FIELD(cpu_core_id_base);
261 ROCP_SDK_LOAD_DATA_FIELD(simd_id_base);
262 ROCP_SDK_LOAD_DATA_FIELD(max_waves_per_simd);
263 ROCP_SDK_LOAD_DATA_FIELD(lds_size_in_kb);
264 ROCP_SDK_LOAD_DATA_FIELD(gds_size_in_kb);
265 ROCP_SDK_LOAD_DATA_FIELD(num_gws);
266 ROCP_SDK_LOAD_DATA_FIELD(wave_front_size);
267 ROCP_SDK_LOAD_DATA_FIELD(num_xcc);
268 ROCP_SDK_LOAD_DATA_FIELD(cu_count);
269 ROCP_SDK_LOAD_DATA_FIELD(array_count);
270 ROCP_SDK_LOAD_DATA_FIELD(num_shader_banks);
271 ROCP_SDK_LOAD_DATA_FIELD(simd_arrays_per_engine);
272 ROCP_SDK_LOAD_DATA_FIELD(cu_per_simd_array);
273 ROCP_SDK_LOAD_DATA_FIELD(simd_per_cu);
274 ROCP_SDK_LOAD_DATA_FIELD(max_slots_scratch_cu);
275 ROCP_SDK_LOAD_DATA_FIELD(gfx_target_version);
276 ROCP_SDK_LOAD_DATA_FIELD(vendor_id);
277 ROCP_SDK_LOAD_DATA_FIELD(device_id);
278 ROCP_SDK_LOAD_DATA_FIELD(location_id);
279 ROCP_SDK_LOAD_DATA_FIELD(domain);
280 ROCP_SDK_LOAD_DATA_FIELD(drm_render_minor);
281 ROCP_SDK_LOAD_DATA_FIELD(num_sdma_engines);
282 ROCP_SDK_LOAD_DATA_FIELD(num_sdma_xgmi_engines);
283 ROCP_SDK_LOAD_DATA_FIELD(num_sdma_queues_per_engine);
284 ROCP_SDK_LOAD_DATA_FIELD(num_cp_queues);
285 ROCP_SDK_LOAD_DATA_FIELD(max_engine_clk_ccompute);
286 ROCP_SDK_LOAD_DATA_FIELD(max_engine_clk_fcompute);
287 ROCP_SDK_LOAD_DATA_FIELD(sdma_fw_version);
288 ROCP_SDK_LOAD_DATA_FIELD(fw_version);
289 ROCP_SDK_LOAD_DATA_FIELD(capability);
290 ROCP_SDK_LOAD_DATA_FIELD(cu_per_engine);
291 ROCP_SDK_LOAD_DATA_FIELD(max_waves_per_cu);
292 ROCP_SDK_LOAD_DATA_FIELD(family_id);
293 ROCP_SDK_LOAD_DATA_FIELD(workgroup_max_size);
294 ROCP_SDK_LOAD_DATA_FIELD(grid_max_size);
295 ROCP_SDK_LOAD_DATA_FIELD(local_mem_size);
296 ROCP_SDK_LOAD_DATA_FIELD(hive_id);
297 ROCP_SDK_LOAD_DATA_FIELD(gpu_id);
298 ROCP_SDK_LOAD_DATA_FIELD(workgroup_max_dim);
299 ROCP_SDK_LOAD_DATA_FIELD(grid_max_dim);
300 ROCP_SDK_LOAD_DATA_CSTR(name);
301 ROCP_SDK_LOAD_DATA_CSTR(vendor_name);
302 ROCP_SDK_LOAD_DATA_CSTR(product_name);
303 ROCP_SDK_LOAD_DATA_CSTR(model_name);
304 ROCP_SDK_LOAD_DATA_FIELD(node_id);
305 ROCP_SDK_LOAD_DATA_FIELD(logical_node_id);
306 ROCP_SDK_LOAD_DATA_FIELD(logical_node_type_id);
// Helper lambda: reads the sequence named `name` from the archive into a temporary
// std::vector, allocates an array of `size` elements with new[], and copies the first
// `size` loaded elements into it.
// NOTE(review): vec.at(i) throws std::out_of_range when the loaded sequence holds fewer
// than `size` elements. The alias declaration for value_type, what is done with value_m
// after the copy, and the call sites of this helper are not visible in this excerpt.
308 auto generate = [&](
auto name,
const auto*& value,
auto& size) {
310 std::remove_const_t<std::remove_pointer_t<std::remove_reference_t<
decltype(value)>>>;
311 auto vec = std::vector<value_type>{};
312 ar(make_nvp(name, vec));
314 auto* value_m =
new value_type[size];
315 for(uint64_t i = 0; i < size; ++i)
316 value_m[i] = vec.at(i);
// Reads the `handle` member of `data` from the archive under the name "handle".
// NOTE(review): the function signature and braces are not visible in this excerpt.
325template <
typename ArchiveT>
329 ROCP_SDK_LOAD_DATA_FIELD(handle);
// Reads the listed members of `data` from the archive. `uri` goes through
// ROCP_SDK_LOAD_DATA_CSTR (pointer into a heap-allocated std::string).
// NOTE(review): the function signature is not visible in this excerpt; judging by the
// member names this is presumably the code-object load record -- confirm.
332template <
typename ArchiveT>
336 ROCP_SDK_LOAD_DATA_FIELD(size);
337 ROCP_SDK_LOAD_DATA_FIELD(code_object_id);
338 ROCP_SDK_LOAD_DATA_FIELD(agent_id);
339 ROCP_SDK_LOAD_DATA_CSTR(uri);
340 ROCP_SDK_LOAD_DATA_FIELD(load_base);
341 ROCP_SDK_LOAD_DATA_FIELD(load_size);
342 ROCP_SDK_LOAD_DATA_FIELD(load_delta);
343 ROCP_SDK_LOAD_DATA_FIELD(storage_type);
// Which storage members are read depends on the storage_type loaded just above:
// storage_file for FILE storage, memory_base and memory_size for MEMORY storage.
// No members are read in the visible code for any other storage type.
344 if(data.
storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE)
346 ROCP_SDK_LOAD_DATA_FIELD(storage_file);
348 else if(data.
storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY)
350 ROCP_SDK_LOAD_DATA_FIELD(memory_base);
351 ROCP_SDK_LOAD_DATA_FIELD(memory_size);
// Reads the listed members of `data` from the archive, each under a name equal to the
// member name. `kernel_name` goes through ROCP_SDK_LOAD_DATA_CSTR (pointer into a
// heap-allocated std::string).
// NOTE(review): the function signature is not visible in this excerpt; judging by the
// member names this is presumably the kernel-symbol registration record -- confirm.
355template <
typename ArchiveT>
359 ROCP_SDK_LOAD_DATA_FIELD(size);
360 ROCP_SDK_LOAD_DATA_FIELD(kernel_id);
361 ROCP_SDK_LOAD_DATA_FIELD(code_object_id);
362 ROCP_SDK_LOAD_DATA_CSTR(kernel_name);
363 ROCP_SDK_LOAD_DATA_FIELD(kernel_object);
364 ROCP_SDK_LOAD_DATA_FIELD(kernarg_segment_size);
365 ROCP_SDK_LOAD_DATA_FIELD(kernarg_segment_alignment);
366 ROCP_SDK_LOAD_DATA_FIELD(group_segment_size);
367 ROCP_SDK_LOAD_DATA_FIELD(private_segment_size);
368 ROCP_SDK_LOAD_DATA_FIELD(sgpr_count);
369 ROCP_SDK_LOAD_DATA_FIELD(arch_vgpr_count);
370 ROCP_SDK_LOAD_DATA_FIELD(accum_vgpr_count);
371 ROCP_SDK_LOAD_DATA_FIELD(kernel_code_entry_byte_offset);
372 ROCP_SDK_LOAD_DATA_FIELD(kernel_address);
375ROCPROFILER_SDK_CEREAL_NAMESPACE_END
// Remove the helper macros so they are not visible to code after this point.
// NOTE(review): no #undef for ROCP_SDK_LOAD_VALUE or ROCP_SDK_LOAD_MESSAGE is visible in
// this excerpt -- confirm whether they are intentionally left defined.
377#undef ROCP_SDK_LOAD_DATA_FIELD
378#undef ROCP_SDK_LOAD_DATA_VALUE
379#undef ROCP_SDK_LOAD_DATA_CSTR
380#undef ROCP_SDK_LOAD_DATA_BITFIELD
const rocprofiler_agent_io_link_t * io_links
array of IO link info
const rocprofiler_agent_mem_bank_t * mem_banks
array of memory bank info
const rocprofiler_agent_cache_t * caches
array of cache info
Cache information for an agent.
IO link information for an agent.
Memory bank information for an agent.
Stores the properties of an agent (CPU, GPU, etc.)
Multi-dimensional struct of data used to describe GPU workgroup and grid sizes.
Stores memory address for profiling.
rocprofiler_code_object_storage_type_t storage_type
storage type of the code object reader used to load the loaded code object
ROCProfiler Code Object Kernel Symbol Tracer Callback Record.
ROCProfiler Code Object Load Tracer Callback Record.
ROCProfiler-SDK API interface.