rocprofiler-sdk/cxx/codeobj/code_printing.hpp Source File

rocprofiler-sdk/cxx/codeobj/code_printing.hpp Source File

ROCprofiler-SDK developer API: rocprofiler-sdk/cxx/codeobj/code_printing.hpp Source File
ROCprofiler-SDK developer API 1.0.0
ROCm Profiling API and tools
code_printing.hpp
1// MIT License
2//
3// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#pragma once
24
25#include "disassembly.hpp"
26#include "segment.hpp"
27
28#include <dwarf.h>
29#include <elfutils/libdw.h>
30#include <hsa/amd_hsa_elf.h>
31
32#include <algorithm>
33#include <cstring>
34#include <fstream>
35#include <iostream>
36#include <limits>
37#include <map>
38#include <memory>
39#include <optional>
40#include <string>
41#include <unordered_map>
42#include <vector>
43
44namespace rocprofiler
45{
46namespace sdk
47{
48namespace codeobj
49{
50namespace disassembly
51{
52using marker_id_t = segment::marker_id_t;
53
54struct Instruction
55{
56 Instruction() = default;
57 Instruction(std::string&& _inst, size_t _size)
58 : inst(std::move(_inst))
59 , size(_size)
60 {}
61 std::string inst{};
62 std::string comment{};
63 uint64_t faddr{0};
64 uint64_t vaddr{0};
65 size_t size{0};
66 uint64_t ld_addr{0}; // Instruction load address, if from loaded codeobj
67 marker_id_t codeobj_id{0}; // Instruction code object load id, if from loaded codeobj
68
69 static constexpr std::string_view separator = " -> ";
70};
71
72/**
73 * @brief Extracts inlined function call stack information for a given address
74 *
75 * This struct is used to recursively search through DWARF debug information to find all inlined
76 * functions that contain the specified address, building a complete call stack from the outermost
77 * function down to the innermost inlined function.
78 */
79struct DIEInfo
80{
81 struct DRange
82 {
83 Dwarf_Addr low{std::numeric_limits<Dwarf_Addr>::max()};
84 Dwarf_Addr high{0};
85
86 // Makes sure this range includes the "other" range
87 void expand(const DRange& other)
88 {
89 low = std::min(low, other.low);
90 high = std::max(high, other.high);
91 }
92
93 /**
94 * @brief Is the address inside the low/hihg range?
95 */
96 bool contains(Dwarf_Addr addr) const { return low <= addr && high > addr; }
97 };
98
99 DIEInfo(Dwarf_Die* die);
100
101 /**
102 * @brief Recursively traverses all children DIEInfos to find inlined functions at a specific
103 * address
104 *
105 * This function performs a depth-first traversal of the DWARF debug information tree,
106 * checking each DIE for inlined function information that covers the specified address.
107 * It processes both the current DIE and all its children (including siblings at each level)
108 * to ensure comprehensive coverage of all possible inlined function contexts.
109 *
110 * The traversal is necessary because inlined functions can be nested (function A inlines
111 * function B which inlines function C) and multiple inlined functions can exist at the
112 * same scope level as siblings in the DWARF tree.
113 *
114 * @param addr The address to search for inlined function information
115 * @param call_stack Reference to vector that accumulates the call stack information
116 * @return True if either this instance or one of the children added an entry to the stack
117 */
118 bool getCallStackRecursive(Dwarf_Addr addr, std::vector<std::string>& call_stack);
119
120 std::vector<DRange> all_ranges{};
121 std::vector<std::unique_ptr<DIEInfo>> children{};
122
123 // Union of ranges, or the same as dwarf_lo/hi pc
124 DRange total_range{};
125 // Union of all children's children_range + this total range
126 DRange children_range{};
127
128 std::string file_and_line{};
129
130 void addRange(const DRange& range)
131 {
132 all_ranges.push_back(range);
133 total_range.expand(range);
134 }
135};
136
137class CodeobjDecoderComponent
138{
139 struct ProtectedFd
140 {
141 ProtectedFd(std::string_view uri)
142 {
143#if defined(_GNU_SOURCE) && defined(MFD_ALLOW_SEALING) && defined(MFD_CLOEXEC)
144 m_fd = ::memfd_create(uri.data(), MFD_ALLOW_SEALING | MFD_CLOEXEC);
145#endif
146 if(m_fd == -1) m_fd = ::open("/tmp", O_TMPFILE | O_RDWR, 0666);
147 if(m_fd == -1) throw std::runtime_error("Could not create a file for codeobj!");
148 }
149 ~ProtectedFd()
150 {
151 if(m_fd != -1) ::close(m_fd);
152 }
153 int m_fd{-1};
154 };
155
156public:
157 CodeobjDecoderComponent(const char* codeobj_data, uint64_t codeobj_size)
158 {
159 ProtectedFd prot("");
160 if(::write(prot.m_fd, codeobj_data, codeobj_size) != static_cast<int64_t>(codeobj_size))
161 throw std::runtime_error("Could not write to temporary file!");
162
163 ::lseek(prot.m_fd, 0, SEEK_SET);
164 fsync(prot.m_fd);
165
166 m_line_number_map = {};
167
168 std::unique_ptr<Dwarf, void (*)(Dwarf*)> dbg(dwarf_begin(prot.m_fd, DWARF_C_READ),
169 [](Dwarf* _dbg) { dwarf_end(_dbg); });
170
171 if(dbg)
172 {
173 Dwarf_Off cu_offset{};
174 Dwarf_Off next_offset{};
175 size_t header_size{};
176
177 std::map<Dwarf_Addr, std::string> line_addrs{};
178 std::unordered_map<Dwarf_Off, std::unique_ptr<DIEInfo>> diemap{};
179
180 while(
181 dwarf_nextcu(
182 dbg.get(), cu_offset, &next_offset, &header_size, nullptr, nullptr, nullptr) ==
183 0)
184 {
185 Dwarf_Die die{};
186 if(!dwarf_offdie(dbg.get(), cu_offset + header_size, &die))
187 {
188 cu_offset = next_offset;
189 continue;
190 }
191
192 Dwarf_Lines* lines;
193 size_t line_count;
194 if(dwarf_getsrclines(&die, &lines, &line_count) != 0)
195 {
196 cu_offset = next_offset;
197 continue;
198 }
199
200 for(size_t i = 0; i < line_count; ++i)
201 {
202 Dwarf_Addr addr;
203 int line_number;
204 Dwarf_Line* line = dwarf_onesrcline(lines, i);
205
206 if(line && dwarf_lineaddr(line, &addr) == 0 &&
207 dwarf_lineno(line, &line_number) == 0 && line_number != 0)
208 {
209 std::string src = dwarf_linesrc(line, nullptr, nullptr);
210 auto dwarf_line = src + ':' + std::to_string(line_number);
211
212 std::vector<std::string> call_stack_info{};
213
214 auto& die_ptr = diemap[dwarf_dieoffset(&die)];
215 if(die_ptr == nullptr) die_ptr = std::make_unique<DIEInfo>(&die);
216 die_ptr->getCallStackRecursive(addr, call_stack_info);
217
218 size_t capacity = dwarf_line.size() +
219 Instruction::separator.size() * call_stack_info.size();
220 for(const auto& call : call_stack_info)
221 capacity += call.size();
222
223 dwarf_line.reserve(capacity);
224 for(const auto& call : call_stack_info)
225 {
226 dwarf_line += Instruction::separator;
227 dwarf_line += call;
228 }
229 line_addrs[addr] = std::move(dwarf_line);
230 }
231 }
232 cu_offset = next_offset;
233 }
234
235 auto it = line_addrs.begin();
236 if(it != line_addrs.end())
237 {
238 while(std::next(it) != line_addrs.end())
239 {
240 uint64_t delta = std::next(it)->first - it->first;
241 auto segment = segment::address_range_t{it->first, delta, 0};
242 m_line_number_map.emplace(segment, std::move(it->second));
243 it++;
244 }
245 auto segment = segment::address_range_t{it->first, codeobj_size - it->first, 0};
246 m_line_number_map.emplace(segment, std::move(it->second));
247 }
248 }
249
250 // Can throw
251 disassembly = std::make_unique<DisassemblyInstance>(codeobj_data, codeobj_size);
252 try
253 {
254 m_symbol_map = disassembly->GetKernelMap(); // Can throw
255 } catch(...)
256 {}
257 }
258 ~CodeobjDecoderComponent() = default;
259
260 std::optional<uint64_t> va2fo(uint64_t vaddr) const
261 {
262 if(disassembly) return disassembly->va2fo(vaddr);
263 return std::nullopt;
264 };
265
266 std::unique_ptr<Instruction> disassemble_instruction(uint64_t faddr, uint64_t vaddr)
267 {
268 if(!disassembly) throw std::exception();
269
270 auto pair = disassembly->ReadInstruction(faddr);
271 auto inst = std::make_unique<Instruction>(std::move(pair.first), pair.second);
272 inst->faddr = faddr;
273 inst->vaddr = vaddr;
274
275 auto it = m_line_number_map.find({vaddr, 0, 0});
276 if(it != m_line_number_map.end()) inst->comment = it->second;
277
278 return inst;
279 }
280
281 std::map<uint64_t, SymbolInfo> m_symbol_map{};
282 std::vector<std::shared_ptr<Instruction>> instructions{};
283 std::unique_ptr<DisassemblyInstance> disassembly{};
284
285 std::map<segment::address_range_t, std::string> m_line_number_map{};
286};
287
288class LoadedCodeobjDecoder
289{
290public:
291 LoadedCodeobjDecoder(const char* filepath, uint64_t _load_addr, uint64_t _memsize)
292 : load_addr(_load_addr)
293 , load_end(_load_addr + _memsize)
294 {
295 if(!filepath) throw std::runtime_error("Empty filepath.");
296
297 std::string_view fpath(filepath);
298
299 if(fpath.rfind(".out") + 4 == fpath.size())
300 {
301 std::ifstream file(filepath, std::ios::in | std::ios::binary);
302
303 if(!file.is_open()) throw std::runtime_error("Invalid file " + std::string(filepath));
304
305 std::vector<char> buffer;
306 file.seekg(0, file.end);
307 buffer.resize(file.tellg());
308 file.seekg(0, file.beg);
309 file.read(buffer.data(), buffer.size());
310
311 decoder = std::make_unique<CodeobjDecoderComponent>(buffer.data(), buffer.size());
312 }
313 else
314 {
315 std::unique_ptr<CodeObjectBinary> binary = std::make_unique<CodeObjectBinary>(filepath);
316 auto& buffer = binary->buffer;
317 decoder = std::make_unique<CodeobjDecoderComponent>(buffer.data(), buffer.size());
318 }
319 }
320 LoadedCodeobjDecoder(const void* data, uint64_t size, uint64_t _load_addr, size_t _memsize)
321 : load_addr(_load_addr)
322 , load_end(load_addr + _memsize)
323 {
324 decoder = std::make_unique<CodeobjDecoderComponent>(static_cast<const char*>(data), size);
325 }
326 std::unique_ptr<Instruction> get(uint64_t ld_addr)
327 {
328 if(!decoder || ld_addr < load_addr) return nullptr;
329
330 uint64_t voffset = ld_addr - load_addr;
331 auto faddr = decoder->va2fo(voffset);
332 if(!faddr) return nullptr;
333
334 auto unique = decoder->disassemble_instruction(*faddr, voffset);
335 if(unique == nullptr || unique->size == 0) return nullptr;
336 unique->ld_addr = ld_addr;
337 return unique;
338 }
339
340 uint64_t begin() const { return load_addr; };
341 uint64_t end() const { return load_end; }
342 uint64_t size() const { return load_end - load_addr; }
343 bool inrange(uint64_t addr) const { return addr >= begin() && addr < end(); }
344
345 const char* getSymbolName(uint64_t addr) const
346 {
347 if(!decoder) return nullptr;
348
349 auto it = decoder->m_symbol_map.find(addr - load_addr);
350 if(it != decoder->m_symbol_map.end()) return it->second.name.data();
351
352 return nullptr;
353 }
354
355 std::map<uint64_t, SymbolInfo>& getSymbolMap() const
356 {
357 if(!decoder) throw std::exception();
358 return decoder->m_symbol_map;
359 }
360 const uint64_t load_addr;
361
362private:
363 uint64_t load_end{0};
364
365 std::unique_ptr<CodeobjDecoderComponent> decoder{nullptr};
366};
367
368/**
369 * @brief Maps ID and offsets into instructions
370 */
372{
373public:
374 CodeobjMap() = default;
375 virtual ~CodeobjMap() = default;
376
377 virtual void addDecoder(const char* filepath,
378 marker_id_t id,
379 uint64_t load_addr,
380 uint64_t memsize)
381 {
382 decoders[id] = std::make_shared<LoadedCodeobjDecoder>(filepath, load_addr, memsize);
383 }
384
385 virtual void addDecoder(const void* data,
386 size_t memory_size,
387 marker_id_t id,
388 uint64_t load_addr,
389 uint64_t memsize)
390 {
391 decoders[id] =
392 std::make_shared<LoadedCodeobjDecoder>(data, memory_size, load_addr, memsize);
393 }
394
395 virtual bool removeDecoderbyId(marker_id_t id) { return decoders.erase(id) != 0; }
396
397 std::unique_ptr<Instruction> get(marker_id_t id, uint64_t offset)
398 {
399 try
400 {
401 auto& decoder = decoders.at(id);
402 auto inst = decoder->get(decoder->begin() + offset);
403 if(inst != nullptr) inst->codeobj_id = id;
404 return inst;
405 } catch(std::out_of_range&)
406 {}
407 return nullptr;
408 }
409
410 const char* getSymbolName(marker_id_t id, uint64_t offset)
411 {
412 try
413 {
414 auto& decoder = decoders.at(id);
415 uint64_t vaddr = decoder->begin() + offset;
416 if(decoder->inrange(vaddr)) return decoder->getSymbolName(vaddr);
417 } catch(std::out_of_range&)
418 {}
419 return nullptr;
420 }
421
422protected:
423 std::unordered_map<marker_id_t, std::shared_ptr<LoadedCodeobjDecoder>> decoders{};
424};
425
426/**
427 * @brief Translates virtual addresses to elf file offsets
428 */
430{
431 using Super = CodeobjMap;
432
433public:
434 CodeobjAddressTranslate() = default;
435 ~CodeobjAddressTranslate() override = default;
436
437 void addDecoder(const char* filepath,
438 marker_id_t id,
439 uint64_t load_addr,
440 uint64_t memsize) override
441 {
442 this->Super::addDecoder(filepath, id, load_addr, memsize);
443 auto ptr = decoders.at(id);
444 table.insert({ptr->begin(), ptr->size(), id});
445 }
446
447 void addDecoder(const void* data,
448 size_t memory_size,
449 marker_id_t id,
450 uint64_t load_addr,
451 uint64_t memsize) override
452 {
453 this->Super::addDecoder(data, memory_size, id, load_addr, memsize);
454 auto ptr = decoders.at(id);
455 table.insert({ptr->begin(), ptr->size(), id});
456 }
457
458 bool removeDecoder(marker_id_t id, uint64_t load_addr)
459 {
460 return table.remove(load_addr) && this->Super::removeDecoderbyId(id);
461 }
462
463 bool removeDecoder(marker_id_t id)
464 {
465 uint64_t addr = 0;
466 if(decoders.find(id) != decoders.end()) addr = decoders.at(id)->begin();
467
468 return removeDecoder(id, addr);
469 }
470
471 std::unique_ptr<Instruction> get(uint64_t vaddr)
472 {
473 auto addr_range = table.find_codeobj_in_range(vaddr);
474 return this->Super::get(addr_range.id, vaddr - addr_range.addr);
475 }
476
477 std::unique_ptr<Instruction> get(marker_id_t id, uint64_t offset)
478 {
479 if(id == 0)
480 return get(offset);
481 else
482 return this->Super::get(id, offset);
483 }
484
485 const char* getSymbolName(uint64_t vaddr)
486 {
487 for(auto& [_, decoder] : decoders)
488 {
489 if(!decoder->inrange(vaddr)) continue;
490 return decoder->getSymbolName(vaddr);
491 }
492 return nullptr;
493 }
494
495 std::map<uint64_t, SymbolInfo> getSymbolMap() const
496 {
497 std::map<uint64_t, SymbolInfo> symbols;
498
499 for(const auto& [_, dec] : decoders)
500 {
501 auto& smap = dec->getSymbolMap();
502 for(auto& [vaddr, sym] : smap)
503 symbols[vaddr + dec->load_addr] = sym;
504 }
505
506 return symbols;
507 }
508
509 std::map<uint64_t, SymbolInfo> getSymbolMap(marker_id_t id) const
510 {
511 if(decoders.find(id) == decoders.end()) return {};
512
513 try
514 {
515 return decoders.at(id)->getSymbolMap();
516 } catch(...)
517 {
518 return {};
519 }
520 }
521
522private:
524};
525
526inline DIEInfo::DIEInfo(Dwarf_Die* die)
527{
528 if(dwarf_tag(die) == DW_TAG_inlined_subroutine)
529 {
530 Dwarf_Addr low_pc{};
531 Dwarf_Addr high_pc{};
532
533 // Check if this inlined subroutine covers the target address
534 // First try simple contiguous range (low_pc to high_pc)
535 if(dwarf_lowpc(die, &low_pc) == 0 && dwarf_highpc(die, &high_pc) == 0)
536 {
537 addRange(DRange{low_pc, high_pc});
538 }
539 else
540 {
541 // Function may have non-contiguous ranges
542 // Check all address ranges associated with this DIE
543 Dwarf_Addr base{};
544 ptrdiff_t offset{};
545 while((offset = dwarf_ranges(die, offset, &base, &low_pc, &high_pc)) > 0)
546 addRange(DRange{low_pc, high_pc});
547 }
548
549 // Extract call site information - where this function was inlined
550 Dwarf_Attribute call_file_attr{};
551 Dwarf_Attribute call_line_attr{};
552 Dwarf_Word call_file{};
553 Dwarf_Word call_line{};
554
555 // Get the file and line number where this function was called/inlined
556
557 if(!dwarf_attr(die, DW_AT_call_file, &call_file_attr) ||
558 !dwarf_attr(die, DW_AT_call_line, &call_line_attr) ||
559 dwarf_formudata(&call_file_attr, &call_file) != 0 ||
560 dwarf_formudata(&call_line_attr, &call_line) != 0)
561 return; // No call site information available
562
563 // Get the compilation unit to resolve file names
564 Dwarf_Die cu_die{};
565 if(!dwarf_diecu(die, &cu_die, nullptr, nullptr)) return;
566
567 // Get the source files table for this compilation unit
568 Dwarf_Files* files{};
569 size_t nfiles{};
570 if(dwarf_getsrcfiles(&cu_die, &files, &nfiles) == 0 && call_file < nfiles)
571 {
572 if(const char* filename = dwarf_filesrc(files, call_file, nullptr, nullptr))
573 {
574 // Add "filename:line" to call stack showing where this function was inlined
575 file_and_line = std::string(filename) + ":" + std::to_string(call_line);
576 return;
577 }
578 }
579
580 children_range = total_range;
581 }
582
583 Dwarf_Die child{};
584
585 // Traverse children (recursive part)
586 if(dwarf_child(die, &child) == 0)
587 {
588 do
589 {
590 children.emplace_back(std::make_unique<DIEInfo>(&child));
591 children_range.expand(children.back()->children_range);
592
593 } while(dwarf_siblingof(&child, &child) == 0);
594 }
595}
596
597inline bool
598DIEInfo::getCallStackRecursive(Dwarf_Addr addr, std::vector<std::string>& call_stack)
599{
600 if(!children_range.contains(addr)) return false;
601
602 bool addedOne = false;
603
604 for(auto& child : children)
605 {
606 // Only add from one of the children
607 addedOne = child->getCallStackRecursive(addr, call_stack);
608 if(addedOne) break;
609 }
610
611 if(total_range.contains(addr))
612 {
613 for(auto& range : all_ranges)
614 {
615 if(!range.contains(addr)) continue;
616
617 call_stack.emplace_back(file_and_line);
618 return true;
619 }
620 }
621
622 // Check if one of the child nodes added to the stack
623 return addedOne;
624}
625
626} // namespace disassembly
627} // namespace codeobj
628} // namespace sdk
629} // namespace rocprofiler
Translates virtual addresses to elf file offsets.
Maps ID and offsets into instructions.
Finds a candidate codeobj for the given vaddr.
Definition segment.hpp:66
STL namespace.
Extracts inlined function call stack information for a given address.
bool getCallStackRecursive(Dwarf_Addr addr, std::vector< std::string > &call_stack)
Recursively traverses all children DIEInfos to find inlined functions at a specific address.