25#include "disassembly.hpp"
29#include <elfutils/libdw.h>
30#include <hsa/amd_hsa_elf.h>
41#include <unordered_map>
52using marker_id_t = segment::marker_id_t;
56 Instruction() =
default;
57 Instruction(std::string&& _inst,
size_t _size)
58 : inst(
std::move(_inst))
62 std::string comment{};
67 marker_id_t codeobj_id{0};
69 static constexpr std::string_view separator =
" -> ";
83 Dwarf_Addr low{std::numeric_limits<Dwarf_Addr>::max()};
87 void expand(
const DRange& other)
89 low = std::min(low, other.low);
90 high = std::max(high, other.high);
96 bool contains(Dwarf_Addr addr)
const {
return low <= addr && high > addr; }
120 std::vector<DRange> all_ranges{};
121 std::vector<std::unique_ptr<DIEInfo>> children{};
124 DRange total_range{};
126 DRange children_range{};
128 std::string file_and_line{};
130 void addRange(
const DRange& range)
132 all_ranges.push_back(range);
133 total_range.expand(range);
137class CodeobjDecoderComponent
// Opens a file descriptor and stores it in m_fd. When the feature macros
// below are defined it first tries memfd_create(); if m_fd is -1 afterwards it
// falls back to an unnamed O_TMPFILE file under /tmp.
// Throws std::runtime_error if m_fd is still -1 after both attempts.
// NOTE(review): std::string_view::data() is not guaranteed to be
// NUL-terminated, while memfd_create() expects a C string for the name. The
// only call visible here passes "", which is safe - confirm for other callers.
// NOTE(review): when the #if branch is compiled out, the first check relies on
// m_fd's initial value, which is not visible in this excerpt - presumably -1.
141 ProtectedFd(std::string_view uri)
143#if defined(_GNU_SOURCE) && defined(MFD_ALLOW_SEALING) && defined(MFD_CLOEXEC)
144 m_fd = ::memfd_create(uri.data(), MFD_ALLOW_SEALING | MFD_CLOEXEC);
146 if(m_fd == -1) m_fd = ::open(
"/tmp", O_TMPFILE | O_RDWR, 0666);
147 if(m_fd == -1)
throw std::runtime_error(
"Could not create a file for codeobj!");
151 if(m_fd != -1) ::close(m_fd);
// Builds the decoder for one in-memory code object of `codeobj_size` bytes at
// `codeobj_data`. The bytes are written to a ProtectedFd so libdw can read
// them; source-line (and inlined call-stack) strings are collected per address
// into m_line_number_map; finally a DisassemblyInstance is created over the
// same bytes and its kernel map is copied into m_symbol_map.
// Throws std::runtime_error if the temporary file cannot be fully written.
157 CodeobjDecoderComponent(
const char* codeobj_data, uint64_t codeobj_size)
159 ProtectedFd prot(
"");
// A short write (fewer bytes than requested) is treated as a failure, as is -1.
160 if(::write(prot.m_fd, codeobj_data, codeobj_size) !=
static_cast<int64_t
>(codeobj_size))
161 throw std::runtime_error(
"Could not write to temporary file!");
// Rewind so the DWARF reader starts at the beginning of the file.
163 ::lseek(prot.m_fd, 0, SEEK_SET);
166 m_line_number_map = {};
// The custom deleter calls dwarf_end() when `dbg` goes out of scope.
// NOTE(review): dwarf_begin() can return nullptr; any null check is on lines
// not visible in this excerpt - confirm one exists before `dbg` is used.
168 std::unique_ptr<Dwarf, void (*)(Dwarf*)> dbg(dwarf_begin(prot.m_fd, DWARF_C_READ),
169 [](Dwarf* _dbg) { dwarf_end(_dbg); });
173 Dwarf_Off cu_offset{};
174 Dwarf_Off next_offset{};
175 size_t header_size{};
// line_addrs is ordered by address; the segment-building step below relies on
// that ordering to compute each segment's length from its successor.
177 std::map<Dwarf_Addr, std::string> line_addrs{};
// One DIEInfo per DIE offset, created on first use further down.
178 std::unordered_map<Dwarf_Off, std::unique_ptr<DIEInfo>> diemap{};
182 dbg.get(), cu_offset, &next_offset, &header_size,
nullptr,
nullptr,
nullptr) ==
186 if(!dwarf_offdie(dbg.get(), cu_offset + header_size, &die))
188 cu_offset = next_offset;
194 if(dwarf_getsrclines(&die, &lines, &line_count) != 0)
196 cu_offset = next_offset;
200 for(
size_t i = 0; i < line_count; ++i)
204 Dwarf_Line* line = dwarf_onesrcline(lines, i);
// Only rows with a resolvable address and a non-zero line number are used.
206 if(line && dwarf_lineaddr(line, &addr) == 0 &&
207 dwarf_lineno(line, &line_number) == 0 && line_number != 0)
// NOTE(review): dwarf_linesrc() may return NULL on error, and constructing a
// std::string from a null pointer is undefined behavior - confirm against libdw.
209 std::string src = dwarf_linesrc(line,
nullptr,
nullptr);
210 auto dwarf_line = src +
':' + std::to_string(line_number);
212 std::vector<std::string> call_stack_info{};
214 auto& die_ptr = diemap[dwarf_dieoffset(&die)];
215 if(die_ptr ==
nullptr) die_ptr = std::make_unique<DIEInfo>(&die);
216 die_ptr->getCallStackRecursive(addr, call_stack_info);
// Pre-compute the final string length so the appends below do not reallocate.
218 size_t capacity = dwarf_line.size() +
219 Instruction::separator.size() * call_stack_info.size();
220 for(
const auto& call : call_stack_info)
221 capacity += call.size();
223 dwarf_line.reserve(capacity);
224 for(
const auto& call : call_stack_info)
226 dwarf_line += Instruction::separator;
// A later row for the same address overwrites the earlier string.
229 line_addrs[addr] = std::move(dwarf_line);
232 cu_offset = next_offset;
// Turn the sorted addresses into segments: each entry covers the span up to
// the next recorded address.
235 auto it = line_addrs.begin();
236 if(it != line_addrs.end())
238 while(std::next(it) != line_addrs.end())
240 uint64_t delta = std::next(it)->first - it->first;
241 auto segment = segment::address_range_t{it->first, delta, 0};
242 m_line_number_map.emplace(segment, std::move(it->second));
// The last entry is extended to codeobj_size.
// NOTE(review): this subtraction is unsigned; if the last address can exceed
// codeobj_size the length wraps around - confirm addresses are bounded by it.
245 auto segment = segment::address_range_t{it->first, codeobj_size - it->first, 0};
246 m_line_number_map.emplace(segment, std::move(it->second));
251 disassembly = std::make_unique<DisassemblyInstance>(codeobj_data, codeobj_size);
254 m_symbol_map = disassembly->GetKernelMap();
258 ~CodeobjDecoderComponent() =
default;
260 std::optional<uint64_t> va2fo(uint64_t vaddr)
const
262 if(disassembly)
return disassembly->va2fo(vaddr);
266 std::unique_ptr<Instruction> disassemble_instruction(uint64_t faddr, uint64_t vaddr)
268 if(!disassembly)
throw std::exception();
270 auto pair = disassembly->ReadInstruction(faddr);
271 auto inst = std::make_unique<Instruction>(std::move(pair.first), pair.second);
275 auto it = m_line_number_map.find({vaddr, 0, 0});
276 if(it != m_line_number_map.end()) inst->comment = it->second;
281 std::map<uint64_t, SymbolInfo> m_symbol_map{};
282 std::vector<std::shared_ptr<Instruction>> instructions{};
283 std::unique_ptr<DisassemblyInstance> disassembly{};
285 std::map<segment::address_range_t, std::string> m_line_number_map{};
288class LoadedCodeobjDecoder
// Builds a decoder for the code object at `filepath`, recording
// [_load_addr, _load_addr + _memsize) as its load range.
// Throws std::runtime_error if `filepath` is null or the file cannot be opened.
291 LoadedCodeobjDecoder(
const char* filepath, uint64_t _load_addr, uint64_t _memsize)
292 : load_addr(_load_addr)
293 , load_end(_load_addr + _memsize)
295 if(!filepath)
throw std::runtime_error(
"Empty filepath.");
297 std::string_view fpath(filepath);
// Intended as an "ends with .out" test.
// NOTE(review): rfind() returns npos when ".out" is absent; npos + 4 wraps to
// 3, so any 3-character path without the suffix also satisfies this condition.
299 if(fpath.rfind(
".out") + 4 == fpath.size())
// Read the whole file into memory and decode it from that buffer.
301 std::ifstream file(filepath, std::ios::in | std::ios::binary);
303 if(!file.is_open())
throw std::runtime_error(
"Invalid file " + std::string(filepath));
305 std::vector<char> buffer;
306 file.seekg(0, file.end);
// NOTE(review): tellg() returns -1 on failure and the result is not checked
// before being used as the new size; the read() result is unchecked as well.
307 buffer.resize(file.tellg());
308 file.seekg(0, file.beg);
309 file.read(buffer.data(), buffer.size());
311 decoder = std::make_unique<CodeobjDecoderComponent>(buffer.data(), buffer.size());
// Second loading path: the bytes come from a CodeObjectBinary built from the
// path. The lines joining the two paths are not visible in this excerpt -
// presumably this is the else-branch of the suffix test above; confirm.
315 std::unique_ptr<CodeObjectBinary> binary = std::make_unique<CodeObjectBinary>(filepath);
316 auto& buffer = binary->buffer;
317 decoder = std::make_unique<CodeobjDecoderComponent>(buffer.data(), buffer.size());
320 LoadedCodeobjDecoder(
const void* data, uint64_t size, uint64_t _load_addr,
size_t _memsize)
321 : load_addr(_load_addr)
322 , load_end(load_addr + _memsize)
324 decoder = std::make_unique<CodeobjDecoderComponent>(
static_cast<const char*
>(data), size);
326 std::unique_ptr<Instruction> get(uint64_t ld_addr)
328 if(!decoder || ld_addr < load_addr)
return nullptr;
330 uint64_t voffset = ld_addr - load_addr;
331 auto faddr = decoder->va2fo(voffset);
332 if(!faddr)
return nullptr;
334 auto unique = decoder->disassemble_instruction(*faddr, voffset);
335 if(unique ==
nullptr || unique->size == 0)
return nullptr;
336 unique->ld_addr = ld_addr;
340 uint64_t begin()
const {
return load_addr; };
341 uint64_t end()
const {
return load_end; }
342 uint64_t size()
const {
return load_end - load_addr; }
343 bool inrange(uint64_t addr)
const {
return addr >= begin() && addr < end(); }
345 const char* getSymbolName(uint64_t addr)
const
347 if(!decoder)
return nullptr;
349 auto it = decoder->m_symbol_map.find(addr - load_addr);
350 if(it != decoder->m_symbol_map.end())
return it->second.name.data();
355 std::map<uint64_t, SymbolInfo>& getSymbolMap()
const
357 if(!decoder)
throw std::exception();
358 return decoder->m_symbol_map;
360 const uint64_t load_addr;
363 uint64_t load_end{0};
365 std::unique_ptr<CodeobjDecoderComponent> decoder{
nullptr};
377 virtual void addDecoder(
const char* filepath,
382 decoders[id] = std::make_shared<LoadedCodeobjDecoder>(filepath, load_addr, memsize);
385 virtual void addDecoder(
const void* data,
392 std::make_shared<LoadedCodeobjDecoder>(data, memory_size, load_addr, memsize);
395 virtual bool removeDecoderbyId(marker_id_t
id) {
return decoders.erase(
id) != 0; }
397 std::unique_ptr<Instruction> get(marker_id_t
id, uint64_t offset)
401 auto& decoder = decoders.at(
id);
402 auto inst = decoder->get(decoder->begin() + offset);
403 if(inst !=
nullptr) inst->codeobj_id = id;
405 }
catch(std::out_of_range&)
410 const char* getSymbolName(marker_id_t
id, uint64_t offset)
414 auto& decoder = decoders.at(
id);
415 uint64_t vaddr = decoder->begin() + offset;
416 if(decoder->inrange(vaddr))
return decoder->getSymbolName(vaddr);
417 }
catch(std::out_of_range&)
423 std::unordered_map<marker_id_t, std::shared_ptr<LoadedCodeobjDecoder>> decoders{};
437 void addDecoder(
const char* filepath,
440 uint64_t memsize)
override
442 this->Super::addDecoder(filepath,
id, load_addr, memsize);
443 auto ptr = decoders.at(
id);
444 table.insert({ptr->begin(), ptr->size(),
id});
447 void addDecoder(
const void* data,
451 uint64_t memsize)
override
453 this->Super::addDecoder(data, memory_size,
id, load_addr, memsize);
454 auto ptr = decoders.at(
id);
455 table.insert({ptr->begin(), ptr->size(),
id});
458 bool removeDecoder(marker_id_t
id, uint64_t load_addr)
460 return table.remove(load_addr) && this->Super::removeDecoderbyId(
id);
463 bool removeDecoder(marker_id_t
id)
466 if(decoders.find(
id) != decoders.end()) addr = decoders.at(
id)->begin();
468 return removeDecoder(
id, addr);
471 std::unique_ptr<Instruction> get(uint64_t vaddr)
473 auto addr_range = table.find_codeobj_in_range(vaddr);
474 return this->Super::get(addr_range.id, vaddr - addr_range.addr);
477 std::unique_ptr<Instruction> get(marker_id_t
id, uint64_t offset)
482 return this->Super::get(
id, offset);
485 const char* getSymbolName(uint64_t vaddr)
487 for(
auto& [_, decoder] : decoders)
489 if(!decoder->inrange(vaddr))
continue;
490 return decoder->getSymbolName(vaddr);
495 std::map<uint64_t, SymbolInfo> getSymbolMap()
const
497 std::map<uint64_t, SymbolInfo> symbols;
499 for(
const auto& [_, dec] : decoders)
501 auto& smap = dec->getSymbolMap();
502 for(
auto& [vaddr, sym] : smap)
503 symbols[vaddr + dec->load_addr] = sym;
509 std::map<uint64_t, SymbolInfo> getSymbolMap(marker_id_t
id)
const
511 if(decoders.find(
id) == decoders.end())
return {};
515 return decoders.at(
id)->getSymbolMap();
526inline DIEInfo::DIEInfo(Dwarf_Die* die)
528 if(dwarf_tag(die) == DW_TAG_inlined_subroutine)
531 Dwarf_Addr high_pc{};
535 if(dwarf_lowpc(die, &low_pc) == 0 && dwarf_highpc(die, &high_pc) == 0)
537 addRange(DRange{low_pc, high_pc});
545 while((offset = dwarf_ranges(die, offset, &base, &low_pc, &high_pc)) > 0)
546 addRange(DRange{low_pc, high_pc});
550 Dwarf_Attribute call_file_attr{};
551 Dwarf_Attribute call_line_attr{};
552 Dwarf_Word call_file{};
553 Dwarf_Word call_line{};
557 if(!dwarf_attr(die, DW_AT_call_file, &call_file_attr) ||
558 !dwarf_attr(die, DW_AT_call_line, &call_line_attr) ||
559 dwarf_formudata(&call_file_attr, &call_file) != 0 ||
560 dwarf_formudata(&call_line_attr, &call_line) != 0)
565 if(!dwarf_diecu(die, &cu_die,
nullptr,
nullptr))
return;
568 Dwarf_Files* files{};
570 if(dwarf_getsrcfiles(&cu_die, &files, &nfiles) == 0 && call_file < nfiles)
572 if(
const char* filename = dwarf_filesrc(files, call_file,
nullptr,
nullptr))
575 file_and_line = std::string(filename) +
":" + std::to_string(call_line);
580 children_range = total_range;
586 if(dwarf_child(die, &child) == 0)
590 children.emplace_back(std::make_unique<DIEInfo>(&child));
591 children_range.expand(children.back()->children_range);
593 }
while(dwarf_siblingof(&child, &child) == 0);
600 if(!children_range.contains(addr))
return false;
602 bool addedOne =
false;
604 for(
auto& child : children)
607 addedOne = child->getCallStackRecursive(addr, call_stack);
611 if(total_range.contains(addr))
613 for(
auto& range : all_ranges)
615 if(!range.contains(addr))
continue;
617 call_stack.emplace_back(file_and_line);
Translates virtual addresses to ELF file offsets.
Maps ID and offsets into instructions.
Finds a candidate codeobj for the given vaddr.
Extracts inlined function call stack information for a given address.
bool getCallStackRecursive(Dwarf_Addr addr, std::vector< std::string > &call_stack)
Recursively traverses all children DIEInfos to find inlined functions at a specific address.