rocprofiler-sdk/amd_detail/rocprofiler-sdk-codeobj/disassembly.hpp Source File

rocprofiler-sdk/amd_detail/rocprofiler-sdk-codeobj/disassembly.hpp Source File

Rocprofiler SDK Developer API: rocprofiler-sdk/amd_detail/rocprofiler-sdk-codeobj/disassembly.hpp Source File
Rocprofiler SDK Developer API 0.4.0
ROCm Profiling API and tools
disassembly.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#pragma once
24
25#include <amd_comgr/amd_comgr.h>
26#include <fcntl.h>
27#include <hsa/amd_hsa_elf.h>
28#include <sys/mman.h>
29#include <sys/stat.h>
30#include <sys/types.h>
31#include <unistd.h>
32
33#include <cstring>
34#include <fstream>
35#include <iostream>
36#include <limits>
37#include <map>
38#include <memory>
39#include <optional>
40#include <string>
41#include <unordered_map>
42#include <vector>
43
/**
 * Evaluates an amd_comgr call and, when the returned status is non-zero,
 * prints the file, line, status code and amd_comgr status string to std::cerr
 * and throws std::exception.
 *
 * The body is wrapped in do { } while(0) so that `THROW_COMGR(x);` is exactly
 * one statement and can be used in an unbraced if/else. The argument is
 * parenthesized and the internal status variable has a name unlikely to
 * collide with identifiers used inside `call`.
 */
#define THROW_COMGR(call) \
    do \
    { \
        if(amd_comgr_status_s throw_comgr_status_ = (call)) \
        { \
            const char* reason = ""; \
            amd_comgr_status_string(throw_comgr_status_, &reason); \
            std::cerr << __FILE__ << ':' << __LINE__ << " code: " << throw_comgr_status_ \
                      << " failed: " << reason << std::endl; \
            throw std::exception(); \
        } \
    } while(0)
53
/**
 * Evaluates an amd_comgr call and, when the returned status is non-zero,
 * prints the file, line, status code and amd_comgr status string to std::cerr
 * and returns AMD_COMGR_STATUS_ERROR from the enclosing function.
 *
 * The body is wrapped in do { } while(0) so that `RETURN_COMGR(x);` is exactly
 * one statement and can be used in an unbraced if/else. The argument is
 * parenthesized and the internal status variable has a name unlikely to
 * collide with identifiers used inside `call`.
 */
#define RETURN_COMGR(call) \
    do \
    { \
        if(amd_comgr_status_s return_comgr_status_ = (call)) \
        { \
            const char* reason = ""; \
            amd_comgr_status_string(return_comgr_status_, &reason); \
            std::cerr << __FILE__ << ':' << __LINE__ << " code: " << return_comgr_status_ \
                      << " failed: " << reason << std::endl; \
            return AMD_COMGR_STATUS_ERROR; \
        } \
    } while(0)
63
/**
 * Validation helper for functions returning std::optional: when condition `x`
 * is false, prints the file, line and `msg` to std::cerr and returns
 * std::nullopt from the enclosing function.
 *
 * The body is wrapped in do { } while(0) so that `CHECK_VA2FO(x, msg);` is
 * exactly one statement and can be used in an unbraced if/else.
 */
#define CHECK_VA2FO(x, msg) \
    do \
    { \
        if(!(x)) \
        { \
            std::cerr << __FILE__ << ' ' << __LINE__ << ' ' << msg << std::endl; \
            return std::nullopt; \
        } \
    } while(0)
70
71namespace rocprofiler
72{
73namespace codeobj
74{
75namespace disassembly
76{
/**
 * @brief Loads the bytes of a code object referenced by a URI into memory.
 *
 * The URI is parsed as `<protocol>://<path>` optionally followed by `#` or `?`
 * and a list of `key=value` pairs separated by `&`. The path is %-decoded and
 * opened as a binary file. The `offset` and `size` parameters (parsed with
 * std::stoul, base auto-detected) select the byte range copied into #buffer.
 */
class CodeObjectBinary
{
public:
    /**
     * @brief Parses @p _uri and reads the referenced byte range into #buffer.
     *
     * - If `size` is present and evaluates to 0, construction stops early and
     *   #buffer stays empty.
     * - If `size` is absent, everything from `offset` to the end of the file
     *   is read.
     *
     * @param _uri URI of the code object, e.g. `file:///path/to/file#offset=16&size=4096`.
     *
     * @throws std::runtime_error if the URI has no "://" delimiter, if the
     *         protocol is "memory", if the file cannot be opened, or if
     *         `offset` lies beyond the end of the file (checked only when
     *         `size` is not given).
     * @note Exceptions from std::stoul on malformed `offset`/`size` values
     *       propagate unchanged.
     */
    CodeObjectBinary(const std::string& _uri)
    : m_uri(_uri)
    {
        const std::string protocol_delim{"://"};

        size_t protocol_end = m_uri.find(protocol_delim);
        // Without a delimiter, npos + delimiter length would wrap around to a
        // small index and the remaining parsing would operate on garbage.
        if(protocol_end == std::string::npos)
            throw std::runtime_error("invalid uri " + m_uri);

        std::string protocol = m_uri.substr(0, protocol_end);
        protocol_end += protocol_delim.length();

        std::transform(protocol.begin(), protocol.end(), protocol.begin(), [](unsigned char c) {
            return static_cast<char>(std::tolower(c));
        });

        // The path ends at the first '#' or '?'; path_end is then advanced to
        // the first character of the query/fragment.
        std::string path;
        size_t      path_end = m_uri.find_first_of("#?", protocol_end);
        if(path_end != std::string::npos)
        {
            path = m_uri.substr(protocol_end, path_end - protocol_end);
            ++path_end;
        }
        else
        {
            path = m_uri.substr(protocol_end);
        }

        /* %-decode the string. */
        // <cctype> classifiers require a value representable as unsigned char.
        const auto is_hex = [](char c) {
            return std::isxdigit(static_cast<unsigned char>(c)) != 0;
        };

        std::string decoded_path;
        decoded_path.reserve(path.length());
        for(size_t i = 0; i < path.length(); ++i)
        {
            const bool is_escape = path[i] == '%' && i + 2 < path.length() &&
                                   is_hex(path[i + 1]) && is_hex(path[i + 2]);
            if(is_escape)
            {
                decoded_path += static_cast<char>(std::stoi(path.substr(i + 1, 2), nullptr, 16));
                i += 2;
            }
            else
            {
                decoded_path += path[i];
            }
        }

        /* Tokenize the query/fragment. */
        std::vector<std::string> tokens;
        size_t                   pos  = 0;
        size_t                   last = path_end;
        while((pos = m_uri.find('&', last)) != std::string::npos)
        {
            tokens.emplace_back(m_uri.substr(last, pos - last));
            last = pos + 1;
        }
        // last is npos only when the URI has no query/fragment at all.
        if(last != std::string::npos)
        {
            tokens.emplace_back(m_uri.substr(last));
        }

        /* Create a tag-value map from the tokenized query/fragment. */
        std::unordered_map<std::string, std::string> params;
        for(const std::string& token : tokens)
        {
            const size_t delim = token.find('=');
            if(delim != std::string::npos)
            {
                params.emplace(token.substr(0, delim), token.substr(delim + 1));
            }
        }

        buffer        = std::vector<char>{};
        size_t offset = 0;
        size_t size   = 0;

        if(auto offset_it = params.find("offset"); offset_it != params.end())
        {
            offset = std::stoul(offset_it->second, nullptr, 0);
        }

        if(auto size_it = params.find("size"); size_it != params.end())
        {
            // An explicit size of zero means there is nothing to load.
            if(!(size = std::stoul(size_it->second, nullptr, 0))) return;
        }

        if(protocol == "memory") throw std::runtime_error(protocol + " protocol not supported!");

        std::ifstream file(decoded_path, std::ios::in | std::ios::binary);
        if(!file || !file.is_open()) throw std::runtime_error("could not open " + decoded_path);

        if(!size)
        {
            // Determine the file length by skipping to EOF and counting the
            // skipped bytes, then clear the EOF state so seekg works again.
            file.ignore(std::numeric_limits<std::streamsize>::max());
            size_t bytes = file.gcount();
            file.clear();

            if(bytes < offset) throw std::runtime_error("invalid uri " + decoded_path);

            size = bytes - offset;
        }

        file.seekg(offset, std::ios_base::beg);
        // resize() zero-fills, so a short read leaves the tail as zero bytes.
        buffer.resize(size);
        file.read(buffer.data(), size);
    }

    /// The URI exactly as passed to the constructor.
    std::string m_uri;
    /// Bytes read from the file; empty when `size=0` was requested.
    std::vector<char> buffer;
};
181
/**
 * @brief Plain record describing one symbol of a code object.
 *
 * All numeric fields default to zero and the name defaults to an empty string.
 * The struct is an aggregate, so it can be brace-initialized in field order:
 * `{name, faddr, vaddr, mem_size}`.
 */
struct SymbolInfo
{
    /// Symbol name.
    std::string name{};
    /// Offset of the symbol inside the code object file.
    uint64_t faddr = 0;
    /// Virtual address of the symbol.
    uint64_t vaddr = 0;
    /// Size of the symbol in bytes.
    uint64_t mem_size = 0;
};
189
191{
192public:
193 DisassemblyInstance(const char* codeobj_data, uint64_t codeobj_size)
194 {
195 buffer = std::vector<char>(codeobj_size, 0);
196 std::memcpy(buffer.data(), codeobj_data, codeobj_size);
197
198 THROW_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data));
199 THROW_COMGR(amd_comgr_set_data(data, buffer.size(), buffer.data()));
200
201 size_t isa_size = 128;
202 std::string input_isa{};
203 input_isa.resize(isa_size);
204 THROW_COMGR(amd_comgr_get_data_isa_name(data, &isa_size, input_isa.data()));
205
206 THROW_COMGR(amd_comgr_create_disassembly_info(
207 input_isa.data(),
208 &DisassemblyInstance::memory_callback,
209 &DisassemblyInstance::inst_callback,
210 [](uint64_t, void*) {},
211 &info));
212 }
214 {
215 amd_comgr_release_data(data);
216 amd_comgr_destroy_disassembly_info(info);
217 }
218
219 std::pair<std::string, size_t> ReadInstruction(uint64_t faddr)
220 {
221 uint64_t size_read;
222 uint64_t addr_in_buffer = reinterpret_cast<uint64_t>(buffer.data()) + faddr;
223
225 amd_comgr_disassemble_instruction(info, addr_in_buffer, (void*) this, &size_read));
226 return {std::move(this->last_instruction), size_read};
227 }
228
229 std::map<uint64_t, SymbolInfo>& GetKernelMap()
230 {
231 symbol_map = {};
232 THROW_COMGR(amd_comgr_iterate_symbols(data, &DisassemblyInstance::symbol_callback, this));
233
234 return symbol_map;
235 }
236
237 static amd_comgr_status_t symbol_callback(amd_comgr_symbol_t symbol, void* user_data)
238 {
239 amd_comgr_symbol_type_t type;
240 RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_TYPE, &type));
241
242 if(type != AMD_COMGR_SYMBOL_TYPE_FUNC) return AMD_COMGR_STATUS_SUCCESS;
243
244 uint64_t vaddr = 0;
245 uint64_t mem_size = 0;
246 uint64_t name_size = 0;
247 RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_VALUE, &vaddr));
248 RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_SIZE, &mem_size));
250 amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME_LENGTH, &name_size));
251
252 std::string name;
253 name.resize(name_size);
254
255 RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME, name.data()));
256
257 DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
258 std::optional<uint64_t> faddr = instance.va2fo(vaddr);
259
260 if(faddr) instance.symbol_map[vaddr] = {name, *faddr, vaddr, mem_size};
261 return AMD_COMGR_STATUS_SUCCESS;
262 }
263
264 static uint64_t memory_callback(uint64_t from, char* to, uint64_t size, void* user_data)
265 {
266 DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
267 int64_t copysize = reinterpret_cast<int64_t>(instance.buffer.data()) +
268 instance.buffer.size() - static_cast<int64_t>(from);
269 copysize = std::min<int64_t>(size, copysize);
270 std::memcpy(to, (char*) from, copysize);
271 return copysize;
272 }
273
274 static void inst_callback(const char* instruction, void* user_data)
275 {
276 DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
277
278 if(!instruction) return;
279
280 while(*instruction == '\t' || *instruction == ' ')
281 instruction++;
282 instance.last_instruction = instruction;
283 }
284
285 std::optional<uint64_t> va2fo(uint64_t va)
286 {
287 CHECK_VA2FO(buffer.size() > sizeof(Elf64_Ehdr), "buffer is not large enough");
288
289 uint8_t* e_ident = (uint8_t*) buffer.data();
290 CHECK_VA2FO(e_ident, "e_ident is nullptr");
291
292 CHECK_VA2FO(e_ident[EI_MAG0] == ELFMAG0 || e_ident[EI_MAG1] == ELFMAG1 ||
293 e_ident[EI_MAG2] == ELFMAG2 || e_ident[EI_MAG3] == ELFMAG3,
294 "unexpected ei_mag");
295
296 CHECK_VA2FO(e_ident[EI_CLASS] == ELFCLASS64, "unexpected ei_class");
297 CHECK_VA2FO(e_ident[EI_DATA] == ELFDATA2LSB, "unexpected ei_data");
298 CHECK_VA2FO(e_ident[EI_VERSION] == EV_CURRENT, "unexpected ei_version");
299 CHECK_VA2FO(e_ident[EI_OSABI] == 64, "unexpected ei_osabi"); // ELFOSABI_AMDGPU_HSA
300
301 CHECK_VA2FO(e_ident[EI_ABIVERSION] == 2 || // ELFABIVERSION_AMDGPU_HSA_V4
302 e_ident[EI_ABIVERSION] == 3,
303 "unexpected ei_abiversion"); // ELFABIVERSION_AMDGPU_HSA_V5
304
305 Elf64_Ehdr* ehdr = (Elf64_Ehdr*) buffer.data();
306 CHECK_VA2FO(ehdr, "ehdr is nullptr");
307 CHECK_VA2FO(ehdr->e_type == ET_DYN, "unexpected e_type");
308 CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine");
309 CHECK_VA2FO(ehdr->e_phoff != 0, "unexpected e_phoff");
310
311 CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Phdr),
312 "buffer is not large enough");
313
314 Elf64_Phdr* phdr = (Elf64_Phdr*) ((uint8_t*) buffer.data() + ehdr->e_phoff);
315 CHECK_VA2FO(phdr, "phdr is nullptr");
316
317 for(uint16_t i = 0; i < ehdr->e_phnum; ++i)
318 {
319 if(phdr[i].p_type != PT_LOAD) continue;
320 if(va < phdr[i].p_vaddr || va >= (phdr[i].p_vaddr + phdr[i].p_memsz)) continue;
321
322 return va + phdr[i].p_offset - phdr[i].p_vaddr;
323 }
324 return std::nullopt;
325 }
326
327 std::vector<char> buffer;
328 std::string last_instruction;
329 amd_comgr_disassembly_info_t info;
330 amd_comgr_data_t data;
331 std::map<uint64_t, SymbolInfo> symbol_map;
332};
333
334} // namespace disassembly
335} // namespace codeobj
336} // namespace rocprofiler
static amd_comgr_status_t symbol_callback(amd_comgr_symbol_t symbol, void *user_data)
std::pair< std::string, unsigned long > ReadInstruction(uint64_t faddr)
static uint64_t memory_callback(uint64_t from, char *to, uint64_t size, void *user_data)
std::map< uint64_t, SymbolInfo > & GetKernelMap()
std::optional< uint64_t > va2fo(uint64_t va)
DisassemblyInstance(const char *codeobj_data, uint64_t codeobj_size)
static void inst_callback(const char *instruction, void *user_data)
#define RETURN_COMGR(call)
#define THROW_COMGR(call)
#define CHECK_VA2FO(x, msg)