Invoke this function to receive callbacks when a ROCm library registers its API intercept table with rocprofiler. Use the rocprofiler_intercept_table_t enumeration for specifying which raw API tables the tool would like to have access to. E.g. including ROCPROFILER_HSA_TABLE in the rocprofiler_at_intercept_table_registration function call communicates to rocprofiler that, when rocprofiler receives a HsaApiTable
instance, the tool would like rocprofiler to provide it access too.
Invoke this function to receive callbacks when a ROCm library registers its API intercept table with rocprofiler. Use the rocprofiler_intercept_table_t enumeration for specifying which raw API tables the tool would like to have access to. E.g. including ROCPROFILER_HSA_TABLE in the rocprofiler_at_intercept_table_registration function call communicates to rocprofiler that, when rocprofiler receives a HsaApiTable
instance, the tool would like rocprofiler to provide it access too. When the HIP, HSA, and ROCTx libraries are initialized (either explicitly or on the first invocation of one of their public API functions), these runtimes will provide a table of function pointers to the rocprofiler library via the rocprofiler-register library if the rocprofiler_configure
symbol is visible in the application's symbol table. The vast majority of tools will want to use the Synchronous Tracing Services to trace these runtime APIs; however, some tools may want or require installing their own intercept functions in lieu of receiving these callbacks, and those tools should use rocprofiler_at_intercept_table_registration to install their intercept functions. There are no restrictions on where or how early this function can be invoked, but it will return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED if it is invoked after rocprofiler has requested all the tool configurations. Thus, it is highly recommended to invoke this function within the rocprofiler_configure function or the callback passed to the rocprofiler_force_configure function – the reason for this recommendation is that if rocprofiler_at_intercept_table_registration is invoked in one of these locations, rocprofiler can guarantee that the tool will be passed the API table because, at the first instance of a runtime registering its API table, rocprofiler will ensure that, in the case of the former, rocprofiler will invoke all of the rocprofiler_configure symbols that are visible before checking the list of tools which want to receive the API tables and, in the case of the latter, rocprofiler_force_configure will fail with error code ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED if a runtime has already been registered (and, therefore, already scanned and invoked the visible rocprofiler_configure symbols and completed the tool initialization). If rocprofiler_at_intercept_table_registration is invoked outside of these recommended places, even if it is done before the main
function starts (e.g. in a library init/constructor function), it is possible that another library, such as ROCm-aware MPI, caused the HIP and HSA runtime libraries to be initialized when that library was loaded. In this aforementioned scenario, if the ROCm-aware MPI library's init/constructor function runs before your library init/constructor function, rocprofiler will have already processed the API table and will not provide the API table to the tool due to the fact that the API may already be in use and, thus, any modifications to the table might result in thread-safety violations or more disastrous consequences.
namespace
{
// Builds a logging shim around `func` and returns it as a plain function pointer.
//
// The shim prints "Wrapping <name>..." to std::cout and then forwards its arguments to
// the original function. When the original pointer is null, a value-initialized RetT is
// returned instead (nothing is returned for void).
//
// Idx is not used by the body; it only makes each instantiation distinct so that every
// instantiation owns its own set of function-local statics. Those statics are initialized
// on the first call of a given instantiation only.
template <size_t Idx, typename RetT, typename... Args>
auto
generate_wrapper(const char* name, RetT (*func)(Args...))
{
    using fn_ptr_t = RetT (*)(Args...);

    static const char* label  = name;
    static fn_ptr_t    target = func;

    // Capture-less lambda: it may only refer to the statics above, which is what allows
    // the conversion to an ordinary function pointer.
    static fn_ptr_t shim = [](Args... args) -> RetT {
        std::cout << "Wrapping " << label << "..." << std::endl;
        if(target != nullptr) return target(args...);
        if constexpr(!std::is_void_v<RetT>) return RetT{};
    };

    return shim;
}
// Replaces the `FUNC##_fn` member reached through TABLE with the function pointer returned
// by generate_wrapper, passing the stringized FUNC as the name and the current member value
// as the function to wrap. __COUNTER__ supplies the Idx template argument, so each expansion
// of the macro selects a different generate_wrapper instantiation.
#define GENERATE_WRAPPER(TABLE, FUNC) \
TABLE->FUNC##_fn = generate_wrapper<__COUNTER__>(#FUNC, TABLE->FUNC##_fn)
// Callback that installs logging wrappers into an HsaApiTable (judging by the error messages
// and the cast below, this is the HSA runtime's table-registration callback).
// NOTE(review): the line carrying the function name and its first parameter is not present
// in this listing; the body reads a `type` value that none of the visible parameters declare.
void
uint64_t lib_version,
uint64_t lib_instance,
void** tables,
uint64_t num_tables,
void* user_data)
{
// NOTE(review): as listed, this throw is unconditional and everything below it is
// unreachable; a guard on `type` presumably precedes it in the original file -- confirm.
throw std::runtime_error{"unexpected library type: " +
std::to_string(static_cast<int>(type))};
// Reject anything other than instance 0 supplying exactly one table.
if(lib_instance != 0) throw std::runtime_error{"multiple instances of HSA runtime library"};
if(num_tables != 1) throw std::runtime_error{"expected only one table of type HsaApiTable"};
// The single table entry is reinterpreted as the HSA API table.
auto* hsa_api_table = static_cast<HsaApiTable*>(tables[0]);
// Overwrite each listed `core_` entry with a wrapper around its current value.
GENERATE_WRAPPER(hsa_api_table->core_, hsa_agent_get_info);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_agent_iterate_isas);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_code_object_reader_create_from_memory);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_executable_create_alt);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_executable_freeze);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_executable_get_symbol_by_name);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_executable_iterate_symbols);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_executable_load_agent_code_object);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_executable_symbol_get_info);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_isa_get_info_alt);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_iterate_agents);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_queue_add_write_index_screlease);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_queue_create);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_queue_load_read_index_relaxed);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_queue_load_read_index_scacquire);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_signal_create);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_signal_destroy);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_signal_load_relaxed);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_signal_silent_store_relaxed);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_signal_store_screlease);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_signal_wait_scacquire);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_system_get_info);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_system_get_major_extension_table);
}
}
// NOTE(review): fragment of a function definition. The return type, function name, leading
// parameter(s) and the last parameter (apparently `id`, which the body dereferences) are not
// present in this listing.
const char* runtime_version,
uint32_t priority,
{
// Give this client a name.
id->name = "ExampleTool";
// A null pointer is returned rather than a configuration object.
return nullptr;
}
rocprofiler_intercept_table_t
Enumeration for specifying intercept tables supported by rocprofiler. This enumeration is used for in...
rocprofiler_status_t rocprofiler_at_intercept_table_registration(rocprofiler_intercept_library_cb_t callback, int libs, void *data)
rocprofiler_tool_configure_result_t * rocprofiler_configure(uint32_t version, const char *runtime_version, uint32_t priority, rocprofiler_client_id_t *client_id)
This is the special function that tools define to enable rocprofiler support. The tool should return ...
A client refers to an individual or entity engaged in the configuration of ROCprofiler services....
#ifdef NDEBUG
# undef NDEBUG
#endif
#include "client.hpp"
#include "common/defines.hpp"
#include "common/filesystem.hpp"
#include <hip/amd_detail/hip_api_trace.hpp>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <mutex>
#include <ratio>
#include <string>
#include <string_view>
#include <vector>
namespace client
{
namespace
{
// One recorded entry of the sample's call stack: where something happened plus an optional
// free-form annotation. All members default to empty / zero.
struct source_location
{
    std::string function{};  // name of the function the entry refers to
    std::string file{};      // source file path associated with the entry
    uint32_t    line{0};     // line number associated with the entry
    std::string context{};   // extra text printed after " :: " when non-empty
};
// Sequence of recorded source locations; this is the type behind the tool's `void*` data.
using call_stack_t = std::vector<source_location>;
// Maps a callback tracing kind to a C-string name.
using callback_kind_names_t = std::map<rocprofiler_callback_tracing_kind_t, const char*>;
// Maps a callback tracing kind to a table of (uint32_t id -> C-string name);
// presumably the id is an operation id -- confirm against the rocprofiler headers.
using callback_kind_operation_names_t =
std::map<rocprofiler_callback_tracing_kind_t, std::map<uint32_t, const char*>>;
// A wrapped function's source location paired with its invocation count.
using wrap_count_t = std::pair<source_location, size_t>;
// Bookkeeping for every generated wrapper, keyed by the wrapper's Idx template argument.
// Heap-allocated here and deleted in tool_fini.
auto* client_wrap_data = new std::map<size_t, wrap_count_t>{};
// Length of the longest wrapped-function name registered so far; print_call_stack uses it
// (capped at 60) as the width of the function-name column.
size_t func_width = 0;
// Writes every entry of `_call_stack` as one formatted line:
//   "<n>/<total> [<file name>:<line>] <function>[ :: <context>]"
//
// The destination is "intercept_table.log" unless the ROCPROFILER_SAMPLE_OUTPUT_FILE
// environment variable is set. The special values "stdout" and "stderr" select the
// corresponding standard stream; any other value is treated as a file path. If the file
// cannot be opened, a message is printed and the output falls back to stderr.
void
print_call_stack(const call_stack_t& _call_stack)
{
    auto ofname = std::string{"intercept_table.log"};
    if(const auto* eofname = std::getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE")) ofname = eofname;

    // The file stream lives on the stack so it is closed and released on every path,
    // including the failed-open fallback (a heap-allocated stream was leaked there before).
    auto          file = std::ofstream{};
    std::ostream* ofs  = nullptr;

    if(ofname == "stdout")
        ofs = &std::cout;
    else if(ofname == "stderr")
        ofs = &std::cerr;
    else
    {
        file.open(ofname);
        if(file)
            ofs = &file;
        else
        {
            std::cerr << "Error outputting to " << ofname << ". Redirecting to stderr...\n";
            ofname = "stderr";
            ofs    = &std::cerr;
        }
    }

    std::cout << "Outputting collected data to " << ofname << "...\n" << std::flush;

    // Cap the function-name column so one very long name cannot blow up the layout.
    const size_t _func_width = std::min<size_t>(func_width, 60);

    size_t n = 0;
    *ofs << std::left;
    for(const auto& itr : _call_stack)
    {
        *ofs << std::left << std::setw(2) << ++n << "/" << std::setw(2) << _call_stack.size()
             << " [" << common::fs::path{itr.file}.filename() << ":" << itr.line << "] "
             << std::setw(_func_width) << itr.function;
        if(!itr.context.empty()) *ofs << " :: " << itr.context;
        *ofs << "\n";
    }

    *ofs << std::flush;
}
// Finalization callback for the tool.
//
// `tool_data` must point to the call_stack_t created for this tool; ownership is taken and
// the object is deleted here. For every generated wrapper, an entry annotated with
// "call_count = <N>" is appended to the call stack, followed by an entry for this function;
// the result is written out via print_call_stack. Both the call stack and the global wrapper
// bookkeeping are then released.
//
// Throws std::runtime_error when none of the wrappers was ever invoked.
void
tool_fini(void* tool_data)
{
    assert(tool_data != nullptr);

    auto*  _call_stack   = static_cast<call_stack_t*>(tool_data);
    size_t wrapped_count = 0;

    if(client_wrap_data != nullptr)
    {
        for(const auto& itr : *client_wrap_data)
        {
            const auto& [location, call_count] = itr.second;

            // Work on a copy: the stored record is left untouched, only the reported
            // entry gets the call-count annotation.
            auto src_loc = location;
            src_loc.context += "call_count = " + std::to_string(call_count);
            _call_stack->emplace_back(std::move(src_loc));
            wrapped_count += call_count;
        }
    }

    _call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});

    print_call_stack(*_call_stack);

    delete _call_stack;
    delete client_wrap_data;
    // The wrappers test this pointer before touching the bookkeeping; clearing it keeps a
    // wrapped call made after finalization from dereferencing freed memory.
    client_wrap_data = nullptr;

    if(wrapped_count == 0)
    {
        throw std::runtime_error{"intercept_table sample did not wrap HIP runtime API table"};
    }
}
// Per-instantiation slot holding the original (unwrapped) function pointer for the wrapper
// identified by <Idx, RetT, Args...>. Null until generate_wrapper assigns it.
template <size_t Idx, typename RetT, typename... Args>
RetT (*underlying_function)(Args...) = nullptr;
// The function installed in place of a wrapped API entry.
//
// While the global bookkeeping map exists, the call counter stored under Idx is incremented
// (std::map::at throws std::out_of_range if no record was registered for Idx) and a message
// is written to std::clog on the first invocation. The call is then forwarded to the stored
// original function. When no original function is stored, a value-initialized RetT is
// returned (nothing for void).
template <size_t Idx, typename RetT, typename... Args>
RetT
get_wrapper_function(Args... args)
{
    if(client_wrap_data)
    {
        // Single lookup per call; this runs on every intercepted API invocation.
        auto& [location, call_count] = client_wrap_data->at(Idx);
        if(call_count == 0)
            std::clog << "First invocation of wrapped function: '" << location.function
                      << "'...\n"
                      << std::flush;
        call_count += 1;
    }

    if(underlying_function<Idx, RetT, Args...>)
        return underlying_function<Idx, RetT, Args...>(args...);
    if constexpr(!std::is_void<RetT>::value) return RetT{};
}
// Registers a wrapper for `func` and returns the function pointer to install in its place.
//
// name: function name recorded for reporting; also used to widen the global name column.
// line: line number stored in the wrapper's source_location record.
// func: original function pointer, saved so the wrapper can forward to it.
//
// A record with a zero call count is inserted under Idx (std::map::emplace leaves an
// existing record for the same Idx untouched).
template <size_t Idx, typename RetT, typename... Args>
auto
generate_wrapper(const char* name, uint32_t line, RetT (*func)(Args...))
{
    using wrapper_ptr_t = RetT (*)(Args...);

    // Track the longest name seen so far.
    const size_t name_len = std::string_view{name}.length();
    if(func_width < name_len) func_width = name_len;

    client_wrap_data->emplace(Idx, wrap_count_t{source_location{name, __FILE__, line, ""}, 0});

    underlying_function<Idx, RetT, Args...> = func;

    wrapper_ptr_t wrapper = &get_wrapper_function<Idx, RetT, Args...>;
    return wrapper;
}
// Replaces the `FUNC##_fn` member of TABLE with the wrapper returned by generate_wrapper.
// The stringized FUNC and the current __LINE__ are recorded for reporting, and the current
// member value is passed as the function to forward to. __COUNTER__ supplies the Idx
// template argument, so each expansion registers under a different index.
#define GENERATE_WRAPPER(TABLE, FUNC) \
TABLE->FUNC##_fn = generate_wrapper<__COUNTER__>(#FUNC, __LINE__, TABLE->FUNC##_fn)
// Callback that installs counting wrappers into a HipDispatchTable (judging by the error
// messages and the cast below, this is the HIP runtime's table-registration callback).
// NOTE(review): the line carrying the function name and its first parameter is not present
// in this listing; the body reads a `type` value that none of the visible parameters declare.
void
uint64_t lib_version,
uint64_t lib_instance,
void** tables,
uint64_t num_tables,
void* user_data)
{
// NOTE(review): as listed, this throw is unconditional and everything below it is
// unreachable; a guard on `type` presumably precedes it in the original file -- confirm.
throw std::runtime_error{"unexpected library type: " +
std::to_string(static_cast<int>(type))};
// Reject anything other than instance 0 supplying exactly one table.
if(lib_instance != 0) throw std::runtime_error{"multiple instances of HIP runtime library"};
if(num_tables != 1)
throw std::runtime_error{"expected only one table of type HipDispatchTable"};
// user_data is interpreted as the vector of source locations used as the call stack.
auto* call_stack = static_cast<std::vector<client::source_location>*>(user_data);
// lib_version is decoded as major * 10000 + minor * 100 + patch.
uint32_t major = lib_version / 10000;
uint32_t minor = (lib_version % 10000) / 100;
uint32_t patch = lib_version % 100;
// Build "<client name> is using HIP runtime v<major>.<minor>.<patch>".
// NOTE(review): `client_id` is referenced here but its declaration is not visible in this
// listing.
auto info = std::stringstream{};
info << client_id->
name <<
" is using HIP runtime v" << major <<
"." << minor <<
"." << patch;
std::clog << info.str() << "\n" << std::flush;
// Record this callback in the call stack, with the version message as its context.
call_stack->emplace_back(client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
// The single table entry is reinterpreted as the HIP dispatch table.
auto* hip_api_table = static_cast<HipDispatchTable*>(tables[0]);
// Overwrite each listed entry with a counting wrapper around its current value.
GENERATE_WRAPPER(hip_api_table, hipGetDeviceCount);
GENERATE_WRAPPER(hip_api_table, hipSetDevice);
GENERATE_WRAPPER(hip_api_table, hipStreamCreate);
GENERATE_WRAPPER(hip_api_table, hipStreamDestroy);
GENERATE_WRAPPER(hip_api_table, hipStreamSynchronize);
GENERATE_WRAPPER(hip_api_table, hipDeviceSynchronize);
GENERATE_WRAPPER(hip_api_table, hipDeviceReset);
GENERATE_WRAPPER(hip_api_table, hipGetErrorString);
GENERATE_WRAPPER(hip_api_table, hipExtLaunchKernel);
GENERATE_WRAPPER(hip_api_table, hipExtLaunchMultiKernelMultiDevice);
GENERATE_WRAPPER(hip_api_table, hipGraphLaunch);
GENERATE_WRAPPER(hip_api_table, hipLaunchByPtr);
GENERATE_WRAPPER(hip_api_table, hipLaunchCooperativeKernel);
GENERATE_WRAPPER(hip_api_table, hipLaunchCooperativeKernelMultiDevice);
GENERATE_WRAPPER(hip_api_table, hipLaunchHostFunc);
GENERATE_WRAPPER(hip_api_table, hipLaunchKernel);
GENERATE_WRAPPER(hip_api_table, hipModuleLaunchCooperativeKernel);
GENERATE_WRAPPER(hip_api_table, hipModuleLaunchCooperativeKernelMultiDevice);
GENERATE_WRAPPER(hip_api_table, hipModuleLaunchKernel);
GENERATE_WRAPPER(hip_api_table, hipExtModuleLaunchKernel);
GENERATE_WRAPPER(hip_api_table, hipHccModuleLaunchKernel);
GENERATE_WRAPPER(hip_api_table, hipMemcpy);
GENERATE_WRAPPER(hip_api_table, hipMemcpyAsync);
GENERATE_WRAPPER(hip_api_table, hipMemset);
GENERATE_WRAPPER(hip_api_table, hipMemsetAsync);
}
}
// No-op: the body is empty, so calling this performs no work.
void
setup()
{}
// No-op: the body is empty, so calling this performs no work.
void
shutdown()
{}
}
// NOTE(review): fragment of a function definition. The return type, function name, the
// leading parameter (apparently `version`) and the last parameter (apparently `id`) are not
// present in this listing, and several statements below are missing lines as well.
const char* runtime_version,
uint32_t priority,
{
// Name this client and remember its id for later use.
id->name = "ExampleTool";
client::client_id = id;
// `version` is decoded as major * 10000 + minor * 100 + patch.
uint32_t major = version / 10000;
uint32_t minor = (version % 10000) / 100;
uint32_t patch = version % 100;
// Build and log "<name> (priority=<p>) is using rocprofiler-sdk v<major>.<minor>.<patch>
// (<runtime_version>)".
auto info = std::stringstream{};
info <<
id->
name <<
" (priority=" << priority <<
") is using rocprofiler-sdk v" << major <<
"."
<< minor << "." << patch << " (" << runtime_version << ")";
std::clog << info.str() << std::endl;
{
// Cross-check the decoded version against `version_info`.
// NOTE(review): the call that fills `version_info` (first argument of ROCPROFILER_CALL) is
// missing from this listing.
auto version_info = std::array<uint32_t, 3>{};
ROCPROFILER_CALL(
"failed to get version info");
if(std::array<uint32_t, 3>{major, minor, patch} != version_info)
{
throw std::runtime_error{"version info mismatch"};
}
}
// Heap-allocated call stack handed out as `void*` tool data; tool_fini deletes it.
auto* client_tool_data = new std::vector<client::source_location>{};
client_tool_data->emplace_back(
client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
// NOTE(review): the beginning of this call (the function invoked and its leading arguments)
// is missing from this listing; only the trailing user-data argument and message remain.
ROCPROFILER_CALL(
static_cast<void*>(client_tool_data)),
"runtime api registration");
// Function-local static, so the returned pointer stays valid after this function returns.
// NOTE(review): the initializer's type and leading member(s) are missing from this listing.
static auto cfg =
nullptr,
&client::tool_fini,
static_cast<void*>(client_tool_data)};
return &cfg;
}
@ ROCPROFILER_HIP_RUNTIME_TABLE
const char * name
clients should set this value for debugging
rocprofiler_status_t rocprofiler_get_version(uint32_t *major, uint32_t *minor, uint32_t *patch)
Query the version of the installed library.