Iterates over all the arguments for the traced function (when available). This is particularly useful when tools want to annotate traces with the function arguments. See for a usage example.
Iterates over all the arguments for the traced function (when available). This is particularly useful when tools want to annotate traces with the function arguments. See for a usage example.In contrast to rocprofiler_iterate_callback_tracing_kind_operation_args, this function cannot dereference pointer arguments since there is a high probability that the pointer address references the stack and the buffer tracing record is delivered after the stack variables of the corresponding function have been destroyed.
#ifdef NDEBUG
# undef NDEBUG
#endif
#include "client.hpp"
#include <rocprofiler-sdk/registration.h>
#include "common/call_stack.hpp"
#include "common/defines.hpp"
#include "common/filesystem.hpp"
#include "common/name_info.hpp"
#include <atomic>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <thread>
#include <unordered_set>
#include <vector>
namespace client
{
namespace
{
using common::buffer_name_info;
using common::call_stack_t;
using common::source_location;
using kernel_symbol_map_t = std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t>;
buffer_name_info client_name_info = {};
kernel_symbol_map_t client_kernels = {};
template <typename Tp>
std::string
as_hex(Tp _v, size_t _width = 16)
{
uintptr_t _vp = 0;
if constexpr(std::is_pointer<Tp>::value)
_vp = reinterpret_cast<uintptr_t>(_v);
else
_vp = _v;
auto _ss = std::stringstream{};
_ss.fill('0');
_ss << "0x" << std::hex << std::setw(_width) << _vp;
return _ss.str();
}
void
print_call_stack(const call_stack_t& _call_stack)
{
common::print_call_stack("api_buffered_trace.log", _call_stack);
}
void
void* callback_data)
{
{
{
ROCPROFILER_CALL(flush_status, "buffer flush");
}
}
{
auto* data =
static_cast<kernel_symbol_data_t*
>(record.
payload);
{
client_kernels.emplace(data->kernel_id, *data);
}
{
}
}
(void) user_data;
(void) callback_data;
}
void
size_t num_headers,
void* user_data,
uint64_t drop_count)
{
assert(user_data != nullptr);
assert(drop_count == 0 && "drop count should be zero for lossless policy");
if(num_headers == 0)
throw std::runtime_error{
"rocprofiler invoked a buffer callback with no headers. this should never happen"};
else if(headers == nullptr)
throw std::runtime_error{"rocprofiler invoked a buffer callback with a null pointer to the "
"array of headers. this should never happen"};
for(size_t i = 0; i < num_headers; ++i)
{
auto* header = headers[i];
auto kind_name = std::string{};
if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING)
{
const char* _name = nullptr;
"query buffer tracing kind name");
if(_name)
{
static size_t len = 15;
kind_name = std::string{_name};
len = std::max(len, kind_name.length());
kind_name.resize(len, ' ');
kind_name += " :: ";
}
}
auto get_name = [](const auto* _record) -> std::string_view {
try
{
return client_name_info.at(_record->kind, _record->operation);
} catch(std::exception& e)
{
std::cerr << __FUNCTION__
<< " threw an exception for buffer tracing kind=" << _record->kind
<< ", operation=" << _record->operation << "\nException: " << e.what()
<< std::flush;
abort();
}
return std::string_view{"??"};
};
if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
{
auto* record =
auto info = std::stringstream{};
info <<
"tid=" << record->
thread_id <<
", context=" << context.
handle
<<
", buffer_id=" << buffer_id.
handle
<<
", cid=" << record->
correlation_id.
internal
<< ", kind=" << record->kind << ", operation=" << record->operation
<< ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp
<< ", name=" << get_name(record);
if(record->start_timestamp > record->end_timestamp)
{
auto msg = std::stringstream{};
msg << "hsa api: start > end (" << record->start_timestamp << " > "
<< record->end_timestamp
<< "). diff = " << (record->start_timestamp - record->end_timestamp);
std::cerr << "threw an exception " << msg.str() << "\n" << std::flush;
}
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
{
auto* record =
auto info = std::stringstream{};
info << "tid=" << record->thread_id << ", context=" << context.handle
<< ", buffer_id=" << buffer_id.handle
<<
", cid=" << record->correlation_id.
internal
<< ", kind=" << record->kind << ", operation=" << record->operation
<< ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp
<< ", name=" << client_name_info[record->kind][record->operation];
if(record->start_timestamp > record->end_timestamp)
{
auto msg = std::stringstream{};
msg << "hip api: start > end (" << record->start_timestamp << " > "
<< record->end_timestamp
<< "). diff = " << (record->start_timestamp - record->end_timestamp);
std::cerr << "threw an exception " << msg.str() << "\n" << std::flush;
}
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
{
auto* record =
auto info = std::stringstream{};
auto kernel_id = record->dispatch_info.kernel_id;
auto kernel_name = (client_kernels.count(kernel_id) > 0)
? std::string_view{client_kernels.at(kernel_id).kernel_name}
: std::string_view{"??"};
info << "tid=" << record->thread_id << ", context=" << context.handle
<< ", buffer_id=" << buffer_id.handle
<<
", cid=" << record->correlation_id.
internal
<< ", kind=" << record->kind << ", operation=" << record->operation
<< ", agent_id=" << record->dispatch_info.agent_id.handle
<< ", queue_id=" << record->dispatch_info.queue_id.handle
<< ", kernel_id=" << record->dispatch_info.kernel_id << ", kernel=" << kernel_name
<< ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp
<< ", private_segment_size=" << record->dispatch_info.private_segment_size
<< ", group_segment_size=" << record->dispatch_info.group_segment_size
<< ", workgroup_size=(" << record->dispatch_info.workgroup_size.x << ","
<< record->dispatch_info.workgroup_size.y << ","
<< record->dispatch_info.workgroup_size.z << "), grid_size=("
<< record->dispatch_info.grid_size.x << "," << record->dispatch_info.grid_size.y
<< "," << record->dispatch_info.grid_size.z << ")";
if(record->start_timestamp > record->end_timestamp)
throw std::runtime_error("kernel dispatch: start > end");
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
{
auto* record =
auto info = std::stringstream{};
info << "tid=" << record->thread_id << ", context=" << context.handle
<< ", buffer_id=" << buffer_id.handle
<<
", cid=" << record->correlation_id.
internal
<< ", kind=" << record->kind << ", operation=" << record->operation
<< ", src_agent_id=" << record->src_agent_id.handle
<< ", dst_agent_id=" << record->dst_agent_id.handle
<< ", direction=" << record->operation << ", start=" << record->start_timestamp
<< ", stop=" << record->end_timestamp << ", name=" << get_name(record);
if(record->start_timestamp > record->end_timestamp)
throw std::runtime_error("memory copy: start > end");
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
{
auto* record =
auto info = std::stringstream{};
auto _elapsed =
std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(
std::chrono::nanoseconds{record->end_timestamp - record->start_timestamp})
.count();
info << "tid=" << record->thread_id << ", context=" << context.handle
<< ", buffer_id=" << buffer_id.handle
<<
", cid=" << record->correlation_id.
internal
<< ", kind=" << record->kind << ", operation=" << record->operation
<< ", agent_id=" << record->agent_id.handle
<< ", queue_id=" << record->queue_id.handle << ", thread_id=" << record->thread_id
<< ", elapsed=" << std::setprecision(3) << std::fixed << _elapsed
<< " usec, flags=" << record->flags << ", name=" << get_name(record);
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
}
else
{
auto _msg = std::stringstream{};
_msg << "unexpected rocprofiler_record_header_t category + kind: (" << header->category
<< " + " << header->kind << ")";
throw std::runtime_error{_msg.str()};
}
}
}
void
{
static_cast<call_stack_t*>(tool_data)->emplace_back(
source_location{__FUNCTION__,
__FILE__,
__LINE__,
std::string{"internal thread about to be created by rocprofiler (lib="} +
std::to_string(lib) + ")"});
}
void
{
static_cast<call_stack_t*>(tool_data)->emplace_back(
source_location{__FUNCTION__,
__FILE__,
__LINE__,
std::string{"internal thread was created by rocprofiler (lib="} +
std::to_string(lib) + ")"});
}
int
{
assert(tool_data != nullptr);
auto* call_stack_v = static_cast<call_stack_t*>(tool_data);
call_stack_v->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});
client_name_info = common::get_buffer_tracing_names();
for(const auto& itr : client_name_info)
{
auto name_idx = std::stringstream{};
name_idx << " [" << std::setw(3) << itr.value << "]";
call_stack_v->emplace_back(
source_location{"rocprofiler_buffer_tracing_kind_names " + name_idx.str(),
__FILE__,
__LINE__,
std::string{itr.name}});
for(auto [didx, ditr] : itr.items())
{
auto operation_idx = std::stringstream{};
operation_idx << " [" << std::setw(3) << didx << "]";
call_stack_v->emplace_back(source_location{
"rocprofiler_buffer_tracing_kind_operation_names" + operation_idx.str(),
__FILE__,
__LINE__,
std::string{"- "} + std::string{*ditr}});
}
}
client_fini_func = fini_func;
auto code_object_ops = std::vector<rocprofiler_tracing_operation_t>{
ROCPROFILER_CALL(
code_object_ops.data(),
code_object_ops.size(),
tool_code_object_callback,
nullptr),
"code object tracing service configure");
constexpr auto buffer_size_bytes = 4096;
constexpr auto buffer_watermark_bytes = buffer_size_bytes - (buffer_size_bytes / 8);
buffer_size_bytes,
buffer_watermark_bytes,
tool_tracing_callback,
tool_data,
&client_buffer),
"buffer creation");
for(auto itr :
{
client_ctx, itr, nullptr, 0, client_buffer),
"buffer tracing service configure");
}
ROCPROFILER_CALL(
"buffer tracing service configure");
ROCPROFILER_CALL(
"buffer tracing service for kernel dispatch configure");
ROCPROFILER_CALL(
"buffer tracing service for memory copy configure");
ROCPROFILER_CALL(
"buffer tracing service for scratch memory configure");
"creating callback thread");
"assignment of thread for buffer");
int valid_ctx = 0;
"context validity check");
if(valid_ctx == 0)
{
return -1;
}
return 0;
}
void
tool_fini(void* tool_data)
{
assert(tool_data != nullptr);
auto* _call_stack = static_cast<call_stack_t*>(tool_data);
_call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});
print_call_stack(*_call_stack);
delete _call_stack;
}
}
void
setup()
{
if(int status = 0;
{
"force configuration");
}
}
void
shutdown()
{
if(client_id)
{
client_fini_func(*client_id);
}
}
void
start()
{
}
void
identify(uint64_t val)
{
}
void
stop()
{
}
}
const char* runtime_version,
uint32_t priority,
{
id->name = "ExampleTool";
client::client_id = id;
uint32_t major = version / 10000;
uint32_t minor = (version % 10000) / 100;
uint32_t patch = version % 100;
auto info = std::stringstream{};
info <<
id->
name <<
" (priority=" << priority <<
") is using rocprofiler-sdk v" << major <<
"."
<< minor << "." << patch << " (" << runtime_version << ")";
std::clog << info.str() << std::endl;
auto* client_tool_data = new std::vector<client::source_location>{};
client_tool_data->emplace_back(
client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
client::thread_precreate,
client::thread_postcreate,
ROCPROFILER_LIBRARY | ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY |
ROCPROFILER_MARKER_LIBRARY,
static_cast<void*>(client_tool_data)),
"registration for thread creation notifications");
static auto cfg =
&client::tool_init,
&client::tool_fini,
static_cast<void*>(client_tool_data)};
return &cfg;
}
uint64_t value
usage example: set to process id, thread id, etc.
rocprofiler_user_data_t external
An ID specified by tools to associate external events. See include/rocprofiler-sdk/external_correlati...
uint64_t internal
A unique ID created by rocprofiler-sdk when an API call is invoked.
uint64_t rocprofiler_thread_id_t
Thread ID. Value will be equivalent to syscall(__NR_gettid)
rocprofiler_buffer_tracing_kind_t
Service Buffer Tracing Kind.
rocprofiler_runtime_library_t
Enumeration for specifying runtime libraries supported by rocprofiler. This enumeration is used for t...
@ ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER
Kernel symbols - Device.
@ ROCPROFILER_CODE_OBJECT_LOAD
Code object containing kernel symbols.
@ ROCPROFILER_BUFFER_POLICY_LOSSLESS
Block when buffer is full.
@ ROCPROFILER_STATUS_ERROR_BUFFER_BUSY
buffer operation failed because it currently busy handling another request (e.g. flushing)
@ ROCPROFILER_STATUS_SUCCESS
No error occurred.
@ ROCPROFILER_CALLBACK_PHASE_UNLOAD
Callback invoked prior to code object unloading.
@ ROCPROFILER_CALLBACK_PHASE_LOAD
Callback invoked prior to code object loading.
@ ROCPROFILER_BUFFER_TRACING_HSA_CORE_API
@ ROCPROFILER_BUFFER_TRACING_MEMORY_COPY
@ ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY
Buffer scratch memory reclaimation info.
@ ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH
Buffer kernel dispatch info.
@ ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API
@ ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API
@ ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API
@ ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API
@ ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT
User-assignable data type.
rocprofiler_status_t rocprofiler_create_buffer(rocprofiler_context_id_t context, unsigned long size, unsigned long watermark, rocprofiler_buffer_policy_t policy, rocprofiler_buffer_tracing_cb_t callback, void *callback_data, rocprofiler_buffer_id_t *buffer_id)
Create buffer.
rocprofiler_status_t rocprofiler_flush_buffer(rocprofiler_buffer_id_t buffer_id)
Flush buffer.
rocprofiler_status_t rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t context_id, rocprofiler_buffer_tracing_kind_t kind, const rocprofiler_tracing_operation_t *operations, unsigned long operations_count, rocprofiler_buffer_id_t buffer_id)
Configure Buffer Tracing Service.
rocprofiler_status_t rocprofiler_query_buffer_tracing_kind_name(rocprofiler_buffer_tracing_kind_t kind, const char **name, uint64_t *name_len)
Query the name of the buffer tracing kind. The name retrieved from this function is a string literal ...
ROCProfiler Buffer HIP API Tracer Record.
ROCProfiler Buffer HSA API Tracer Record.
ROCProfiler Buffer Kernel Dispatch Tracer Record.
ROCProfiler Buffer Memory Copy Tracer Record.
ROCProfiler Buffer Scratch Memory Tracer Record.
rocprofiler_status_t rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t context_id, rocprofiler_callback_tracing_kind_t kind, const rocprofiler_tracing_operation_t *operations, unsigned long operations_count, rocprofiler_callback_tracing_cb_t callback, void *callback_args)
Configure Callback Tracing Service. The callback tracing service provides two synchronous callbacks a...
ROCProfiler Code Object Kernel Symbol Tracer Callback Record.
rocprofiler_status_t rocprofiler_start_context(rocprofiler_context_id_t context_id)
Start context.
rocprofiler_status_t rocprofiler_create_context(rocprofiler_context_id_t *context_id)
Create context.
rocprofiler_status_t rocprofiler_context_is_valid(rocprofiler_context_id_t context_id, int *status)
Query whether the context is valid.
rocprofiler_status_t rocprofiler_stop_context(rocprofiler_context_id_t context_id)
Stop context.
rocprofiler_status_t rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context, rocprofiler_thread_id_t tid, rocprofiler_user_data_t external_correlation_id)
Push default value for external field in rocprofiler_correlation_id_t onto stack.
rocprofiler_status_t rocprofiler_assign_callback_thread(rocprofiler_buffer_id_t buffer_id, rocprofiler_callback_thread_t cb_thread_id)
(experimental) By default, all buffered results are delivered on the same thread. Using rocprofiler_c...
rocprofiler_status_t rocprofiler_at_internal_thread_create(rocprofiler_internal_thread_library_cb_t precreate, rocprofiler_internal_thread_library_cb_t postcreate, int libs, void *data)
(experimental) Invoke this function to receive callbacks before and after the creation of an internal...
rocprofiler_status_t rocprofiler_create_callback_thread(rocprofiler_callback_thread_t *cb_thread_id)
(experimental) Create a handle to a unique thread (created by rocprofiler) which, when associated wit...
(experimental) opaque handle to an internal thread identifier which delivers callbacks for buffers
rocprofiler_status_t rocprofiler_get_thread_id(rocprofiler_thread_id_t *tid)
Get the identifier value of the current thread that is used by rocprofiler.
const char * name
clients should set this value for debugging
void(* rocprofiler_client_finalize_t)(rocprofiler_client_id_t)
Prototype for the function pointer provided to tool in rocprofiler_tool_initialize_t....
rocprofiler_tool_configure_result_t * rocprofiler_configure(uint32_t version, const char *runtime_version, uint32_t priority, rocprofiler_client_id_t *client_id)
This is the special function that tools define to enable rocprofiler support. The tool should return ...
rocprofiler_status_t rocprofiler_is_initialized(int *status)
Query whether rocprofiler has already scanned the binary for all the instances of rocprofiler_configu...
rocprofiler_status_t rocprofiler_force_configure(rocprofiler_configure_func_t configure_func)
Function for explicitly registering a configuration with rocprofiler. This can be invoked before any ...
(experimental) A client refers to an individual or entity engaged in the configuration of ROCprofiler...
ROCProfiler-SDK API interface.