ROCm XIO API reference#
2026-04-23
22 min read time
This page documents the ROCm XIO public C++ API, extracted
from annotated source headers by Doxygen and rendered in Sphinx
via rocm_docs.doxygen (Breathe directives).
Core Framework#
Base Classes#
-
class XioEndpoint#
Base class for all endpoint implementations.
Uses polymorphism to eliminate switch statements and function pointers.
Public Functions
-
virtual void applyCommonConfig(void *endpointConfig, const XioEndpointConfig *baseConfig)#
Apply common config to endpoint config.
- Parameters:
endpointConfig – Endpoint-specific config pointer.
baseConfig – Base configuration.
-
virtual size_t getCompletionQueueEntrySize() const = 0#
Get completion queue entry size in bytes.
-
virtual size_t getCompletionQueueLength(const XioEndpointConfig *config) const#
Get number of completion queue entries.
- Parameters:
config – Base endpoint configuration.
- Returns:
Number of entries (default: numThreads).
-
virtual const char *getDescription() const = 0#
Get a human-readable description.
-
virtual unsigned getDoorbellQueueLength() const#
Get doorbell queue length.
- Returns:
Doorbell queue length, or 0 if disabled.
-
virtual unsigned getIterations(void *endpointConfig) const#
Get iteration count for this endpoint.
- Parameters:
endpointConfig – Endpoint config pointer.
- Returns:
Number of iterations to run.
-
virtual const char *getName() const = 0#
Get the endpoint name string.
-
virtual size_t getSubmissionQueueEntrySize() const = 0#
Get submission queue entry size in bytes.
-
virtual size_t getSubmissionQueueLength(const XioEndpointConfig *config) const#
Get number of submission queue entries.
- Parameters:
config – Base endpoint configuration.
- Returns:
Number of entries (default: numThreads).
-
virtual EndpointType getType() const = 0#
Get the endpoint type identifier.
-
virtual void *initializeEndpointConfig()#
Initialize endpoint-specific configuration.
- Returns:
Pointer to config object, or nullptr.
-
virtual bool isEmulateMode() const#
Check if endpoint is in emulate mode.
- Returns:
true if emulate mode is enabled.
-
virtual hipError_t run(XioEndpointConfig *config) = 0#
Run the endpoint test.
- Parameters:
config – Endpoint configuration.
- Returns:
hipSuccess on success, error code on failure.
-
virtual std::string validateConfig(void *endpointConfig)#
Validate endpoint-specific configuration.
- Parameters:
endpointConfig – Endpoint config to validate.
- Returns:
Empty string if valid, error message otherwise.
-
virtual ~XioEndpoint() = default#
-
virtual void applyCommonConfig(void *endpointConfig, const XioEndpointConfig *baseConfig)#
-
struct XioEndpointConfig#
Base configuration structure for all endpoints.
Contains common testing parameters that apply to all endpoints. Endpoints can extend this with their own configuration structures via the endpointConfig pointer.
Public Functions
-
XioEndpointConfig() = default#
-
inline XioEndpointConfig(unsigned iter, unsigned threads = 1)#
Public Members
-
void *completionQueue = nullptr#
-
long long delayNs = 0#
-
void *endpointConfig = nullptr#
-
unsigned long long int *endTimes = nullptr#
-
unsigned iterations = 128#
-
unsigned memoryMode = 0#
-
unsigned numThreads = 1#
-
bool pciMmioBridge = false#
-
unsigned long long int *startTimes = nullptr#
-
bool *stopRequested = nullptr#
-
void *submissionQueue = nullptr#
-
XioSubstepStats *substepStats = nullptr#
-
XioTimingStats *timingStats = nullptr#
-
bool verbose = false#
-
uint32_t verifyFail = 0#
-
uint32_t verifyPass = 0#
-
XioEndpointConfig() = default#
-
struct XioTimingStats#
Timing statistics for less-timing mode.
Tracks min, max, sum, and count of IO completion times.
Endpoint Registry#
-
struct EndpointInfo#
-
std::unique_ptr<XioEndpoint> xio::createEndpoint(EndpointType type)#
Create an endpoint instance by type enum.
Note
Always check the return value before dereferencing.
- Parameters:
type – Endpoint type identifier.
- Returns:
Unique pointer to the created endpoint, or nullptr if
type is EndpointType::UNKNOWN or not supported.
-
std::unique_ptr<XioEndpoint> xio::createEndpoint(const std::string &endpointName)#
Create an endpoint instance by name string.
Note
Always check the return value before dereferencing.
- Parameters:
endpointName – Name of the endpoint (case-insensitive).
- Returns:
Unique pointer to the created endpoint, or nullptr if the name is unknown or not supported.
Both createEndpoint overloads return a null pointer when the type or
name is unknown. Callers must check the result before use.
Memory and Buffer Management#
-
struct xioBufferInfo#
Buffer allocation result structure.
-
struct xioQueueSetup#
Queue setup structure for unified initialization.
-
hipError_t xio::allocateQueue(size_t size, bool isDeviceMemory, const char *queueName, void **ptr)#
Allocate queue memory (device or host).
- Parameters:
size – Size of queue in bytes.
isDeviceMemory – true for hipMalloc, false for hipHostMalloc.
queueName – Name for error messages.
ptr – Output pointer to allocated memory.
- Returns:
hipSuccess on success.
-
void xio::freeQueue(void *ptr, bool isDeviceMemory, const char *queueName)#
Free queue memory.
- Parameters:
ptr – Pointer to free.
isDeviceMemory – true if allocated with hipMalloc.
queueName – Name for error messages.
-
hipError_t xio::allocateGpuAccessibleBuffer(size_t size, unsigned memoryMode, uint16_t targetBdf, const char *devicePath, struct xioBufferInfo *bufferInfo)#
Allocate a GPU-accessible buffer for DMA.
Allocates device or host memory based on memoryMode bit 3 and sets it up for GPU access and DMA operations.
- Parameters:
size – Buffer size in bytes.
memoryMode – Memory mode flags (bit 3 = device).
targetBdf – Target BDF for DMA-BUF registration.
devicePath – Path for emulation detection.
bufferInfo – Output buffer information.
- Returns:
hipSuccess on success, error code on failure.
-
void xio::freeGpuAccessibleBuffer(struct xioBufferInfo *bufferInfo)#
Free a GPU-accessible buffer.
- Parameters:
bufferInfo – Buffer info from allocateGpuAccessibleBuffer.
-
int xio::setupQueueForGpu(size_t size, bool is_device, uint16_t nvme_bdf, const char *kernel_module_device, bool is_emulated, const char *queue_name, struct xioQueueSetup *setup)#
Set up a queue for GPU and PCIe access.
Performs allocation, coherent reallocation, HIP registration, GPU pointer acquisition, and physical address resolution.
- Parameters:
size – Queue size in bytes.
is_device – true for device memory.
nvme_bdf – NVMe controller BDF.
kernel_module_device – Kernel module device path.
is_emulated – true if NVMe is emulated.
queue_name – Name for logging.
setup – Output structure with pointers/handles.
- Returns:
0 on success, negative error code on failure.
-
int xio::registerMemoryForGpu(void *host_ptr, size_t size, const char *name, void **gpu_ptr_out)#
Register memory with HIP for GPU access.
- Parameters:
host_ptr – Host-accessible pointer.
size – Size in bytes.
name – Name for logging.
gpu_ptr_out – Output GPU-accessible pointer.
- Returns:
0 on success, negative error code on failure.
Doorbell and MMIO#
-
struct pci_mmio_bridge_ring_meta#
PCI MMIO Bridge ring metadata (matches QEMU device).
-
struct pci_mmio_bridge_command#
PCI MMIO Bridge command structure (matches QEMU device).
-
int xio::mapPciBar(uint16_t pci_bdf, uint8_t bar, void **bar_cpu, void **bar_gpu, size_t bar_size)#
Map a PCI BAR for GPU access.
- Parameters:
pci_bdf – PCI device BDF in 0xBBDD format.
bar – BAR number (0-5).
bar_cpu – Output CPU-accessible BAR pointer.
bar_gpu – Output GPU-accessible BAR pointer.
bar_size – Size to map (defaults to 8192 if 0).
- Returns:
0 on success, negative error code on failure.
-
void xio::genPciMmioBridgeCmd(void *shadowBufferVirt, uint16_t targetBdf, uint8_t targetBar, uint32_t offset, uint64_t value, uint8_t command, uint8_t size)#
Generate and submit a PCI MMIO bridge command.
Handles ring buffer management, command structure population, and memory ordering.
- Parameters:
shadowBufferVirt – Shadow buffer pointer.
targetBdf – Target device BDF (0xBBDD format).
targetBar – Target BAR number.
offset – Offset within BAR.
value – Value to write (or read result).
command – Command type (PCI_MMIO_BRIDGE_CMD_*).
size – Transfer size in bytes (1, 2, 4, or 8).
Device Helpers#
-
void xio::printDeviceInfo()#
Print information about all available GPU devices.
-
bool xio::checkKernelModuleLoaded()#
Check if the rocm-xio kernel module is loaded.
- Returns:
true if module is loaded.
-
int xio::loadKernelModule()#
Load the rocm-xio kernel module via modprobe.
- Returns:
0 on success, negative error code on failure.
-
int xio::detectBdfFromDevice(const char *device_path, uint16_t *bdf_out)#
Detect PCI BDF from a device file path.
- Parameters:
device_path – Device path (e.g., “/dev/nvme0”).
bdf_out – Output BDF in 0xBBDD format.
- Returns:
0 on success, negative error code on failure.
NVMe Endpoint#
-
struct nvmeEpConfig#
NVMe Endpoint Configuration Structure
Contains all NVMe-specific configuration options that were previously scattered in the main tester’s cmdLineArgs structure. This structure groups related fields using nested structs that mirror the POD structs used in device code.
Public Functions
-
inline nvmeEpConfig()#
Public Members
-
std::string accessPattern#
-
uint64_t baseLba#
-
uint32_t batchSize#
-
struct xio::nvme_ep::nvmeEpConfig::[anonymous] bufferParams#
-
size_t bufferSize#
-
std::string controller#
-
uint64_t cqBaseAddr#
-
size_t cqSize#
-
uint64_t doorbellAddr#
-
struct xio::nvme_ep::nvmeEpConfig::[anonymous] doorbellParams#
-
bool infiniteMode#
-
struct xio::nvme_ep::nvmeEpConfig::[anonymous] ioParams#
-
uint64_t lbaRangeLbas#
-
unsigned lbaSize#
-
uint32_t lbasPerIo#
-
uint32_t lfsrSeed#
-
uint16_t mmioBridgeBdf#
-
uint32_t nsid#
-
uint16_t numQueues#
-
void *nvmeBar0Gpu#
-
uint16_t nvmeTargetBdf#
-
uint16_t queueId#
-
std::vector<uint16_t> queueIds#
-
struct nvme_queue_info queueInfo#
-
std::vector<struct nvme_queue_info> queueInfos#
-
uint16_t queueLength#
-
bool queuesCreated#
-
int readIo#
-
void *shadowBufferVirt#
-
uint64_t sqBaseAddr#
-
size_t sqSize#
-
bool usePciMmioBridge#
-
bool verify = false#
-
uint32_t wavefrontSize#
-
int writeIo#
-
inline nvmeEpConfig()#
-
struct nvmeIoParams#
NVMe I/O parameters for device function execution
Contains all NVMe-specific I/O configuration parameters needed for driveEndpoint execution. This POD struct can be safely passed to GPU device code.
-
struct nvmeDoorbellParams#
NVMe doorbell parameters for controller notification
Contains doorbell configuration supporting both PCI MMIO bridge mode and direct BAR0 access. At least one mode must be configured.
-
struct nvmeBufferParams#
NVMe data buffer parameters for read/write operations
Contains buffer pointers and DMA addresses for data transfer operations. Buffers must be GPU-accessible. DMA addresses are used for PRP entries.
Public Members
-
size_t bufferSize#
-
uint32_t prpEntriesPerCmd#
-
uint64_t *prpListPool#
-
uint64_t prpListPoolDma#
-
uint8_t *readBuffer#
-
uint64_t readBufferDma#
-
uint32_t readNumPages#
-
uint64_t *readPagePhysAddrs#
-
uint8_t *writeBuffer#
-
uint64_t writeBufferDma#
-
uint32_t writeNumPages#
-
uint64_t *writePagePhysAddrs#
-
size_t bufferSize#
-
struct DataPatternParams#
RDMA Endpoint#
-
struct RdmaEpConfig#
Configuration for the RDMA endpoint.
Validated by validateConfig(). Controls provider selection, queue sizing, loopback vs 2-node mode, and optional data-pattern verification.
2-Node Mode Fields
Mutually exclusive with loopback mode.
-
bool isServer = false#
Run as 2-node server.
-
bool isClient = false#
Run as 2-node client.
-
std::string serverHost#
Server hostname/IP.
-
uint32_t ppSize = 64#
Ping-pong total bytes (seq + payload).
-
uint32_t ppIters = 100#
Ping-pong iterations.
Public Members
-
uint32_t batchSize = 1#
WQEs per doorbell ring.
-
unsigned cqDepth = 256#
Completion-queue depth.
-
std::string deviceName#
RDMA device name filter.
-
int gpuDeviceId = 0#
HIP GPU device index.
-
bool infiniteMode = false#
Run forever (SIGINT).
-
uint32_t inlineThreshold = 28#
Max inline send bytes.
-
unsigned iterations = 128#
RDMA ops per run.
-
bool loopback = true#
Loopback mode (default).
-
uint16_t numQueues = 1#
Independent QP count.
-
bool pcieRelaxedOrdering = false#
PCIe relaxed ordering.
-
Provider provider = Provider::BNXT#
Resolved provider enum.
-
std::string providerStr = "bnxt"#
Provider name string.
-
QueueMemMode queueMem = QueueMemMode::HOST_COHERENT#
Queue buffer placement.
-
uint32_t seed = 1#
LFSR seed value.
-
unsigned sqDepth = 256#
Send-queue depth.
-
int trafficClass = 0#
QP address-handle TC.
-
uint32_t transferSize = 4096#
Bytes per RDMA WRITE.
-
bool verify = false#
LFSR verification flag.
-
bool isServer = false#
Vendor WQE and CQE layouts (struct rdma_wqe, struct rdma_cqe) live in
generated RDMA headers under src/endpoints/rdma-ep/ (see
scripts/build/generate-rdma-vendor-headers.sh). They are not present until
those headers are generated, so they are omitted from this auto-generated API
page.
SDMA Endpoint#
Configuration#
-
struct SdmaEpConfig#
SDMA endpoint test configuration.
Contains all user-facing options for the xio-tester sdma-ep subcommand. Validated by validateConfig().
Public Members
-
int dstDeviceId = -1#
Destination HIP device ID. -1 = default (1 for P2P, 0 for --to-host).
-
unsigned iterations = 128#
Number of SDMA transfers per run.
-
int srcDeviceId = -1#
Source HIP device ID. -1 = default (0).
-
std::string testType = ""#
Test subcommand name: “p2p”, “ping-pong”, or “buffer-reuse”.
-
size_t transferSize = 4096#
Per-iteration transfer size in bytes. Must be a multiple of 4.
-
bool useCounter = false#
Use counter-based completion tracking.
-
bool useFlush = false#
Use flush-based completion tracking.
-
bool useHostDst = false#
If true, destination is pinned host memory (single GPU, no P2P required).
-
bool verifyData = false#
If true, validate the destination buffer after transfer.
-
int dstDeviceId = -1#
Host-Side Setup#
-
struct SdmaConnectionInfo#
Information about an established SDMA connection.
Returned by createConnection(). Contains the resolved SDMA engine ID for the GPU pair, which is determined by the XGMI/Infinity Fabric topology (MI300X OAM map).
-
struct SdmaQueueInfo#
Information about a created SDMA queue.
Returned by createQueue(). The deviceHandle pointer is GPU-accessible and should be passed to GPU kernels that use the device-side SDMA operations (put, signal, waitSignal, flush, quiet).
-
int xio::sdma_ep::initEndpoint()#
Initialize the SDMA endpoint subsystem.
Sets up the HSA runtime, enumerates GPU and CPU agents, and opens the KFD (Kernel Fusion Driver) interface. Must be called before createConnection() or createQueue().
Idempotent: safe to call multiple times; subsequent calls are no-ops.
- Returns:
0 on success, negative error code on failure.
-
void xio::sdma_ep::shutdownEndpoint()#
Mark the SDMA endpoint subsystem as inactive.
Resets the internal initialization flag so that subsequent createConnection() / createQueue() calls will fail until initEndpoint() is called again.
Note
This does NOT destroy existing SDMA queues or shut down HSA/KFD. Queue and HSA resources are released when the AnvilLib singleton is destroyed at process exit. Call destroyQueue() on individual queues for explicit cleanup.
Note
Because the underlying HSA init uses std::call_once, calling initEndpoint() after shutdownEndpoint() re-enables the flag but does not re-run HSA/KFD initialization.
-
int xio::sdma_ep::createConnection(int srcDeviceId, int dstDeviceId, SdmaConnectionInfo *info)#
Create an SDMA connection between two GPUs.
Enables P2P peer access from the source GPU to the destination GPU and resolves the XGMI-topology-optimal SDMA engine ID for this GPU pair (using the MI300X OAM map). For bidirectional transfers, call once for each direction.
Must be called after initEndpoint() and before createQueue() for the same GPU pair.
- Parameters:
srcDeviceId – Source HIP device ID.
dstDeviceId – Destination HIP device ID.
info – Output connection information.
- Returns:
0 on success, negative error code on failure.
-
int xio::sdma_ep::createQueue(int srcDeviceId, int dstDeviceId, SdmaQueueInfo *info)#
Create an SDMA queue for a GPU pair.
Allocates a 1 MiB ring buffer in device memory, creates an SDMA queue via hsakmt, and populates a GPU-accessible device handle (SdmaQueueHandle).
Must be called after createConnection() for the same GPU pair. The returned SdmaQueueInfo::deviceHandle is a pointer in device memory that can be passed directly to GPU kernels.
- Parameters:
srcDeviceId – Source HIP device ID.
dstDeviceId – Destination HIP device ID.
info – Output queue information.
- Returns:
0 on success, negative error code on failure.
-
void xio::sdma_ep::destroyQueue(SdmaQueueInfo *info)#
Destroy an SDMA queue.
Releases the ring buffer, device handle memory, and hsakmt queue resources associated with the given queue.
- Parameters:
info – Queue information from createQueue(). The deviceHandle becomes invalid after this call.
Device-Side Operations#
-
void xio::sdma_ep::put(SdmaQueueHandle &handle, void *dst, void *src, size_t size)#
DMA copy via SDMA engine.
Submits an SDMA_PKT_COPY_LINEAR to transfer size bytes from src to dst. The transfer is non-blocking: it completes asynchronously after the SDMA engine processes the packet.
Note
Device-only. Thread-safe for multi-producer queues (uses atomic CAS for reservation).
- Parameters:
handle – Queue handle (multi-producer safe).
dst – Destination address (GPU virtual).
src – Source address (GPU virtual).
size – Number of bytes to transfer.
-
void xio::sdma_ep::putTile(SdmaQueueHandle &handle, void *dst, void *src, uint32_t tileWidth, uint32_t tileHeight, uint32_t srcPitch, uint32_t dstPitch, uint32_t srcX, uint32_t srcY, uint32_t dstX, uint32_t dstY)#
2D sub-window DMA copy via SDMA engine.
Copies a rectangular tile from a source buffer to a destination buffer using SDMA_PKT_LINEAR_LARGE_SUB_WINDOW_COPY.
Note
Device-only. Non-blocking.
- Parameters:
handle – Queue handle.
dst – Destination buffer base address.
src – Source buffer base address.
tileWidth – Tile width in bytes.
tileHeight – Tile height in rows.
srcPitch – Source row stride in bytes.
dstPitch – Destination row stride in bytes.
srcX – Source X offset in bytes.
srcY – Source Y offset in rows.
dstX – Destination X offset in bytes.
dstY – Destination Y offset in rows.
-
void xio::sdma_ep::signal(SdmaQueueHandle &handle, uint64_t *signal)#
Atomically increment a signal via SDMA engine.
Submits an SDMA_PKT_ATOMIC that adds 1 to the 64-bit value at *signal. The increment is performed by the SDMA engine, not the shader ALU.
Note
Device-only. Non-blocking.
- Parameters:
handle – Queue handle.
signal – Address of a 64-bit signal counter in device memory (uncached recommended).
-
void xio::sdma_ep::putSignal(SdmaQueueHandle &handle, void *dst, void *src, size_t size, uint64_t *signal)#
DMA copy with completion signal (batched).
Combines a linear copy and an atomic signal increment into a single queue submission. The signal is incremented after the copy completes.
Note
Device-only. Non-blocking.
- Parameters:
handle – Queue handle.
dst – Destination address (GPU virtual).
src – Source address (GPU virtual).
size – Number of bytes to transfer.
signal – Address of a 64-bit signal counter.
-
void xio::sdma_ep::putSignalCounter(SdmaQueueHandle &handle, void *dst, void *src, size_t size, uint64_t *signal, uint64_t *counter)#
DMA copy with signal and counter (batched).
Combines a linear copy, a signal increment, and a counter increment into a single queue submission.
Note
Device-only. Non-blocking.
- Parameters:
handle – Queue handle.
dst – Destination address (GPU virtual).
src – Source address (GPU virtual).
size – Number of bytes to transfer.
signal – Address of a 64-bit signal counter.
counter – Address of a 64-bit counter.
-
void xio::sdma_ep::putCounter(SdmaQueueHandle &handle, void *dst, void *src, size_t size, uint64_t *counter)#
DMA copy with counter only (batched).
Note
Device-only. Non-blocking.
- Parameters:
handle – Queue handle.
dst – Destination address (GPU virtual).
src – Source address (GPU virtual).
size – Number of bytes to transfer.
counter – Address of a 64-bit counter.
-
void xio::sdma_ep::signalCounter(SdmaQueueHandle &handle, uint64_t *signal, uint64_t *counter)#
Signal and counter increment (no copy).
Note
Device-only. Non-blocking.
- Parameters:
handle – Queue handle.
signal – Address of a 64-bit signal counter.
counter – Address of a 64-bit counter.
-
void xio::sdma_ep::waitSignal(uint64_t *addr, uint64_t expected)#
Wait for a signal to reach a value.
Spin-polls the 64-bit value at *addr until it is >= expected. Use after putSignal() or signal() to wait for remote completion.
Note
Device-only. Blocking (spins until condition met). Uses agent-scope relaxed atomics.
- Parameters:
addr – Address of a 64-bit signal in device memory (uncached recommended).
expected – Minimum value to wait for. Typically the number of signals sent.
-
void xio::sdma_ep::waitCounter(uint64_t *addr, uint64_t expected)#
Wait for a counter to reach a value.
Identical semantics to waitSignal(); provided as a separate function for clarity when waiting on a counter rather than a signal.
Note
Device-only. Blocking.
- Parameters:
addr – Address of a 64-bit counter.
expected – Minimum value to wait for.
-
void xio::sdma_ep::flush(SdmaQueueHandle &handle, uint64_t upToIndex)#
Wait for a specific operation to complete.
Spin-polls the hardware read pointer until it reaches or passes upToIndex. Use with the put_index output of put_signal_counter_impl to wait for a specific put without waiting for subsequent signals.
Note
Device-only. Blocking.
- Parameters:
handle – Queue handle.
upToIndex – Write pointer value to wait for (from put_index tracking).
-
void xio::sdma_ep::quiet(SdmaQueueHandle &handle)#
Wait for ALL submitted operations to complete.
Spin-polls the hardware read pointer until it reaches maxWritePtr, meaning every packet submitted to this queue has been consumed by the SDMA engine.
Note
Device-only. Blocking.
- Parameters:
handle – Queue handle.
Validation#
-
std::string xio::sdma_ep::validateConfig(SdmaEpConfig *config)#
Validate SDMA endpoint configuration.
Checks that a test subcommand was selected, that --use-counter and --use-flush are not both set, and that transfer-size is > 0 and a multiple of 4.
- Parameters:
config – Configuration to validate.
- Returns:
Empty string if valid, error message otherwise.
-
unsigned xio::sdma_ep::getIterations(void *endpointConfig)#
Get the iteration count for this configuration.
- Parameters:
endpointConfig – Opaque pointer to SdmaEpConfig.
- Returns:
Number of iterations to run.
Test Endpoint#
-
struct TestEpConfig#
Test endpoint configuration structure
Contains test-ep-specific configuration parameters that control CPU thread behaviour, doorbell mode, emulation, and iteration count.
Public Functions
-
TestEpConfig() = default#
Default constructor.
Public Members
-
unsigned doorbell = 0#
Doorbell mode queue length.
When greater than 0, doorbell mode is enabled with the specified queue length. A single CPU thread polls a doorbell address instead of individual SQEs. The GPU writes SQEs to the submission queue and rings the doorbell after each enqueue. Doorbell location is controlled by memory mode bit 2 (0=host, 1=device). A value of 0 disables doorbell mode (polling mode).
-
bool emulate = false#
Emulate mode flag.
When true the kernel code runs on CPU threads instead of the GPU. Useful for testing without a GPU or in CI environments.
-
bool enableCpuThreads = true#
Enable CPU threads to poll SQEs and generate CQEs.
When enabled, CPU threads poll host memory for SQEs written by the GPU and generate CQEs with optional delay support.
-
unsigned iterations = 128#
Number of iterations to run.
-
uint32_t seed = 1#
Base seed for LFSR pattern verification.
-
bool verify = false#
LFSR data pattern verification.
When true, the GPU fills the SQE data payload with an LFSR pattern (per-iteration seed) and verifies it after each CQE round-trip.
-
TestEpConfig() = default#
-
struct test_sqe#
Submission queue entry (SQE) for the test endpoint
Written by the GPU and read by CPU polling threads. Layout matches the simple-test GpuToCpuMsg structure.
-
struct test_cqe#
Completion queue entry (CQE) for the test endpoint
Written by the CPU and polled by the GPU. Layout matches the simple-test CpuToGpuMsg structure.
Kernel Module IOCTL Structures#
-
struct rocm_xio_vram_req#
VRAM physical address resolution request (GET_VRAM_PHYS_ADDR).
-
struct rocm_xio_register_queue_addr_req#
Register queue virtual/physical mapping (REGISTER_QUEUE_ADDR).
-
struct rocm_xio_register_buffer_req#
Register data buffer for PRP injection (REGISTER_BUFFER).
-
struct rocm_xio_mmio_bridge_shadow_req#
MMIO bridge shadow buffer mapping (GET_MMIO_BRIDGE_SHADOW_BUFFER).
-
struct rocm_xio_alloc_contig_req#
Contiguous queue allocation (ALLOC_CONTIG_QUEUE).