TransferBench API library#
-
struct ConfigOptions#
- #include <TransferBench.hpp>
Configuration options for performing Transfers
Public Members
-
GeneralOptions general#
General options.
-
DataOptions data#
Data options.
-
GfxOptions gfx#
GFX executor options.
-
DmaOptions dma#
DMA executor options.
-
GeneralOptions general#
-
struct DataOptions#
- #include <TransferBench.hpp>
Data options
Public Members
-
int alwaysValidate = 0#
Validate after each iteration instead of once at end.
-
int blockBytes = 256#
Each subexecutor works on a multiple of this many bytes.
-
int byteOffset = 0#
Byte-offset for memory allocations.
-
vector<float> fillPattern = {}#
Pattern of floats used to fill source data.
-
int validateDirect = 0#
Validate GPU results directly instead of copying to host.
-
int validateSource = 0#
Validate src GPU memory immediately after preparation.
-
int alwaysValidate = 0#
-
struct DmaOptions#
- #include <TransferBench.hpp>
DMA Executor options
-
struct ErrResult#
- #include <TransferBench.hpp>
ErrResult consists of error type and error message
-
struct ExeDevice#
- #include <TransferBench.hpp>
An ExeDevice defines a specific Executor
-
struct ExeResult#
- #include <TransferBench.hpp>
Results for a single Executor
-
struct GeneralOptions#
- #include <TransferBench.hpp>
General options
Public Members
-
int numIterations = 10#
Number of timed iterations to perform. If negative, run for -numIterations seconds instead.
-
int numSubIterations = 1#
Number of sub-iterations per iteration.
-
int numWarmups = 3#
Number of un-timed warmup iterations to perform.
-
int recordPerIteration = 0#
Record per-iteration timing information.
-
int useInteractive = 0#
Pause for user-input before starting transfer loop.
-
int numIterations = 10#
-
struct GfxOptions#
- #include <TransferBench.hpp>
GFX Executor options
Public Members
-
int blockSize = 256#
Size of each threadblock (must be multiple of 64)
-
vector<uint32_t> cuMask = {}#
Bit-vector representing the CU mask.
-
vector<vector<int>> prefXccTable = {}#
2D table with preferred XCD to use for a specific [src][dst] GPU device
-
int unrollFactor = 4#
GFX-kernel unroll factor.
-
int useHipEvents = 1#
Use HIP events for timing GFX Executor.
-
int useMultiStream = 0#
Use multiple streams for GFX.
-
int useSingleTeam = 0#
Team all subExecutors across the data array.
-
int waveOrder = 0#
GFX-kernel wavefront ordering.
-
int blockSize = 256#
-
struct MemDevice#
- #include <TransferBench.hpp>
A MemDevice indicates a memory type on a specific device
-
struct TestResults#
- #include <TransferBench.hpp>
TestResults contain timing results for a set of Transfers as a group as well as per Executor and per Transfer timing information
Public Members
-
int numTimedIterations#
Number of iterations executed.
-
size_t totalBytesTransferred#
Total bytes transferred per iteration.
-
double avgTotalDurationMsec#
Wall-time (msec) to finish all Transfers (averaged across all timed iterations)
-
double avgTotalBandwidthGbPerSec#
Bandwidth based on all Transfers and average wall time.
-
double overheadMsec#
Difference between total wall time and slowest executor.
-
vector<TransferResult> tfrResults#
Per Transfer results.
-
int numTimedIterations#
-
struct Transfer#
- #include <TransferBench.hpp>
A Transfer adds together data from zero or more sources, then writes the sum to zero or more destinations
-
struct TransferResult#
- #include <TransferBench.hpp>
Results for a single Transfer
-
namespace std#
STL namespace.
-
namespace TransferBench#
Enums
-
enum ExeType#
Enumeration of supported Executor types
Note
The Executor is the device used to perform a Transfer
Note
IBVerbs executor is not implemented yet
Values:
-
enumerator EXE_CPU#
CPU executor (subExecutor = CPU thread)
-
enumerator EXE_GPU_GFX#
GPU kernel-based executor (subExecutor = threadblock/CU)
-
enumerator EXE_GPU_DMA#
GPU SDMA executor (subExecutor = not supported)
-
enumerator EXE_IBV#
IBVerbs executor (subExecutor = queue pair)
-
enumerator EXE_CPU#
-
enum MemType#
Enumeration of supported memory types
Note
These are possible types of memory to be used as sources/destinations
Values:
-
enumerator MEM_CPU#
Coarse-grained pinned CPU memory.
-
enumerator MEM_GPU#
Coarse-grained global GPU memory.
-
enumerator MEM_CPU_FINE#
Fine-grained pinned CPU memory.
-
enumerator MEM_GPU_FINE#
Fine-grained global GPU memory.
-
enumerator MEM_CPU_UNPINNED#
Unpinned CPU memory.
-
enumerator MEM_NULL#
NULL memory - used for empty.
-
enumerator MEM_MANAGED#
Managed memory.
-
enumerator MEM_CPU#
-
enum ErrType#
Enumeration of possible error types
Values:
-
enumerator ERR_NONE#
No errors.
-
enumerator ERR_WARN#
Warning - results may not be accurate.
-
enumerator ERR_FATAL#
Fatal error - results are invalid.
-
enumerator ERR_NONE#
Functions
-
bool RunTransfers(ConfigOptions const &config, vector<Transfer> const &transfers, TestResults &results)#
Run a set of Transfers
- Parameters:
config – [in] Configuration options
transfers – [in] Set of Transfers to execute
results – [out] Timing results
- Returns:
true if and only if Transfers were run successfully without any fatal errors
-
int GetIntAttribute(IntAttribute attribute)#
Query attributes (integer)
Note
This allows querying of implementation information such as limits
- Parameters:
attribute – [in] Attribute to query
- Returns:
Value of the attribute
-
std::string GetStrAttribute(StrAttribute attribute)#
Query attributes (string)
Note
This allows querying of implementation details such as limits
- Parameters:
attribute – [in] Attribute to query
- Returns:
Value of the attribute
-
int GetNumExecutors(ExeType exeType)#
Returns information about the number of available Executors
- Parameters:
exeType – [in] Executor type to query
- Returns:
Number of detected Executors of exeType
-
int GetNumExecutorSubIndices(ExeDevice exeDevice)#
Returns the number of possible Executor subindices
Note
For CPU, this is 0
Note
For GFX, this refers to the number of XCDs
Note
For DMA, this refers to the number of DMA engines
- Parameters:
exeDevice – [in] The specific Executor to query
- Returns:
Number of detected executor subindices
-
int GetNumSubExecutors(ExeDevice exeDevice)#
Returns number of subExecutors for a given ExeDevice
- Parameters:
exeDevice – [in] The specific Executor to query
- Returns:
Number of detected subExecutors for the given ExeDevice
-
int GetClosestCpuNumaToGpu(int gpuIndex)#
Returns the index of the NUMA node closest to the given GPU
- Parameters:
gpuIndex – [in] Index of the GPU to query
- Returns:
NUMA node index closest to GPU gpuIndex, or -1 if unable to detect
-
ErrResult ParseTransfers(std::string str, std::vector<Transfer> &transfers)#
Helper function to parse a line containing Transfers into a vector of Transfers
- Parameters:
str – [in] String containing description of Transfers
transfers – [out] List of Transfers described by ‘str’
- Returns:
Information about any error that may have occurred
-
enum ExeType#
- file TransferBench.hpp
- dir /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-transferbench/checkouts/latest/src/header
- dir /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-transferbench/checkouts/latest/src