rocm_smi.h File Reference#
The rocm_smi library API is new, and therefore subject to change at either the ABI or API level. Rather than marking every function prototype as "unstable", we consider the whole API unstable (i.e., changes are possible) while the major version remains 0. This means that if the API/ABI changes, we will not increment the major version to 1. Once the ABI stabilizes, we will increment the major version to 1, and thereafter increment it on all ABI breaks. More...
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include "rocm_smi/kfd_ioctl.h"
Go to the source code of this file.
Data Structures | |
struct | rsmi_counter_value_t |
struct | rsmi_evt_notification_data_t |
struct | rsmi_utilization_counter_t |
The utilization counter data. More... | |
struct | rsmi_retired_page_record_t |
Reserved Memory Page Record. More... | |
struct | rsmi_power_profile_status_t |
This structure contains information about which power profiles are supported by the system for a given device, and which power profile is currently active. More... | |
struct | rsmi_frequencies_t |
This structure holds information about clock frequencies. More... | |
struct | rsmi_pcie_bandwidth_t |
This structure holds information about the possible PCIe bandwidths. Specifically, the possible transfer rates and their associated numbers of lanes are stored here. More... | |
struct | rsmi_version_t |
This structure holds version information. More... | |
struct | rsmi_range_t |
This structure represents a range (e.g., frequencies or voltages). More... | |
struct | rsmi_od_vddc_point_t |
This structure represents a point on the frequency-voltage plane. More... | |
struct | rsmi_freq_volt_region_t |
This structure holds 2 rsmi_range_t's, one for frequency and one for voltage. These 2 ranges indicate the range of possible values for the corresponding rsmi_od_vddc_point_t. More... | |
struct | rsmi_od_volt_curve_t |
struct | rsmi_od_volt_freq_data_t |
This structure holds the frequency-voltage values for a device. More... | |
struct | metrics_table_header_t |
The following structures hold the gpu metrics values for a device. More... | |
struct | rsmi_gpu_metrics_t |
struct | rsmi_error_count_t |
This structure holds error counts. More... | |
struct | rsmi_process_info_t |
This structure contains information specific to a process. More... | |
union | id |
This union holds the value of an rsmi_func_id_iter_handle_t. The value may be a function name, or an enumerated variant value of types such as rsmi_memory_type_t, rsmi_temperature_metric_t, etc. More... | |
Macros | |
#define | RSMI_MAX_NUM_FREQUENCIES 32 |
Guaranteed maximum possible number of supported frequencies. | |
#define | RSMI_MAX_FAN_SPEED 255 |
#define | RSMI_NUM_VOLTAGE_CURVE_POINTS 3 |
The number of points that make up a voltage-frequency curve definition. | |
#define | RSMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) |
#define | MAX_EVENT_NOTIFICATION_MSG_SIZE 64 |
Maximum number of characters an event notification message will be. | |
#define | RSMI_MAX_NUM_POWER_PROFILES (sizeof(rsmi_bit_field_t) * 8) |
Number of possible power profiles that a system could support. | |
#define | RSMI_GPU_METRICS_API_FORMAT_VER 1 |
The following structure holds the gpu metrics values for a device. | |
#define | RSMI_GPU_METRICS_API_CONTENT_VER_1 1 |
#define | RSMI_GPU_METRICS_API_CONTENT_VER_2 2 |
#define | RSMI_GPU_METRICS_API_CONTENT_VER_3 3 |
#define | RSMI_NUM_HBM_INSTANCES 4 |
#define | CENTRIGRADE_TO_MILLI_CENTIGRADE 1000 |
#define | RSMI_DEFAULT_VARIANT 0xFFFFFFFFFFFFFFFF |
Typedefs | |
typedef uintptr_t | rsmi_event_handle_t |
Handle to performance event counter. More... | |
typedef uint64_t | rsmi_bit_field_t |
Bitfield used in various RSMI calls. | |
typedef enum _RSMI_IO_LINK_TYPE | RSMI_IO_LINK_TYPE |
Types for IO Link. | |
typedef struct rsmi_func_id_iter_handle * | rsmi_func_id_iter_handle_t |
Opaque handle to function-support object. | |
typedef union id | rsmi_func_id_value_t |
This union holds the value of an rsmi_func_id_iter_handle_t. The value may be a function name, or an enumerated variant value of types such as rsmi_memory_type_t, rsmi_temperature_metric_t, etc. | |
Enumerations | |
enum | rsmi_status_t { RSMI_STATUS_SUCCESS = 0x0 , RSMI_STATUS_INVALID_ARGS , RSMI_STATUS_NOT_SUPPORTED , RSMI_STATUS_FILE_ERROR , RSMI_STATUS_PERMISSION , RSMI_STATUS_OUT_OF_RESOURCES , RSMI_STATUS_INTERNAL_EXCEPTION , RSMI_STATUS_INPUT_OUT_OF_BOUNDS , RSMI_STATUS_INIT_ERROR , RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR , RSMI_STATUS_NOT_YET_IMPLEMENTED , RSMI_STATUS_NOT_FOUND , RSMI_STATUS_INSUFFICIENT_SIZE , RSMI_STATUS_INTERRUPT , RSMI_STATUS_UNEXPECTED_SIZE , RSMI_STATUS_NO_DATA , RSMI_STATUS_UNEXPECTED_DATA , RSMI_STATUS_BUSY , RSMI_STATUS_REFCOUNT_OVERFLOW , RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF } |
Error codes returned by rocm_smi_lib functions. More... | |
enum | rsmi_init_flags_t { RSMI_INIT_FLAG_ALL_GPUS = 0x1 , RSMI_INIT_FLAG_RESRV_TEST1 = 0x800000000000000 } |
Initialization flags. More... | |
enum | rsmi_dev_perf_level_t { RSMI_DEV_PERF_LEVEL_AUTO = 0 , RSMI_DEV_PERF_LEVEL_FIRST = RSMI_DEV_PERF_LEVEL_AUTO , RSMI_DEV_PERF_LEVEL_LOW , RSMI_DEV_PERF_LEVEL_HIGH , RSMI_DEV_PERF_LEVEL_MANUAL , RSMI_DEV_PERF_LEVEL_STABLE_STD , RSMI_DEV_PERF_LEVEL_STABLE_PEAK , RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK , RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK , RSMI_DEV_PERF_LEVEL_DETERMINISM , RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_DETERMINISM , RSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100 } |
PowerPlay performance levels. More... | |
enum | rsmi_sw_component_t { RSMI_SW_COMP_FIRST = 0x0 , RSMI_SW_COMP_DRIVER = RSMI_SW_COMP_FIRST , RSMI_SW_COMP_LAST = RSMI_SW_COMP_DRIVER } |
Software components. More... | |
enum | rsmi_event_group_t { RSMI_EVNT_GRP_XGMI = 0 , RSMI_EVNT_GRP_XGMI_DATA_OUT = 10 , RSMI_EVNT_GRP_INVALID = 0xFFFFFFFF } |
Enum denoting an event group. The value of the enum is the base value for all the event enums in the group. More... | |
enum | rsmi_event_type_t { RSMI_EVNT_FIRST = RSMI_EVNT_GRP_XGMI , RSMI_EVNT_XGMI_FIRST = RSMI_EVNT_GRP_XGMI , RSMI_EVNT_XGMI_0_NOP_TX = RSMI_EVNT_XGMI_FIRST , RSMI_EVNT_XGMI_0_REQUEST_TX , RSMI_EVNT_XGMI_0_RESPONSE_TX , RSMI_EVNT_XGMI_0_BEATS_TX , RSMI_EVNT_XGMI_1_NOP_TX , RSMI_EVNT_XGMI_1_REQUEST_TX , RSMI_EVNT_XGMI_1_RESPONSE_TX , RSMI_EVNT_XGMI_1_BEATS_TX , RSMI_EVNT_XGMI_LAST = RSMI_EVNT_XGMI_1_BEATS_TX , RSMI_EVNT_XGMI_DATA_OUT_FIRST = RSMI_EVNT_GRP_XGMI_DATA_OUT , RSMI_EVNT_XGMI_DATA_OUT_0 = RSMI_EVNT_XGMI_DATA_OUT_FIRST , RSMI_EVNT_XGMI_DATA_OUT_1 , RSMI_EVNT_XGMI_DATA_OUT_2 , RSMI_EVNT_XGMI_DATA_OUT_3 , RSMI_EVNT_XGMI_DATA_OUT_4 , RSMI_EVNT_XGMI_DATA_OUT_5 , RSMI_EVNT_XGMI_DATA_OUT_LAST = RSMI_EVNT_XGMI_DATA_OUT_5 , RSMI_EVNT_LAST = RSMI_EVNT_XGMI_DATA_OUT_LAST } |
Event type enum. Events belonging to a particular event group rsmi_event_group_t should begin enumerating at the rsmi_event_group_t value for that group. More... | |
enum | rsmi_counter_command_t { RSMI_CNTR_CMD_START = 0 , RSMI_CNTR_CMD_STOP } |
enum | rsmi_evt_notification_type_t { RSMI_EVT_NOTIF_VMFAULT = KFD_SMI_EVENT_VMFAULT , RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_VMFAULT , RSMI_EVT_NOTIF_THERMAL_THROTTLE = KFD_SMI_EVENT_THERMAL_THROTTLE , RSMI_EVT_NOTIF_GPU_PRE_RESET = KFD_SMI_EVENT_GPU_PRE_RESET , RSMI_EVT_NOTIF_GPU_POST_RESET = KFD_SMI_EVENT_GPU_POST_RESET , RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_GPU_POST_RESET } |
enum | rsmi_clk_type_t { RSMI_CLK_TYPE_SYS = 0x0 , RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS , RSMI_CLK_TYPE_DF , RSMI_CLK_TYPE_DCEF , RSMI_CLK_TYPE_SOC , RSMI_CLK_TYPE_MEM , RSMI_CLK_TYPE_PCIE , RSMI_CLK_TYPE_LAST = RSMI_CLK_TYPE_MEM , RSMI_CLK_INVALID = 0xFFFFFFFF } |
enum | rsmi_compute_partition_type_t { RSMI_COMPUTE_PARTITION_INVALID = 0 , RSMI_COMPUTE_PARTITION_CPX , RSMI_COMPUTE_PARTITION_SPX , RSMI_COMPUTE_PARTITION_DPX , RSMI_COMPUTE_PARTITION_TPX , RSMI_COMPUTE_PARTITION_QPX } |
enum | rsmi_temperature_metric_t { RSMI_TEMP_CURRENT = 0x0 , RSMI_TEMP_FIRST = RSMI_TEMP_CURRENT , RSMI_TEMP_MAX , RSMI_TEMP_MIN , RSMI_TEMP_MAX_HYST , RSMI_TEMP_MIN_HYST , RSMI_TEMP_CRITICAL , RSMI_TEMP_CRITICAL_HYST , RSMI_TEMP_EMERGENCY , RSMI_TEMP_EMERGENCY_HYST , RSMI_TEMP_CRIT_MIN , RSMI_TEMP_CRIT_MIN_HYST , RSMI_TEMP_OFFSET , RSMI_TEMP_LOWEST , RSMI_TEMP_HIGHEST , RSMI_TEMP_LAST = RSMI_TEMP_HIGHEST } |
Temperature Metrics. This enum is used to identify various temperature metrics. Corresponding values will be in millidegrees Celsius. More... | |
enum | rsmi_temperature_type_t { RSMI_TEMP_TYPE_FIRST = 0 , RSMI_TEMP_TYPE_EDGE = RSMI_TEMP_TYPE_FIRST , RSMI_TEMP_TYPE_JUNCTION , RSMI_TEMP_TYPE_MEMORY , RSMI_TEMP_TYPE_HBM_0 , RSMI_TEMP_TYPE_HBM_1 , RSMI_TEMP_TYPE_HBM_2 , RSMI_TEMP_TYPE_HBM_3 , RSMI_TEMP_TYPE_LAST = RSMI_TEMP_TYPE_HBM_3 , RSMI_TEMP_TYPE_INVALID = 0xFFFFFFFF } |
This enumeration is used to indicate from which part of the device a temperature reading should be obtained. More... | |
enum | rsmi_voltage_metric_t { RSMI_VOLT_CURRENT = 0x0 , RSMI_VOLT_FIRST = RSMI_VOLT_CURRENT , RSMI_VOLT_MAX , RSMI_VOLT_MIN_CRIT , RSMI_VOLT_MIN , RSMI_VOLT_MAX_CRIT , RSMI_VOLT_AVERAGE , RSMI_VOLT_LOWEST , RSMI_VOLT_HIGHEST , RSMI_VOLT_LAST = RSMI_VOLT_HIGHEST } |
Voltage Metrics. This enum is used to identify various voltage metrics. Corresponding values will be in millivolts. More... | |
enum | rsmi_voltage_type_t { RSMI_VOLT_TYPE_FIRST = 0 , RSMI_VOLT_TYPE_VDDGFX = RSMI_VOLT_TYPE_FIRST , RSMI_VOLT_TYPE_LAST = RSMI_VOLT_TYPE_VDDGFX , RSMI_VOLT_TYPE_INVALID = 0xFFFFFFFF } |
This enumeration is used to indicate which type of voltage reading should be obtained. More... | |
enum | rsmi_power_profile_preset_masks_t { RSMI_PWR_PROF_PRST_CUSTOM_MASK = 0x1 , RSMI_PWR_PROF_PRST_VIDEO_MASK = 0x2 , RSMI_PWR_PROF_PRST_POWER_SAVING_MASK = 0x4 , RSMI_PWR_PROF_PRST_COMPUTE_MASK = 0x8 , RSMI_PWR_PROF_PRST_VR_MASK = 0x10 , RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK = 0x20 , RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT = 0x40 , RSMI_PWR_PROF_PRST_LAST = RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT , RSMI_PWR_PROF_PRST_INVALID = 0xFFFFFFFFFFFFFFFF } |
Pre-set Profile Selections. These bitmasks can be AND'd with the rsmi_power_profile_status_t.available_profiles returned from rsmi_dev_power_profile_presets_get to determine which power profiles are supported by the system. More... | |
enum | rsmi_gpu_block_t { RSMI_GPU_BLOCK_INVALID = 0x0000000000000000 , RSMI_GPU_BLOCK_FIRST = 0x0000000000000001 , RSMI_GPU_BLOCK_UMC = RSMI_GPU_BLOCK_FIRST , RSMI_GPU_BLOCK_SDMA = 0x0000000000000002 , RSMI_GPU_BLOCK_GFX = 0x0000000000000004 , RSMI_GPU_BLOCK_MMHUB = 0x0000000000000008 , RSMI_GPU_BLOCK_ATHUB = 0x0000000000000010 , RSMI_GPU_BLOCK_PCIE_BIF = 0x0000000000000020 , RSMI_GPU_BLOCK_HDP = 0x0000000000000040 , RSMI_GPU_BLOCK_XGMI_WAFL = 0x0000000000000080 , RSMI_GPU_BLOCK_DF = 0x0000000000000100 , RSMI_GPU_BLOCK_SMN = 0x0000000000000200 , RSMI_GPU_BLOCK_SEM = 0x0000000000000400 , RSMI_GPU_BLOCK_MP0 = 0x0000000000000800 , RSMI_GPU_BLOCK_MP1 = 0x0000000000001000 , RSMI_GPU_BLOCK_FUSE = 0x0000000000002000 , RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_FUSE , RSMI_GPU_BLOCK_RESERVED = 0x8000000000000000 } |
This enum is used to identify different GPU blocks. More... | |
enum | rsmi_ras_err_state_t { RSMI_RAS_ERR_STATE_NONE = 0 , RSMI_RAS_ERR_STATE_DISABLED , RSMI_RAS_ERR_STATE_PARITY , RSMI_RAS_ERR_STATE_SING_C , RSMI_RAS_ERR_STATE_MULT_UC , RSMI_RAS_ERR_STATE_POISON , RSMI_RAS_ERR_STATE_ENABLED , RSMI_RAS_ERR_STATE_LAST = RSMI_RAS_ERR_STATE_ENABLED , RSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF } |
The current ECC state. More... | |
enum | rsmi_memory_type_t { RSMI_MEM_TYPE_FIRST = 0 , RSMI_MEM_TYPE_VRAM = RSMI_MEM_TYPE_FIRST , RSMI_MEM_TYPE_VIS_VRAM , RSMI_MEM_TYPE_GTT , RSMI_MEM_TYPE_LAST = RSMI_MEM_TYPE_GTT } |
Types of memory. More... | |
enum | rsmi_freq_ind_t { RSMI_FREQ_IND_MIN = 0 , RSMI_FREQ_IND_MAX = 1 , RSMI_FREQ_IND_INVALID = 0xFFFFFFFF } |
The values of this enum are used as frequency identifiers. More... | |
enum | rsmi_fw_block_t { RSMI_FW_BLOCK_FIRST = 0 , RSMI_FW_BLOCK_ASD = RSMI_FW_BLOCK_FIRST , RSMI_FW_BLOCK_CE , RSMI_FW_BLOCK_DMCU , RSMI_FW_BLOCK_MC , RSMI_FW_BLOCK_ME , RSMI_FW_BLOCK_MEC , RSMI_FW_BLOCK_MEC2 , RSMI_FW_BLOCK_PFP , RSMI_FW_BLOCK_RLC , RSMI_FW_BLOCK_RLC_SRLC , RSMI_FW_BLOCK_RLC_SRLG , RSMI_FW_BLOCK_RLC_SRLS , RSMI_FW_BLOCK_SDMA , RSMI_FW_BLOCK_SDMA2 , RSMI_FW_BLOCK_SMC , RSMI_FW_BLOCK_SOS , RSMI_FW_BLOCK_TA_RAS , RSMI_FW_BLOCK_TA_XGMI , RSMI_FW_BLOCK_UVD , RSMI_FW_BLOCK_VCE , RSMI_FW_BLOCK_VCN , RSMI_FW_BLOCK_LAST = RSMI_FW_BLOCK_VCN } |
The values of this enum are used to identify the various firmware blocks. | |
enum | rsmi_xgmi_status_t { RSMI_XGMI_STATUS_NO_ERRORS = 0 , RSMI_XGMI_STATUS_ERROR , RSMI_XGMI_STATUS_MULTIPLE_ERRORS } |
XGMI Status. | |
enum | rsmi_memory_page_status_t { RSMI_MEM_PAGE_STATUS_RESERVED = 0 , RSMI_MEM_PAGE_STATUS_PENDING , RSMI_MEM_PAGE_STATUS_UNRESERVABLE } |
Reserved Memory Page States. More... | |
enum | _RSMI_IO_LINK_TYPE { RSMI_IOLINK_TYPE_UNDEFINED = 0 , RSMI_IOLINK_TYPE_PCIEXPRESS = 1 , RSMI_IOLINK_TYPE_XGMI = 2 , RSMI_IOLINK_TYPE_NUMIOLINKTYPES , RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF } |
Types for IO Link. More... | |
enum | RSMI_UTILIZATION_COUNTER_TYPE { RSMI_UTILIZATION_COUNTER_FIRST = 0 , RSMI_COARSE_GRAIN_GFX_ACTIVITY = RSMI_UTILIZATION_COUNTER_FIRST , RSMI_COARSE_GRAIN_MEM_ACTIVITY , RSMI_UTILIZATION_COUNTER_LAST = RSMI_COARSE_GRAIN_MEM_ACTIVITY } |
The utilization counter type. More... | |
Functions | |
rsmi_status_t | rsmi_init (uint64_t init_flags) |
Initialize ROCm SMI. More... | |
rsmi_status_t | rsmi_shut_down (void) |
Shutdown ROCm SMI. More... | |
rsmi_status_t | rsmi_num_monitor_devices (uint32_t *num_devices) |
Get the number of devices that have monitor information. More... | |
rsmi_status_t | rsmi_dev_id_get (uint32_t dv_ind, uint16_t *id) |
Get the device id associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_sku_get (uint32_t dv_ind, char *sku) |
Get the SKU for a desired device associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_vendor_id_get (uint32_t dv_ind, uint16_t *id) |
Get the device vendor id associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_name_get (uint32_t dv_ind, char *name, size_t len) |
Get the name string of a gpu device. More... | |
rsmi_status_t | rsmi_dev_brand_get (uint32_t dv_ind, char *brand, uint32_t len) |
Get the brand string of a gpu device. More... | |
rsmi_status_t | rsmi_dev_vendor_name_get (uint32_t dv_ind, char *name, size_t len) |
Get the name string for a given vendor ID. More... | |
rsmi_status_t | rsmi_dev_vram_vendor_get (uint32_t dv_ind, char *brand, uint32_t len) |
Get the vram vendor string of a gpu device. More... | |
rsmi_status_t | rsmi_dev_serial_number_get (uint32_t dv_ind, char *serial_num, uint32_t len) |
Get the serial number string for a device. More... | |
rsmi_status_t | rsmi_dev_subsystem_id_get (uint32_t dv_ind, uint16_t *id) |
Get the subsystem device id associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_subsystem_name_get (uint32_t dv_ind, char *name, size_t len) |
Get the name string for the device subsystem. More... | |
rsmi_status_t | rsmi_dev_drm_render_minor_get (uint32_t dv_ind, uint32_t *minor) |
Get the drm minor number associated with this device. More... | |
rsmi_status_t | rsmi_dev_subsystem_vendor_id_get (uint32_t dv_ind, uint16_t *id) |
Get the device subsystem vendor id associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_unique_id_get (uint32_t dv_ind, uint64_t *id) |
Get Unique ID. More... | |
rsmi_status_t | rsmi_dev_pci_bandwidth_get (uint32_t dv_ind, rsmi_pcie_bandwidth_t *bandwidth) |
Get the list of possible PCIe bandwidths that are available. More... | |
rsmi_status_t | rsmi_dev_pci_id_get (uint32_t dv_ind, uint64_t *bdfid) |
Get the unique PCI device identifier associated for a device. More... | |
rsmi_status_t | rsmi_topo_numa_affinity_get (uint32_t dv_ind, uint32_t *numa_node) |
Get the NUMA node associated with a device. More... | |
rsmi_status_t | rsmi_dev_pci_throughput_get (uint32_t dv_ind, uint64_t *sent, uint64_t *received, uint64_t *max_pkt_sz) |
Get PCIe traffic information. More... | |
rsmi_status_t | rsmi_dev_pci_replay_counter_get (uint32_t dv_ind, uint64_t *counter) |
Get PCIe replay counter. More... | |
rsmi_status_t | rsmi_dev_pci_bandwidth_set (uint32_t dv_ind, uint64_t bw_bitmask) |
Control the set of allowed PCIe bandwidths that can be used. More... | |
rsmi_status_t | rsmi_dev_power_ave_get (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) |
Get the average power consumption of the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_energy_count_get (uint32_t dv_ind, uint64_t *power, float *counter_resolution, uint64_t *timestamp) |
Get the energy accumulator counter of the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_power_cap_get (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap) |
Get the cap on power which, when reached, causes the system to take action to reduce power. More... | |
rsmi_status_t | rsmi_dev_power_cap_default_get (uint32_t dv_ind, uint64_t *default_cap) |
Get the default power cap for the device specified by dv_ind . More... | |
rsmi_status_t | rsmi_dev_power_cap_range_get (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max, uint64_t *min) |
Get the range of valid values for the power cap. More... | |
rsmi_status_t | rsmi_dev_power_cap_set (uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap) |
Set the power cap value. More... | |
rsmi_status_t | rsmi_dev_power_profile_set (uint32_t dv_ind, uint32_t reserved, rsmi_power_profile_preset_masks_t profile) |
Set the power profile. More... | |
rsmi_status_t | rsmi_dev_memory_total_get (uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *total) |
Get the total amount of memory that exists. More... | |
rsmi_status_t | rsmi_dev_memory_usage_get (uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *used) |
Get the current memory usage. More... | |
rsmi_status_t | rsmi_dev_memory_busy_percent_get (uint32_t dv_ind, uint32_t *busy_percent) |
Get percentage of time any device memory is being used. More... | |
rsmi_status_t | rsmi_dev_memory_reserved_pages_get (uint32_t dv_ind, uint32_t *num_pages, rsmi_retired_page_record_t *records) |
Get information about reserved ("retired") memory pages. More... | |
rsmi_status_t | rsmi_dev_fan_rpms_get (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) |
Get the fan speed in RPMs of the device with the specified device index and 0-based sensor index. More... | |
rsmi_status_t | rsmi_dev_fan_speed_get (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) |
Get the fan speed for the specified device as a value relative to RSMI_MAX_FAN_SPEED. More... | |
rsmi_status_t | rsmi_dev_fan_speed_max_get (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max_speed) |
Get the max. fan speed of the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_temp_metric_get (uint32_t dv_ind, uint32_t sensor_type, rsmi_temperature_metric_t metric, int64_t *temperature) |
Get the temperature metric value for the specified metric, from the specified temperature sensor on the specified device. More... | |
rsmi_status_t | rsmi_dev_volt_metric_get (uint32_t dv_ind, rsmi_voltage_type_t sensor_type, rsmi_voltage_metric_t metric, int64_t *voltage) |
Get the voltage metric value for the specified metric, from the specified voltage sensor on the specified device. More... | |
rsmi_status_t | rsmi_dev_fan_reset (uint32_t dv_ind, uint32_t sensor_ind) |
Reset the fan to automatic driver control. More... | |
rsmi_status_t | rsmi_dev_fan_speed_set (uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) |
Set the fan speed for the specified device with the provided speed, in RPMs. More... | |
rsmi_status_t | rsmi_dev_busy_percent_get (uint32_t dv_ind, uint32_t *busy_percent) |
Get percentage of time device is busy doing any processing. More... | |
rsmi_status_t | rsmi_utilization_count_get (uint32_t dv_ind, rsmi_utilization_counter_t utilization_counters[], uint32_t count, uint64_t *timestamp) |
Get coarse grain utilization counter of the specified device. More... | |
rsmi_status_t | rsmi_dev_perf_level_get (uint32_t dv_ind, rsmi_dev_perf_level_t *perf) |
Get the performance level of the device with provided device index. More... | |
rsmi_status_t | rsmi_perf_determinism_mode_set (uint32_t dv_ind, uint64_t clkvalue) |
Enter performance determinism mode with provided device index. More... | |
rsmi_status_t | rsmi_dev_overdrive_level_get (uint32_t dv_ind, uint32_t *od) |
Get the overdrive percent associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_mem_overdrive_level_get (uint32_t dv_ind, uint32_t *od) |
Get the memory clock overdrive percent associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_gpu_clk_freq_get (uint32_t dv_ind, rsmi_clk_type_t clk_type, rsmi_frequencies_t *f) |
Get the list of possible system clock speeds of device for a specified clock type. More... | |
rsmi_status_t | rsmi_dev_gpu_reset (int32_t dv_ind) |
Reset the gpu associated with the device with provided device index. More... | |
rsmi_status_t | rsmi_dev_od_volt_info_get (uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv) |
This function retrieves the voltage/frequency curve information. More... | |
rsmi_status_t | rsmi_dev_gpu_metrics_info_get (uint32_t dv_ind, rsmi_gpu_metrics_t *pgpu_metrics) |
This function retrieves the gpu metrics information. More... | |
rsmi_status_t | rsmi_dev_clk_range_set (uint32_t dv_ind, uint64_t minclkvalue, uint64_t maxclkvalue, rsmi_clk_type_t clkType) |
This function sets the clock range information. More... | |
rsmi_status_t | rsmi_dev_od_clk_info_set (uint32_t dv_ind, rsmi_freq_ind_t level, uint64_t clkvalue, rsmi_clk_type_t clkType) |
This function sets the clock frequency information. More... | |
rsmi_status_t | rsmi_dev_od_volt_info_set (uint32_t dv_ind, uint32_t vpoint, uint64_t clkvalue, uint64_t voltvalue) |
This function sets 1 of the 3 voltage curve points. More... | |
rsmi_status_t | rsmi_dev_od_volt_curve_regions_get (uint32_t dv_ind, uint32_t *num_regions, rsmi_freq_volt_region_t *buffer) |
This function will retrieve the current valid regions in the frequency/voltage space. More... | |
rsmi_status_t | rsmi_dev_power_profile_presets_get (uint32_t dv_ind, uint32_t sensor_ind, rsmi_power_profile_status_t *status) |
Get the list of available preset power profiles and an indication of which profile is currently active. More... | |
rsmi_status_t | rsmi_dev_perf_level_set (int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl) |
Set the PowerPlay performance level associated with the device with provided device index with the provided value. More... | |
rsmi_status_t | rsmi_dev_perf_level_set_v1 (uint32_t dv_ind, rsmi_dev_perf_level_t perf_lvl) |
Set the PowerPlay performance level associated with the device with provided device index with the provided value. More... | |
rsmi_status_t | rsmi_dev_overdrive_level_set (int32_t dv_ind, uint32_t od) |
Set the overdrive percent associated with the device with provided device index with the provided value. See details for WARNING. More... | |
rsmi_status_t | rsmi_dev_overdrive_level_set_v1 (uint32_t dv_ind, uint32_t od) |
Set the overdrive percent associated with the device with provided device index with the provided value. See details for WARNING. More... | |
rsmi_status_t | rsmi_dev_gpu_clk_freq_set (uint32_t dv_ind, rsmi_clk_type_t clk_type, uint64_t freq_bitmask) |
Control the set of allowed frequencies that can be used for the specified clock. More... | |
rsmi_status_t | rsmi_version_get (rsmi_version_t *version) |
Get the build version information for the currently running build of RSMI. More... | |
rsmi_status_t | rsmi_version_str_get (rsmi_sw_component_t component, char *ver_str, uint32_t len) |
Get the driver version string for the current system. More... | |
rsmi_status_t | rsmi_dev_vbios_version_get (uint32_t dv_ind, char *vbios, uint32_t len) |
Get the VBIOS identifier string. More... | |
rsmi_status_t | rsmi_dev_firmware_version_get (uint32_t dv_ind, rsmi_fw_block_t block, uint64_t *fw_version) |
Get the firmware versions for a device. More... | |
rsmi_status_t | rsmi_dev_ecc_count_get (uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_error_count_t *ec) |
Retrieve the error counts for a GPU block. More... | |
rsmi_status_t | rsmi_dev_ecc_enabled_get (uint32_t dv_ind, uint64_t *enabled_blocks) |
Retrieve the enabled ECC bit-mask. More... | |
rsmi_status_t | rsmi_dev_ecc_status_get (uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_ras_err_state_t *state) |
Retrieve the ECC status for a GPU block. More... | |
rsmi_status_t | rsmi_status_string (rsmi_status_t status, const char **status_string) |
Get a description of a provided RSMI error status. More... | |
rsmi_status_t | rsmi_dev_counter_group_supported (uint32_t dv_ind, rsmi_event_group_t group) |
Tell if an event group is supported by a given device. More... | |
rsmi_status_t | rsmi_dev_counter_create (uint32_t dv_ind, rsmi_event_type_t type, rsmi_event_handle_t *evnt_handle) |
Create a performance counter object. More... | |
rsmi_status_t | rsmi_dev_counter_destroy (rsmi_event_handle_t evnt_handle) |
Deallocate a performance counter object. More... | |
rsmi_status_t | rsmi_counter_control (rsmi_event_handle_t evt_handle, rsmi_counter_command_t cmd, void *cmd_args) |
Issue performance counter control commands. More... | |
rsmi_status_t | rsmi_counter_read (rsmi_event_handle_t evt_handle, rsmi_counter_value_t *value) |
Read the current value of a performance counter. More... | |
rsmi_status_t | rsmi_counter_available_counters_get (uint32_t dv_ind, rsmi_event_group_t grp, uint32_t *available) |
Get the number of currently available counters. More... | |
rsmi_status_t | rsmi_compute_process_info_get (rsmi_process_info_t *procs, uint32_t *num_items) |
Get process information about processes currently using GPU. More... | |
rsmi_status_t | rsmi_compute_process_info_by_pid_get (uint32_t pid, rsmi_process_info_t *proc) |
Get process information about a specific process. More... | |
rsmi_status_t | rsmi_compute_process_gpus_get (uint32_t pid, uint32_t *dv_indices, uint32_t *num_devices) |
Get the device indices currently being used by a process. More... | |
rsmi_status_t | rsmi_dev_xgmi_error_status (uint32_t dv_ind, rsmi_xgmi_status_t *status) |
Retrieve the XGMI error status for a device. More... | |
rsmi_status_t | rsmi_dev_xgmi_error_reset (uint32_t dv_ind) |
Reset the XGMI error status for a device. More... | |
rsmi_status_t | rsmi_dev_xgmi_hive_id_get (uint32_t dv_ind, uint64_t *hive_id) |
Retrieve the XGMI hive id for a device. More... | |
rsmi_status_t | rsmi_topo_get_numa_node_number (uint32_t dv_ind, uint32_t *numa_node) |
Retrieve the NUMA CPU node number for a device. More... | |
rsmi_status_t | rsmi_topo_get_link_weight (uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *weight) |
Retrieve the weight for a connection between 2 GPUs. More... | |
rsmi_status_t | rsmi_minmax_bandwidth_get (uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *min_bandwidth, uint64_t *max_bandwidth) |
Retrieve minimal and maximal IO link bandwidth between 2 GPUs. More... | |
rsmi_status_t | rsmi_topo_get_link_type (uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *hops, RSMI_IO_LINK_TYPE *type) |
Retrieve the hops and the connection type between 2 GPUs. More... | |
rsmi_status_t | rsmi_is_P2P_accessible (uint32_t dv_ind_src, uint32_t dv_ind_dst, bool *accessible) |
Return P2P availability status between 2 GPUs. More... | |
rsmi_status_t | rsmi_dev_compute_partition_get (uint32_t dv_ind, char *compute_partition, uint32_t len) |
Retrieves the current compute partitioning for a desired device. More... | |
rsmi_status_t | rsmi_dev_compute_partition_set (uint32_t dv_ind, rsmi_compute_partition_type_t compute_partition) |
Modifies a selected device's compute partition setting. More... | |
rsmi_status_t | rsmi_dev_supported_func_iterator_open (uint32_t dv_ind, rsmi_func_id_iter_handle_t *handle) |
Get a function name iterator of supported RSMI functions for a device. More... | |
rsmi_status_t | rsmi_dev_supported_variant_iterator_open (rsmi_func_id_iter_handle_t obj_h, rsmi_func_id_iter_handle_t *var_iter) |
Get a variant iterator for a given handle. More... | |
rsmi_status_t | rsmi_func_iter_next (rsmi_func_id_iter_handle_t handle) |
Advance a function identifier iterator. More... | |
rsmi_status_t | rsmi_dev_supported_func_iterator_close (rsmi_func_id_iter_handle_t *handle) |
Close a variant iterator handle. More... | |
rsmi_status_t | rsmi_func_iter_value_get (rsmi_func_id_iter_handle_t handle, rsmi_func_id_value_t *value) |
Get the value associated with a function/variant iterator. More... | |
rsmi_status_t | rsmi_event_notification_init (uint32_t dv_ind) |
Prepare to collect event notifications for a GPU. More... | |
rsmi_status_t | rsmi_event_notification_mask_set (uint32_t dv_ind, uint64_t mask) |
Specify which events to collect for a device. More... | |
rsmi_status_t | rsmi_event_notification_get (int timeout_ms, uint32_t *num_elem, rsmi_evt_notification_data_t *data) |
Collect event notifications, waiting a specified amount of time. More... | |
rsmi_status_t | rsmi_event_notification_stop (uint32_t dv_ind) |
Close any file handles and free any resources used by event notification for a GPU. More... | |
Detailed Description
The rocm_smi library API is new, and therefore subject to change at either the ABI or API level. Rather than marking every function prototype as "unstable", we consider the whole API unstable (i.e., changes are possible) while the major version remains 0. This means that if the API/ABI changes, we will not increment the major version to 1. Once the ABI stabilizes, we will increment the major version to 1, and thereafter increment it on all ABI breaks.
Main header file for the ROCm SMI library. All required function, structure, enum, etc. definitions should be defined in this file.
Macro Definition Documentation
◆ RSMI_MAX_FAN_SPEED
#define RSMI_MAX_FAN_SPEED 255 |
Maximum possible value for fan speed. Should be used as the denominator when determining fan speed percentage.
◆ RSMI_EVENT_MASK_FROM_INDEX
#define RSMI_EVENT_MASK_FROM_INDEX | ( | i | ) | (1ULL << ((i) - 1)) |
Macro to generate event bitmask from event id
◆ RSMI_DEFAULT_VARIANT
#define RSMI_DEFAULT_VARIANT 0xFFFFFFFFFFFFFFFF |
Place-holder "variant" for functions that don't have any variants, but do have monitors or sensors.
Typedef Documentation
◆ rsmi_event_handle_t
typedef uintptr_t rsmi_event_handle_t |
Handle to performance event counter.
Event counter types
Enumeration Type Documentation
◆ rsmi_status_t
enum rsmi_status_t |
Error codes returned by rocm_smi_lib functions.
◆ rsmi_init_flags_t
enum rsmi_init_flags_t |
Initialization flags.
Initialization flags may be OR'd together and passed to rsmi_init().
◆ rsmi_dev_perf_level_t
PowerPlay performance levels.
◆ rsmi_sw_component_t
enum rsmi_sw_component_t |
◆ rsmi_event_group_t
enum rsmi_event_group_t |
◆ rsmi_event_type_t
enum rsmi_event_type_t |
Event type enum. Events belonging to a particular event group rsmi_event_group_t should begin enumerating at the rsmi_event_group_t value for that group.
Event types
Enumerator | |
---|---|
RSMI_EVNT_XGMI_0_NOP_TX | NOPs sent to neighbor 0. |
RSMI_EVNT_XGMI_0_REQUEST_TX | Outgoing requests to neighbor 0 |
RSMI_EVNT_XGMI_0_RESPONSE_TX | Outgoing responses to neighbor 0 |
RSMI_EVNT_XGMI_0_BEATS_TX | Data beats sent to neighbor 0; Each beat represents 32 bytes. XGMI throughput can be calculated by multiplying a BEATs event such as RSMI_EVNT_XGMI_0_BEATS_TX by 32 and dividing by the time for which event collection occurred, rsmi_counter_value_t.time_running (which is in nanoseconds). To get bytes per second, multiply this value by $10^9$. |
RSMI_EVNT_XGMI_1_NOP_TX | NOPs sent to neighbor 1. |
RSMI_EVNT_XGMI_1_REQUEST_TX | Outgoing requests to neighbor 1 |
RSMI_EVNT_XGMI_1_RESPONSE_TX | Outgoing responses to neighbor 1 |
RSMI_EVNT_XGMI_1_BEATS_TX | Data beats sent to neighbor 1; Each beat represents 32 bytes |
RSMI_EVNT_XGMI_DATA_OUT_1 | Outbound beats to neighbor 1. |
RSMI_EVNT_XGMI_DATA_OUT_2 | Outbound beats to neighbor 2. |
RSMI_EVNT_XGMI_DATA_OUT_3 | Outbound beats to neighbor 3. |
RSMI_EVNT_XGMI_DATA_OUT_4 | Outbound beats to neighbor 4. |
RSMI_EVNT_XGMI_DATA_OUT_5 | Outbound beats to neighbor 5. |
◆ rsmi_counter_command_t
◆ rsmi_evt_notification_type_t
◆ rsmi_clk_type_t
enum rsmi_clk_type_t |
◆ rsmi_compute_partition_type_t
Compute Partition types
◆ rsmi_temperature_metric_t
Temperature Metrics. This enum is used to identify various temperature metrics. Corresponding values will be in millidegrees Celsius.
◆ rsmi_temperature_type_t
This enumeration is used to indicate from which part of the device a temperature reading should be obtained.
◆ rsmi_voltage_metric_t
Voltage Metrics. This enum is used to identify various voltage metrics. Corresponding values will be in millivolts.
◆ rsmi_voltage_type_t
enum rsmi_voltage_type_t |
◆ rsmi_power_profile_preset_masks_t
Pre-set Profile Selections. These bitmasks can be AND'd with the rsmi_power_profile_status_t.available_profiles returned from rsmi_dev_power_profile_presets_get to determine which power profiles are supported by the system.
◆ rsmi_gpu_block_t
enum rsmi_gpu_block_t |
This enum is used to identify different GPU blocks.
◆ rsmi_ras_err_state_t
enum rsmi_ras_err_state_t |
The current ECC state.
◆ rsmi_memory_type_t
enum rsmi_memory_type_t |
◆ rsmi_freq_ind_t
enum rsmi_freq_ind_t |
◆ rsmi_memory_page_status_t
Reserved Memory Page States.
◆ _RSMI_IO_LINK_TYPE
enum _RSMI_IO_LINK_TYPE |