rocm_smi.h Source File

rocm_smi.h Source File#

ROCmSMI: rocm_smi.h Source File
rocm_smi.h
Go to the documentation of this file.
1 /*
2  * =============================================================================
3  * The University of Illinois/NCSA
4  * Open Source License (NCSA)
5  *
6  * Copyright (c) 2017-2025, Advanced Micro Devices, Inc.
7  * All rights reserved.
8  *
9  * Developed by:
10  *
11  * AMD Research and AMD ROC Software Development
12  *
13  * Advanced Micro Devices, Inc.
14  *
15  * www.amd.com
16  *
17  * Permission is hereby granted, free of charge, to any person obtaining a copy
18  * of this software and associated documentation files (the "Software"), to
19  * deal with the Software without restriction, including without limitation
20  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
21  * and/or sell copies of the Software, and to permit persons to whom the
22  * Software is furnished to do so, subject to the following conditions:
23  *
24  * - Redistributions of source code must retain the above copyright notice,
25  * this list of conditions and the following disclaimers.
26  * - Redistributions in binary form must reproduce the above copyright
27  * notice, this list of conditions and the following disclaimers in
28  * the documentation and/or other materials provided with the distribution.
29  * - Neither the names of <Name of Development Group, Name of Institution>,
30  * nor the names of its contributors may be used to endorse or promote
31  * products derived from this Software without specific prior written
32  * permission.
33  *
34  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
36  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
37  * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
38  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
39  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
40  * DEALINGS WITH THE SOFTWARE.
41  *
42  */
43 
44 #ifndef ROCM_SMI_ROCM_SMI_H_
45 #define ROCM_SMI_ROCM_SMI_H_
46 
47 #ifdef __cplusplus
48 extern "C" {
49 #include <cstdint>
50 #else
51 #include <stdint.h>
52 #endif // __cplusplus
53 
54 #include <stddef.h>
55 #include <stdbool.h>
56 
57 #include "rocm_smi/kfd_ioctl.h"
58 
75 #define RSMI_MAX_NUM_FREQUENCIES 33
76 
79 #define RSMI_MAX_FAN_SPEED 255
80 
82 #define RSMI_NUM_VOLTAGE_CURVE_POINTS 3
83 
84 
88 typedef enum {
110  RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
139 
141 } rsmi_status_t;
142 
149 typedef enum {
156  RSMI_INIT_FLAG_THRAD_ONLY_MUTEX = 0x400000000000000,
157  RSMI_INIT_FLAG_RESRV_TEST1 = 0x800000000000000,
159 
163 typedef enum {
165  RSMI_DEV_PERF_LEVEL_FIRST = RSMI_DEV_PERF_LEVEL_AUTO,
166 
181 
182  RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_DETERMINISM,
183 
187 typedef rsmi_dev_perf_level_t rsmi_dev_perf_level;
189 
196 typedef enum {
197  RSMI_SW_COMP_FIRST = 0x0,
198 
199  RSMI_SW_COMP_DRIVER = RSMI_SW_COMP_FIRST,
200 
201  RSMI_SW_COMP_LAST = RSMI_SW_COMP_DRIVER
203 
211 typedef uintptr_t rsmi_event_handle_t;
212 
219 typedef enum {
222  RSMI_EVNT_GRP_INVALID = 0xFFFFFFFF
224 
231 typedef enum {
232  RSMI_EVNT_FIRST = RSMI_EVNT_GRP_XGMI,
233 
234  RSMI_EVNT_XGMI_FIRST = RSMI_EVNT_GRP_XGMI,
235  RSMI_EVNT_XGMI_0_NOP_TX = RSMI_EVNT_XGMI_FIRST,
240 
253  // i.e., Throughput = BEATS/time_running * 10^9 bytes/sec
263 
264  RSMI_EVNT_XGMI_LAST = RSMI_EVNT_XGMI_1_BEATS_TX, // 5
265 
266  RSMI_EVNT_XGMI_DATA_OUT_FIRST = RSMI_EVNT_GRP_XGMI_DATA_OUT, // 10
267 
268  /*
269  * @brief Events in the RSMI_EVNT_GRP_XGMI_DATA_OUT group measure
270  * the number of beats sent on an XGMI link. Each beat represents
271  * 32 bytes. RSMI_EVNT_XGMI_DATA_OUT_n represents the number of
272  * outbound beats (each representing 32 bytes) on link n.<br><br>
273  *
274  * XGMI throughput can be calculated by multiplying an event
275  * such as ::RSMI_EVNT_XGMI_DATA_OUT_n by 32 and dividing by
276  * the time for which event collection occurred,
277  * ::rsmi_counter_value_t.time_running (which is in nanoseconds). To get
278  * bytes per second, multiply this value by 10<sup>9</sup>.<br>
279  * <br>
280  * Throughput = BEATS/time_running * 10<sup>9</sup> (bytes/second)<br>
281  */
282  // i.e., Throughput = BEATS/time_running * 10^9 bytes/sec
283  RSMI_EVNT_XGMI_DATA_OUT_0 = RSMI_EVNT_XGMI_DATA_OUT_FIRST,
289  RSMI_EVNT_XGMI_DATA_OUT_LAST = RSMI_EVNT_XGMI_DATA_OUT_5,
290 
291  RSMI_EVNT_LAST = RSMI_EVNT_XGMI_DATA_OUT_LAST,
293 
297 typedef enum {
302 
306 typedef struct {
307  uint64_t value;
308  uint64_t time_enabled;
310  uint64_t time_running;
313 
317 typedef enum {
318  RSMI_EVT_NOTIF_NONE = KFD_SMI_EVENT_NONE,
319  RSMI_EVT_NOTIF_VMFAULT = KFD_SMI_EVENT_VMFAULT,
320  RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_VMFAULT,
321  RSMI_EVT_NOTIF_THERMAL_THROTTLE = KFD_SMI_EVENT_THERMAL_THROTTLE,
322  RSMI_EVT_NOTIF_GPU_PRE_RESET = KFD_SMI_EVENT_GPU_PRE_RESET,
323  RSMI_EVT_NOTIF_GPU_POST_RESET = KFD_SMI_EVENT_GPU_POST_RESET,
324  RSMI_EVT_NOTIF_EVENT_MIGRATE_START = KFD_SMI_EVENT_MIGRATE_START,
325  RSMI_EVT_NOTIF_EVENT_MIGRATE_END = KFD_SMI_EVENT_MIGRATE_END,
326  RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_START = KFD_SMI_EVENT_PAGE_FAULT_START,
327  RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_END = KFD_SMI_EVENT_PAGE_FAULT_END,
328  RSMI_EVT_NOTIF_EVENT_QUEUE_EVICTION = KFD_SMI_EVENT_QUEUE_EVICTION,
329  RSMI_EVT_NOTIF_EVENT_QUEUE_RESTORE = KFD_SMI_EVENT_QUEUE_RESTORE,
330  RSMI_EVT_NOTIF_EVENT_UNMAP_FROM_GPU = KFD_SMI_EVENT_UNMAP_FROM_GPU,
331  RSMI_EVT_NOTIF_EVENT_ALL_PROCESS = KFD_SMI_EVENT_ALL_PROCESS,
332  RSMI_EVT_NOTIF_LAST = KFD_SMI_EVENT_ALL_PROCESS
334 
/* Convert a 1-based event index i into a single-bit mask: index 1 -> bit 0,
 * index 2 -> bit 1, and so on. The shift is performed on an unsigned long long,
 * so i must be at least 1 and no larger than that type's bit width; any other
 * value gives an out-of-range shift count, which is undefined behavior in C. */
338 #define RSMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
339 
341 // matches kfd message max size
342 #define MAX_EVENT_NOTIFICATION_MSG_SIZE 96
343 
347 typedef struct {
348  uint32_t dv_ind;
352 
356 typedef enum {
358  RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS,
365 
366  // Add new clocks to the end (not in the middle) and update
367  // RSMI_CLK_TYPE_LAST
368  RSMI_CLK_TYPE_LAST = RSMI_CLK_TYPE_MEM,
369  RSMI_CLK_INVALID = 0xFFFFFFFF
372 typedef rsmi_clk_type_t rsmi_clk_type;
374 
379 typedef enum {
380  RSMI_COMPUTE_PARTITION_INVALID = 0,
393 typedef rsmi_compute_partition_type_t rsmi_compute_partition_type;
395 
400 typedef enum {
401  RSMI_MEMORY_PARTITION_UNKNOWN = 0,
417 typedef rsmi_memory_partition_type_t rsmi_memory_partition_type;
419 
425 typedef enum {
427  RSMI_TEMP_FIRST = RSMI_TEMP_CURRENT,
428 
459 
460  RSMI_TEMP_LAST = RSMI_TEMP_HIGHEST
463 typedef rsmi_temperature_metric_t rsmi_temperature_metric;
465 
470 typedef enum {
471  RSMI_TEMP_TYPE_FIRST = 0,
472 
473  RSMI_TEMP_TYPE_EDGE = RSMI_TEMP_TYPE_FIRST,
481  RSMI_TEMP_TYPE_LAST = RSMI_TEMP_TYPE_HBM_3,
482  RSMI_TEMP_TYPE_INVALID = 0xFFFFFFFF
484 
490 typedef enum {
491  /* Utilization */
492  RSMI_ACTIVITY_GFX = (0x1 << 0),
493  RSMI_ACTIVITY_UMC = (0x1 << 1),
494  RSMI_ACTIVITY_MM = (0x1 << 2)
496 
497 
503 typedef enum {
505 
506  RSMI_VOLT_FIRST = RSMI_VOLT_CURRENT,
514 
515  RSMI_VOLT_LAST = RSMI_VOLT_HIGHEST
517 
522 typedef enum {
523  RSMI_VOLT_TYPE_FIRST = 0,
524 
525  RSMI_VOLT_TYPE_VDDGFX = RSMI_VOLT_TYPE_FIRST,
527 
528  RSMI_VOLT_TYPE_LAST = RSMI_VOLT_TYPE_VDDBOARD,
529  RSMI_VOLT_TYPE_INVALID = 0xFFFFFFFF
531 
538 typedef enum {
544 
546  RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK = 0x20,
549 
551  RSMI_PWR_PROF_PRST_INVALID = 0xFFFFFFFFFFFFFFFF
554 typedef rsmi_power_profile_preset_masks_t rsmi_power_profile_preset_masks;
556 
560 typedef enum {
561  RSMI_GPU_BLOCK_INVALID = 0x0000000000000000,
563  RSMI_GPU_BLOCK_FIRST = 0x0000000000000001,
564 
565  RSMI_GPU_BLOCK_UMC = RSMI_GPU_BLOCK_FIRST,
566  RSMI_GPU_BLOCK_SDMA = 0x0000000000000002,
567  RSMI_GPU_BLOCK_GFX = 0x0000000000000004,
568  RSMI_GPU_BLOCK_MMHUB = 0x0000000000000008,
569  RSMI_GPU_BLOCK_ATHUB = 0x0000000000000010,
570  RSMI_GPU_BLOCK_PCIE_BIF = 0x0000000000000020,
571  RSMI_GPU_BLOCK_HDP = 0x0000000000000040,
572  RSMI_GPU_BLOCK_XGMI_WAFL = 0x0000000000000080,
573  RSMI_GPU_BLOCK_DF = 0x0000000000000100,
574  RSMI_GPU_BLOCK_SMN = 0x0000000000000200,
575  RSMI_GPU_BLOCK_SEM = 0x0000000000000400,
576  RSMI_GPU_BLOCK_MP0 = 0x0000000000000800,
577  RSMI_GPU_BLOCK_MP1 = 0x0000000000001000,
578  RSMI_GPU_BLOCK_FUSE = 0x0000000000002000,
579 
582  RSMI_GPU_BLOCK_RESERVED = 0x8000000000000000
585 typedef rsmi_gpu_block_t rsmi_gpu_block;
587 
591 typedef enum {
600 
601  RSMI_RAS_ERR_STATE_LAST = RSMI_RAS_ERR_STATE_ENABLED,
602  RSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF
604 
608 typedef enum {
609  RSMI_MEM_TYPE_FIRST = 0,
610 
611  RSMI_MEM_TYPE_VRAM = RSMI_MEM_TYPE_FIRST,
614 
615  RSMI_MEM_TYPE_LAST = RSMI_MEM_TYPE_GTT
617 
621 typedef enum {
624  RSMI_FREQ_IND_INVALID = 0xFFFFFFFF
627 typedef rsmi_freq_ind_t rsmi_freq_ind;
629 
630 
635 typedef enum {
636  RSMI_FW_BLOCK_FIRST = 0,
637 
638  RSMI_FW_BLOCK_ASD = RSMI_FW_BLOCK_FIRST,
639  RSMI_FW_BLOCK_CE,
640  RSMI_FW_BLOCK_DMCU,
641  RSMI_FW_BLOCK_MC,
642  RSMI_FW_BLOCK_ME,
643  RSMI_FW_BLOCK_MEC,
644  RSMI_FW_BLOCK_MEC2,
645  RSMI_FW_BLOCK_MES,
646  RSMI_FW_BLOCK_MES_KIQ,
647  RSMI_FW_BLOCK_PFP,
648  RSMI_FW_BLOCK_RLC,
649  RSMI_FW_BLOCK_RLC_SRLC,
650  RSMI_FW_BLOCK_RLC_SRLG,
651  RSMI_FW_BLOCK_RLC_SRLS,
652  RSMI_FW_BLOCK_SDMA,
653  RSMI_FW_BLOCK_SDMA2,
654  RSMI_FW_BLOCK_SMC,
655  RSMI_FW_BLOCK_SOS,
656  RSMI_FW_BLOCK_TA_RAS,
657  RSMI_FW_BLOCK_TA_XGMI,
658  RSMI_FW_BLOCK_UVD,
659  RSMI_FW_BLOCK_VCE,
660  RSMI_FW_BLOCK_VCN,
661 
662  RSMI_FW_BLOCK_LAST = RSMI_FW_BLOCK_VCN
664 
668 typedef enum {
669  RSMI_XGMI_STATUS_NO_ERRORS = 0,
670  RSMI_XGMI_STATUS_ERROR,
671  RSMI_XGMI_STATUS_MULTIPLE_ERRORS,
673 
677 typedef uint64_t rsmi_bit_field_t;
679 typedef rsmi_bit_field_t rsmi_bit_field;
681 
685 typedef enum {
693 
697 typedef enum _RSMI_IO_LINK_TYPE {
702  RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF
704 
707 #define CPU_NODE_INDEX 0xFFFFFFFF
708 
712 typedef enum {
715  RSMI_COARSE_GRAIN_GFX_ACTIVITY = RSMI_UTILIZATION_COUNTER_FIRST,
717  RSMI_UTILIZATION_COUNTER_LAST = RSMI_COARSE_GRAIN_MEM_ACTIVITY
719 
723 typedef enum {
726  RSMI_INVALID_POWER = 0xFFFFFFFF
728 
732 typedef struct {
734  uint64_t value;
736 
740 typedef struct {
741  uint64_t page_address;
742  uint64_t page_size;
745 
749 #define RSMI_MAX_NUM_POWER_PROFILES (sizeof(rsmi_bit_field_t) * 8)
750 
756 typedef struct {
761 
766 
770  uint32_t num_profiles;
773 typedef rsmi_power_profile_status_t rsmi_power_profile_status;
775 
779 typedef struct {
784 
788  uint32_t num_supported;
789 
793  uint32_t current;
794 
799  uint64_t frequency[RSMI_MAX_NUM_FREQUENCIES];
802 typedef rsmi_frequencies_t rsmi_frequencies;
804 
810 typedef struct {
815 
820  uint32_t lanes[RSMI_MAX_NUM_FREQUENCIES];
822 
824 typedef rsmi_pcie_bandwidth_t rsmi_pcie_bandwidth;
826 
831 typedef struct {
832  /* Utilization */
837 
841 typedef struct {
842  uint32_t major;
843  uint32_t minor;
844  uint32_t patch;
845  const char *build;
848 typedef rsmi_version_t rsmi_version;
850 
853 typedef struct {
854  uint64_t lower_bound;
855  uint64_t upper_bound;
856 } rsmi_range_t;
858 typedef rsmi_range_t rsmi_range;
860 
864 typedef struct {
865  uint64_t frequency;
866  uint64_t voltage;
869 typedef rsmi_od_vddc_point_t rsmi_od_vddc_point;
871 
877 typedef struct {
882 typedef rsmi_freq_volt_region_t rsmi_freq_volt_region;
884 
888 typedef struct {
896 typedef rsmi_od_volt_curve_t rsmi_od_volt_curve;
898 
902 typedef struct {
908 
913  uint32_t num_regions;
916 typedef rsmi_od_volt_freq_data_t rsmi_od_volt_freq_data;
918 
919 
928  // TODO(amd) Doxygen documents
929  // Note: This should match: AMDGpuMetricsHeader_v1_t
931  uint16_t structure_size;
932  uint8_t format_revision;
933  uint8_t content_revision;
935 };
939 
943 #define CENTRIGRADE_TO_MILLI_CENTIGRADE 1000
944 
948 #define RSMI_NUM_HBM_INSTANCES 4
949 
953 #define RSMI_MAX_NUM_VCNS 4
954 
958 #define RSMI_MAX_NUM_JPEG_ENGS 32
959 
963 #define RSMI_MAX_NUM_JPEG_ENG_V1 40
964 
968 #define RSMI_MAX_NUM_CLKS 4
969 
973 #define RSMI_MAX_NUM_XGMI_LINKS 8
974 
978 #define RSMI_MAX_NUM_GFX_CLKS 8
979 
989 #define RSMI_MAX_NUM_XCC 8
990 
1001 #define RSMI_MAX_NUM_XCP 8
1002 
1007  /*
1008  * v1.6 additions
1009  */
1010  /* Utilization Instantaneous (%) */
1011  uint32_t gfx_busy_inst[RSMI_MAX_NUM_XCC];
1012  uint16_t jpeg_busy[RSMI_MAX_NUM_JPEG_ENG_V1];
1013  uint16_t vcn_busy[RSMI_MAX_NUM_VCNS];
1014 
1015  /* Utilization Accumulated (%) */
1016  uint64_t gfx_busy_acc[RSMI_MAX_NUM_XCC];
1017 
1018  /*
1019  * v1.7 additions
1020  */
1021  /* Total App Clock Counter Accumulated */
1022  uint64_t gfx_below_host_limit_acc[RSMI_MAX_NUM_XCC];
1023 
1028  uint64_t gfx_below_host_limit_thm_acc[RSMI_MAX_NUM_XCC];
1029  uint64_t gfx_low_utilization_acc[RSMI_MAX_NUM_XCC];
1030  uint64_t gfx_below_host_limit_total_acc[RSMI_MAX_NUM_XCC];
1031 };
1032 
1033 typedef struct {
1034  // TODO(amd) Doxygen documents
1035  // Note: This structure is extended to fit the needs of different GPU metric
1036  // versions when exposing data through the structure.
1037  // Depending on the version, some data members will hold data, and
1038  // some will not. A good example is the set of 'current clocks':
1039  // - current_gfxclk, current_socclk, current_vclk0, current_dclk0
1040  // These are single-valued data members, up to version 1.3.
1041  // For version 1.4 and up these are multi-valued data members (arrays)
1042  // and their counterparts;
1043  // - current_gfxclks[], current_socclks[], current_vclk0s[],
1044  // current_dclk0s[]
1045  // will hold the data
1047 
1048  /*
1049  * v1.0 Base
1050  */
1051  struct metrics_table_header_t common_header;
1052 
1053  // Temperature (C)
1054  uint16_t temperature_edge;
1055  uint16_t temperature_hotspot;
1056  uint16_t temperature_mem;
1057  uint16_t temperature_vrgfx;
1058  uint16_t temperature_vrsoc;
1059  uint16_t temperature_vrmem;
1060 
1061  // Utilization (%)
1062  uint16_t average_gfx_activity;
1063  uint16_t average_umc_activity; // memory controller
1064  uint16_t average_mm_activity; // UVD or VCN
1065 
1066  // Power (W) /Energy (15.259uJ per 1ns)
1067  uint16_t average_socket_power;
1068  uint64_t energy_accumulator; // v1 mod. (32->64)
1069 
1070  // Driver attached timestamp (in ns)
1071  uint64_t system_clock_counter; // v1 mod. (moved from top of struct)
1072 
1073  // Average clocks (MHz)
1074  uint16_t average_gfxclk_frequency;
1075  uint16_t average_socclk_frequency;
1076  uint16_t average_uclk_frequency;
1077  uint16_t average_vclk0_frequency;
1078  uint16_t average_dclk0_frequency;
1079  uint16_t average_vclk1_frequency;
1080  uint16_t average_dclk1_frequency;
1081 
1082  // Current clocks (MHz)
1083  uint16_t current_gfxclk;
1084  uint16_t current_socclk;
1085  uint16_t current_uclk;
1086  uint16_t current_vclk0;
1087  uint16_t current_dclk0;
1088  uint16_t current_vclk1;
1089  uint16_t current_dclk1;
1090 
1091  // Throttle status
1092  uint32_t throttle_status;
1093 
1094  // Fans (RPM)
1095  uint16_t current_fan_speed;
1096 
1097  // Link width (number of lanes) /speed (0.1 GT/s)
1098  uint16_t pcie_link_width; // v1 mod.(8->16)
1099  uint16_t pcie_link_speed; // in 0.1 GT/s; v1 mod. (8->16)
1100 
1101 
1102  /*
1103  * v1.1 additions
1104  */
1105  uint32_t gfx_activity_acc; // new in v1
1106  uint32_t mem_activity_acc; // new in v1
1107  uint16_t temperature_hbm[RSMI_NUM_HBM_INSTANCES]; // new in v1
1108 
1109 
1110  /*
1111  * v1.2 additions
1112  */
1113  // PMFW attached timestamp (10ns resolution)
1114  uint64_t firmware_timestamp;
1115 
1116 
1117  /*
1118  * v1.3 additions
1119  */
1120  // Voltage (mV)
1121  uint16_t voltage_soc;
1122  uint16_t voltage_gfx;
1123  uint16_t voltage_mem;
1124 
1125  // Throttle status
1126  uint64_t indep_throttle_status;
1127 
1128 
1129  /*
1130  * v1.4 additions
1131  */
1132  // Power (Watts)
1133  uint16_t current_socket_power;
1134 
1135  // Utilization (%)
1136  uint16_t vcn_activity[RSMI_MAX_NUM_VCNS]; // VCN instances activity percent (encode/decode)
1137 
1138  // Clock Lock Status. Each bit corresponds to clock instance
1139  uint32_t gfxclk_lock_status;
1140 
1141  // XGMI bus width and bitrate (in GB/s)
1142  uint16_t xgmi_link_width;
1143  uint16_t xgmi_link_speed;
1144 
1145  // PCIE accumulated bandwidth (GB/sec)
1146  uint64_t pcie_bandwidth_acc;
1147 
1148  // PCIE instantaneous bandwidth (GB/sec)
1149  uint64_t pcie_bandwidth_inst;
1150 
1151  // PCIE L0 to recovery state transition accumulated count
1152  uint64_t pcie_l0_to_recov_count_acc;
1153 
1154  // PCIE replay accumulated count
1155  uint64_t pcie_replay_count_acc;
1156 
1157  // PCIE replay rollover accumulated count
1158  uint64_t pcie_replay_rover_count_acc;
1159 
1160  // XGMI accumulated data transfer size(KiloBytes)
1161  uint64_t xgmi_read_data_acc[RSMI_MAX_NUM_XGMI_LINKS];
1162  uint64_t xgmi_write_data_acc[RSMI_MAX_NUM_XGMI_LINKS];
1163 
1164  // Current clocks, one entry per clock instance (array counterparts of the single-valued current_* fields above)
1165  uint16_t current_gfxclks[RSMI_MAX_NUM_GFX_CLKS];
1166  uint16_t current_socclks[RSMI_MAX_NUM_CLKS];
1167  uint16_t current_vclk0s[RSMI_MAX_NUM_CLKS];
1168  uint16_t current_dclk0s[RSMI_MAX_NUM_CLKS];
1169 
1170  /*
1171  * v1.5 additions
1172  */
1173  // JPEG activity percent (encode/decode)
1174  uint16_t jpeg_activity[RSMI_MAX_NUM_JPEG_ENGS];
1175 
1176  // PCIE NAK sent accumulated count
1177  uint32_t pcie_nak_sent_count_acc;
1178 
1179  // PCIE NAK received accumulated count
1180  uint32_t pcie_nak_rcvd_count_acc;
1181 
1182  /*
1183  * v1.6 additions
1184  */
1185  /* Accumulation cycle counter */
1186  uint64_t accumulation_counter;
1187 
1191  uint64_t prochot_residency_acc;
1206  uint64_t ppt_residency_acc;
1221  uint64_t socket_thm_residency_acc;
1222  uint64_t vr_thm_residency_acc;
1223  uint64_t hbm_thm_residency_acc;
1224 
1225  /* Number of current partition */
1226  uint16_t num_partition;
1227 
1228  /* XCP (Graphic Cluster Partitions) metrics stats */
1229  struct amdgpu_xcp_metrics_t xcp_stats[RSMI_MAX_NUM_XCP];
1230 
1231  /* PCIE other end recovery counter */
1232  uint32_t pcie_lc_perf_other_end_recovery;
1233 
1234  /*
1235  * v1.7 additions
1236  */
1237  /* VRAM max bandwidth at max memory clock */
1238  uint64_t vram_max_bandwidth;
1239 
1240  /* XGMI link status(up/down) */
1241  uint16_t xgmi_link_status[RSMI_MAX_NUM_XGMI_LINKS];
1242 
1245 
1249 typedef struct {
1250  uint64_t correctable_err;
1253 
1257 typedef struct {
1258  uint32_t process_id;
1259  uint32_t pasid;
1260  uint64_t vram_usage;
1261  uint64_t sdma_usage;
1262  uint32_t cu_occupancy;
1264 
1266 #define CU_OCCUPANCY_INVALID 0xFFFFFFFF
1267 
1271 typedef struct rsmi_func_id_iter_handle * rsmi_func_id_iter_handle_t;
1272 
1275 #define RSMI_DEFAULT_VARIANT 0xFFFFFFFFFFFFFFFF
1276 
1282 typedef union id {
1283  uint64_t id;
1284  const char *name;
1285  union {
1300  };
1302 
1311 typedef struct {
1313  uint32_t card_index;
1316 
1318  uint64_t bdfid;
1319 
1321  uint64_t kfd_gpu_id;
1322 
1324  uint32_t partition_id;
1325 
1327  uint32_t smi_device_id;
1328 
1329  uint32_t reserved[10];
1331 
1332 /*****************************************************************************/
1350 rsmi_status_t rsmi_init(uint64_t init_flags);
1351 
1358  // end of InitShut
1360 
1361 /*****************************************************************************/
1379 
1407 rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id);
1408 
1423 rsmi_status_t rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision);
1424 
1449 rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, uint16_t *sku);
1450 
1474 rsmi_status_t rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id);
1475 
1510 rsmi_status_t rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len);
1511 
1544 rsmi_status_t rsmi_dev_brand_get(uint32_t dv_ind, char *brand, uint32_t len);
1545 
1580 rsmi_status_t rsmi_dev_vendor_name_get(uint32_t dv_ind, char *name, size_t len);
1581 
1582 
1608 rsmi_status_t rsmi_dev_market_name_get(uint32_t dv_ind, char *market_name, uint32_t len);
1609 
1632 rsmi_status_t rsmi_dev_vram_vendor_get(uint32_t dv_ind, char *brand,
1633  uint32_t len);
1634 
1664  char *serial_num, uint32_t len);
1688 rsmi_status_t rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id);
1689 
1725 rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len);
1726 
1745 rsmi_dev_drm_render_minor_get(uint32_t dv_ind, uint32_t *minor);
1746 
1769 rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id);
1770 
1792 rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id);
1793 
1808 rsmi_status_t rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id);
1809 
1832 rsmi_status_t rsmi_dev_guid_get(uint32_t dv_ind, uint64_t *guid);
1833 
1856 rsmi_status_t rsmi_dev_node_id_get(uint32_t dv_ind, uint32_t *node_id);
1857 
1886  rsmi_device_identifiers_t *identifiers);
1887  // end of IDQuer
1889 
1890 /*****************************************************************************/
1916 
1964 rsmi_status_t rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid);
1965 
1988 rsmi_status_t rsmi_topo_numa_affinity_get(uint32_t dv_ind, int32_t *numa_node);
1989 
2014 rsmi_status_t rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent,
2015  uint64_t *received, uint64_t *max_pkt_sz);
2016 
2040  uint64_t *counter);
2041  // end of PCIeQuer
2043 /*****************************************************************************/
2078 rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask);
2079  // end of PCIeCont
2081 
2082 /*****************************************************************************/
2118 rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power);
2119 
2142 rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power);
2143 
2178 rsmi_status_t rsmi_dev_power_get(uint32_t dv_ind, uint64_t *power,
2179  RSMI_POWER_TYPE *type);
2180 
2212 rsmi_dev_energy_count_get(uint32_t dv_ind, uint64_t *power,
2213  float *counter_resolution, uint64_t *timestamp);
2214 
2241 rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap);
2242 
2265 rsmi_dev_power_cap_default_get(uint32_t dv_ind, uint64_t *default_cap);
2266 
2299 rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind,
2300  uint64_t *max, uint64_t *min);
2301  // end of PowerQuer
2303 
2304 /*****************************************************************************/
2330 rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap);
2331 
2352 rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t reserved,
2353  rsmi_power_profile_preset_masks_t profile); // end of PowerCont
2355 /*****************************************************************************/
2356 
2357 
2358 
2359 /*****************************************************************************/
2392  uint64_t *total);
2393 
2422  uint64_t *used);
2423 
2447 rsmi_dev_memory_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent);
2448 
2485 rsmi_dev_memory_reserved_pages_get(uint32_t dv_ind, uint32_t *num_pages,
2486  rsmi_retired_page_record_t *records); // end of MemQuer
2488 
2520 rsmi_status_t rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind,
2521  int64_t *speed);
2522 
2551  uint32_t sensor_ind, int64_t *speed);
2552 
2579  uint32_t sensor_ind, uint64_t *max_speed);
2580 
2611 rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
2612  rsmi_temperature_metric_t metric, int64_t *temperature);
2613 
2645  rsmi_voltage_type_t sensor_type,
2646  rsmi_voltage_metric_t metric, int64_t *voltage); // end of PhysQuer
2648 
2649 /*****************************************************************************/
2669 rsmi_status_t rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind);
2670 
2693 rsmi_status_t rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind,
2694  uint64_t speed);
2695  // end of PhysCont
2697 /*****************************************************************************/
2728 rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent);
2729 
2761  rsmi_utilization_counter_t utilization_counters[],
2762  uint32_t count,
2763  uint64_t *timestamp);
2764 
2790  rsmi_activity_metric_t activity_metric_type,
2791  rsmi_activity_metric_counter_t* activity_metric_counter);
2792 
2814 rsmi_dev_activity_avg_mm_get(uint32_t dv_ind, uint16_t* avg_activity);
2815 
2840  rsmi_dev_perf_level_t *perf);
2841 
2864  uint64_t clkvalue);
2865 
2889 rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od);
2890 
2914 rsmi_status_t rsmi_dev_mem_overdrive_level_get(uint32_t dv_ind, uint32_t *od);
2915 
2949  rsmi_clk_type_t clk_type, rsmi_frequencies_t *f);
2950 
2965 
2990 
3012  rsmi_gpu_metrics_t *pgpu_metrics);
3013 
3036 rsmi_status_t rsmi_dev_clk_range_set(uint32_t dv_ind, uint64_t minclkvalue,
3037  uint64_t maxclkvalue,
3038  rsmi_clk_type_t clkType);
3039 
3062  uint64_t clkvalue,
3063  rsmi_clk_type_t clkType);
3064 
3088  uint64_t clkvalue,
3089  rsmi_clk_type_t clkType);
3090 
3112 rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint,
3113  uint64_t clkvalue, uint64_t voltvalue);
3114 
3154  uint32_t *num_regions, rsmi_freq_volt_region_t *buffer);
3155 
3192 rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind,
3193  rsmi_power_profile_status_t *status);
3194  // end of PerfQuer
3196 /*****************************************************************************/
3197 
3226 
3247 
3291 rsmi_status_t rsmi_dev_overdrive_level_set(uint32_t dv_ind, uint32_t od);
3292 
3332 rsmi_status_t rsmi_dev_overdrive_level_set_v1(uint32_t dv_ind, uint32_t od);
3333 
3369  rsmi_clk_type_t clk_type, uint64_t freq_bitmask);
3370  // end of PerfCont
3372 
3373 /*****************************************************************************/
3394 
3422  uint32_t len);
3423 
3451 rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len);
3452 
3479  uint64_t *fw_version);
3480 
3502  uint64_t *gfx_version);
3503  // end of VersQuer
3505 
3506 /*****************************************************************************/
3540 
3569  uint64_t *enabled_blocks);
3570 
3597  rsmi_ras_err_state_t *state);
3613 rsmi_status_string(rsmi_status_t status, const char **status_string);
3614  // end of ErrQuer
3616 
3617 /*****************************************************************************/
3740 
3771  rsmi_event_handle_t *evnt_handle);
3772 
3788 
3808  rsmi_counter_command_t cmd, void *cmd_args);
3809 
3829  rsmi_counter_value_t *value);
3830 
3852  rsmi_event_group_t grp, uint32_t *available); // end of PerfCntr
3854 
3855 /*****************************************************************************/
3897 
3921 
3957 rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
3958  uint32_t *num_devices);
3959 
3980 rsmi_compute_process_info_by_device_get(uint32_t pid, uint32_t dv_ind,
3981  rsmi_process_info_t *proc);
3982  // end of SysInfo
3984 
3985 /*****************************************************************************/
4016 
4030 rsmi_dev_xgmi_error_reset(uint32_t dv_ind);
4031 
4051 rsmi_dev_xgmi_hive_id_get(uint32_t dv_ind, uint64_t *hive_id);
4052  // end of SysInfo
4054 
4055 /*****************************************************************************/
4079 rsmi_topo_get_numa_node_number(uint32_t dv_ind, uint32_t *numa_node);
4080 
4102 rsmi_topo_get_link_weight(uint32_t dv_ind_src, uint32_t dv_ind_dst,
4103  uint64_t *weight);
4104 
4128 rsmi_minmax_bandwidth_get(uint32_t dv_ind_src, uint32_t dv_ind_dst,
4129  uint64_t *min_bandwidth, uint64_t *max_bandwidth);
4130 
4164 rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst,
4165  uint64_t *hops, RSMI_IO_LINK_TYPE *type);
4166 
4188 rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
4189  bool *accessible);
4190  // end of HWTopo
4192 
4193 /*****************************************************************************/
4229 rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
4230  uint32_t len);
4231 
4258  rsmi_compute_partition_type_t compute_partition);
4259 
4282 rsmi_status_t rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id);
4283  // end of ComputePartition
4285 
4286 /*****************************************************************************/
4322 rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition,
4323  uint32_t len);
4324 
4356  uint32_t dv_ind, char *memory_partition_caps, uint32_t len);
4357 
4383  rsmi_memory_partition_type_t memory_partition);
4384  // end of memory_partition
4386 
4387 /*****************************************************************************/
4522  rsmi_func_id_iter_handle_t *handle);
4523 
4550  rsmi_func_id_iter_handle_t *var_iter);
4551 
4572 
4586 
4607  rsmi_func_id_value_t *value);
4608  // end of APISupport
4610 
4611 /*****************************************************************************/
4633 
4661 rsmi_event_notification_mask_set(uint32_t dv_ind, uint64_t mask);
4662 
4705  uint32_t *num_elem, rsmi_evt_notification_data_t *data);
4706 
4726  // end of EvntNotif
4728 
4729 
4730 /*****************************************************************************/
4755 
4773 rsmi_dev_metrics_xcd_counter_get(uint32_t dv_ind, uint16_t* xcd_counter_value);
4774 
4787 rsmi_dev_metrics_log_get(uint32_t dv_ind);
4788  // end of DevMetricsHeaderInfoGet
4790 
4791 #ifdef __cplusplus
4792 }
4793 #endif // __cplusplus
4794 #endif // ROCM_SMI_ROCM_SMI_H_
rsmi_status_t rsmi_func_iter_value_get(rsmi_func_id_iter_handle_t handle, rsmi_func_id_value_t *value)
Get the value associated with a function/variant iterator.
rsmi_status_t rsmi_dev_supported_variant_iterator_open(rsmi_func_id_iter_handle_t obj_h, rsmi_func_id_iter_handle_t *var_iter)
Get a variant iterator for a given handle.
rsmi_status_t rsmi_func_iter_next(rsmi_func_id_iter_handle_t handle)
Advance a function identifier iterator.
rsmi_status_t rsmi_dev_supported_func_iterator_close(rsmi_func_id_iter_handle_t *handle)
Close a variant iterator handle.
rsmi_status_t rsmi_dev_supported_func_iterator_open(uint32_t dv_ind, rsmi_func_id_iter_handle_t *handle)
Get a function name iterator of supported RSMI functions for a device.
rsmi_status_t rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id)
Retrieves the partition_id for a desired device.
rsmi_status_t rsmi_dev_compute_partition_set(uint32_t dv_ind, rsmi_compute_partition_type_t compute_partition)
Modifies a selected device's compute partition setting.
rsmi_status_t rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, uint32_t len)
Retrieves the current compute partitioning for a desired device.
rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_ras_err_state_t *state)
Retrieve the ECC status for a GPU block.
rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind, uint64_t *enabled_blocks)
Retrieve the enabled ECC bit-mask.
rsmi_status_t rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_error_count_t *ec)
Retrieve the error counts for a GPU block.
rsmi_status_t rsmi_status_string(rsmi_status_t status, const char **status_string)
Get a description of a provided RSMI error status.
rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind)
Close any file handles and free any resources used by event notification for a GPU.
rsmi_status_t rsmi_event_notification_get(int timeout_ms, uint32_t *num_elem, rsmi_evt_notification_data_t *data)
Collect event notifications, waiting a specified amount of time.
rsmi_status_t rsmi_event_notification_init(uint32_t dv_ind)
Prepare to collect event notifications for a GPU.
rsmi_status_t rsmi_event_notification_mask_set(uint32_t dv_ind, uint64_t mask)
Specify which events to collect for a device.
rsmi_status_t rsmi_dev_metrics_xcd_counter_get(uint32_t dv_ind, uint16_t *xcd_counter_value)
Get the 'xcd_counter' from the GPU metrics associated with the device.
rsmi_status_t rsmi_dev_metrics_header_info_get(uint32_t dv_ind, metrics_table_header_t *header_value)
Get the 'metrics_header_info' from the GPU metrics associated with the device.
rsmi_status_t rsmi_dev_metrics_log_get(uint32_t dv_ind)
Get the log from the GPU metrics associated with the device.
rsmi_status_t rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *hops, RSMI_IO_LINK_TYPE *type)
Retrieve the hops and the connection type between GPU to GPU/CPU.
rsmi_status_t rsmi_topo_get_link_weight(uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *weight)
Retrieve the weight for a connection between 2 GPUs.
rsmi_status_t rsmi_minmax_bandwidth_get(uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *min_bandwidth, uint64_t *max_bandwidth)
Retrieve minimal and maximal io link bandwidth between 2 GPUs.
rsmi_status_t rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst, bool *accessible)
Return P2P availability status between 2 GPUs.
rsmi_status_t rsmi_topo_get_numa_node_number(uint32_t dv_ind, uint32_t *numa_node)
Retrieve the NUMA CPU node number for a device.
rsmi_status_t rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len)
Get the name string of a gpu device.
rsmi_status_t rsmi_dev_device_identifiers_get(uint32_t dv_ind, rsmi_device_identifiers_t *identifiers)
Retrieves the device identifiers for a specific GPU device.
rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id)
Get Unique ID.
rsmi_status_t rsmi_num_monitor_devices(uint32_t *num_devices)
Get the number of devices that have monitor information.
rsmi_status_t rsmi_dev_guid_get(uint32_t dv_ind, uint64_t *guid)
Get the GUID, also known as the GPU device id, associated with the provided device index indicated by...
rsmi_status_t rsmi_dev_market_name_get(uint32_t dv_ind, char *market_name, uint32_t len)
Get the device's market name.
rsmi_status_t rsmi_dev_vram_vendor_get(uint32_t dv_ind, char *brand, uint32_t len)
Get the vram vendor string of a gpu device.
rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, uint16_t *sku)
Get the SKU for a desired device associated with the device with provided device index.
rsmi_status_t rsmi_dev_drm_render_minor_get(uint32_t dv_ind, uint32_t *minor)
Get the drm minor number associated with this device.
rsmi_status_t rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id)
Get the subsystem device id associated with the device with provided device index.
rsmi_status_t rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id)
Get the XGMI physical id associated with the device.
rsmi_status_t rsmi_dev_serial_number_get(uint32_t dv_ind, char *serial_num, uint32_t len)
Get the serial number string for a device.
rsmi_status_t rsmi_dev_vendor_name_get(uint32_t dv_ind, char *name, size_t len)
Get the name string for a given vendor ID.
rsmi_status_t rsmi_dev_brand_get(uint32_t dv_ind, char *brand, uint32_t len)
Get the brand string of a gpu device.
rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id)
Get the device id associated with the device with provided device index.
rsmi_status_t rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len)
Get the name string for the device subsystem.
rsmi_status_t rsmi_dev_node_id_get(uint32_t dv_ind, uint32_t *node_id)
Get the node id associated with the provided device index indicated by KFD.
rsmi_status_t rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision)
Get the device revision associated with the device.
rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id)
Get the device subsystem vendor id associated with the device with provided device index.
rsmi_status_t rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id)
Get the device vendor id associated with the device with provided device index.
rsmi_status_t rsmi_init(uint64_t init_flags)
Initialize ROCm SMI.
rsmi_status_t rsmi_shut_down(void)
Shutdown ROCm SMI.
rsmi_status_t rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *total)
Get the total amount of memory that exists.
rsmi_status_t rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *used)
Get the current memory usage.
rsmi_status_t rsmi_dev_memory_reserved_pages_get(uint32_t dv_ind, uint32_t *num_pages, rsmi_retired_page_record_t *records)
Get information about reserved ("retired") memory pages.
rsmi_status_t rsmi_dev_memory_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent)
Get percentage of time any device memory is being used.
rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask)
Control the set of allowed PCIe bandwidths that can be used.
rsmi_status_t rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent, uint64_t *received, uint64_t *max_pkt_sz)
Get PCIe traffic information.
rsmi_status_t rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth_t *bandwidth)
Get the list of possible PCIe bandwidths that are available.
rsmi_status_t rsmi_topo_numa_affinity_get(uint32_t dv_ind, int32_t *numa_node)
Get the NUMA node associated with a device.
rsmi_status_t rsmi_dev_pci_replay_counter_get(uint32_t dv_ind, uint64_t *counter)
Get PCIe replay counter.
rsmi_status_t rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid)
Get the unique PCI device identifier associated with a device.
rsmi_status_t rsmi_counter_control(rsmi_event_handle_t evt_handle, rsmi_counter_command_t cmd, void *cmd_args)
Issue performance counter control commands.
rsmi_status_t rsmi_counter_read(rsmi_event_handle_t evt_handle, rsmi_counter_value_t *value)
Read the current value of a performance counter.
rsmi_status_t rsmi_counter_available_counters_get(uint32_t dv_ind, rsmi_event_group_t grp, uint32_t *available)
Get the number of currently available counters.
rsmi_status_t rsmi_dev_counter_destroy(rsmi_event_handle_t evnt_handle)
Deallocate a performance counter object.
rsmi_status_t rsmi_dev_counter_create(uint32_t dv_ind, rsmi_event_type_t type, rsmi_event_handle_t *evnt_handle)
Create a performance counter object.
rsmi_status_t rsmi_dev_counter_group_supported(uint32_t dv_ind, rsmi_event_group_t group)
Tell if an event group is supported by a given device.
rsmi_status_t rsmi_dev_overdrive_level_set_v1(uint32_t dv_ind, uint32_t od)
Set the overdrive percent associated with the device with provided device index with the provided val...
rsmi_status_t rsmi_dev_perf_level_set(uint32_t dv_ind, rsmi_dev_perf_level_t perf_lvl)
Set the PowerPlay performance level associated with the device with provided device index with the pr...
rsmi_status_t rsmi_dev_overdrive_level_set(uint32_t dv_ind, uint32_t od)
Set the overdrive percent associated with the device with provided device index with the provided val...
rsmi_status_t rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, rsmi_clk_type_t clk_type, uint64_t freq_bitmask)
Control the set of allowed frequencies that can be used for the specified clock.
rsmi_status_t rsmi_dev_perf_level_set_v1(uint32_t dv_ind, rsmi_dev_perf_level_t perf_lvl)
Set the PowerPlay performance level associated with the device with provided device index with the pr...
rsmi_status_t rsmi_dev_clk_range_set(uint32_t dv_ind, uint64_t minclkvalue, uint64_t maxclkvalue, rsmi_clk_type_t clkType)
This function sets the clock range information.
rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind, uint32_t *num_regions, rsmi_freq_volt_region_t *buffer)
This function will retrieve the current valid regions in the frequency/voltage space.
rsmi_status_t rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_power_profile_status_t *status)
Get the list of available preset power profiles and an indication of which profile is currently activ...
rsmi_status_t rsmi_dev_od_clk_info_set(uint32_t dv_ind, rsmi_freq_ind_t level, uint64_t clkvalue, rsmi_clk_type_t clkType)
This function sets the clock frequency information.
rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level_t *perf)
Get the performance level of the device with provided device index.
rsmi_status_t rsmi_dev_gpu_reset(uint32_t dv_ind)
Reset the gpu associated with the device with provided device index.
rsmi_status_t rsmi_dev_activity_metric_get(uint32_t dv_ind, rsmi_activity_metric_t activity_metric_type, rsmi_activity_metric_counter_t *activity_metric_counter)
Get activity metric average utilization counter of the specified device.
rsmi_status_t rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv)
This function retrieves the voltage/frequency curve information.
rsmi_status_t rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *pgpu_metrics)
This function retrieves the gpu metrics information.
rsmi_status_t rsmi_perf_determinism_mode_set(uint32_t dv_ind, uint64_t clkvalue)
Enter performance determinism mode with provided device index.
rsmi_status_t rsmi_utilization_count_get(uint32_t dv_ind, rsmi_utilization_counter_t utilization_counters[], uint32_t count, uint64_t *timestamp)
Get coarse grain utilization counter of the specified device.
rsmi_status_t rsmi_dev_clk_extremum_set(uint32_t dv_ind, rsmi_freq_ind_t level, uint64_t clkvalue, rsmi_clk_type_t clkType)
This function sets the clock min/max level.
rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, uint64_t clkvalue, uint64_t voltvalue)
This function sets 1 of the 3 voltage curve points.
rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od)
Get the overdrive percent associated with the device with provided device index.
rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type, rsmi_frequencies_t *f)
Get the list of possible system clock speeds of device for a specified clock type.
rsmi_status_t rsmi_dev_activity_avg_mm_get(uint32_t dv_ind, uint16_t *avg_activity)
Get activity metric bandwidth average utilization counter of the specified device.
rsmi_status_t rsmi_dev_mem_overdrive_level_get(uint32_t dv_ind, uint32_t *od)
Get the memory clock overdrive percent associated with the device with provided device index.
rsmi_status_t rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent)
Get percentage of time device is busy doing any processing.
rsmi_status_t rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed)
Set the fan speed for the specified device with the provided speed, in RPMs.
rsmi_status_t rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind)
Reset the fan to automatic driver control.
rsmi_status_t rsmi_dev_volt_metric_get(uint32_t dv_ind, rsmi_voltage_type_t sensor_type, rsmi_voltage_metric_t metric, int64_t *voltage)
Get the voltage metric value for the specified metric, from the specified voltage sensor on the speci...
rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type, rsmi_temperature_metric_t metric, int64_t *temperature)
Get the temperature metric value for the specified metric, from the specified temperature sensor on t...
rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max_speed)
Get the max. fan speed of the device with provided device index.
rsmi_status_t rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)
Get the fan speed in RPMs of the device with the specified device index and 0-based sensor index.
rsmi_status_t rsmi_dev_fan_speed_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)
Get the fan speed for the specified device as a value relative to RSMI_MAX_FAN_SPEED.
rsmi_status_t rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap)
Set the power cap value.
rsmi_status_t rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t reserved, rsmi_power_profile_preset_masks_t profile)
Set the power profile.
rsmi_status_t rsmi_dev_energy_count_get(uint32_t dv_ind, uint64_t *power, float *counter_resolution, uint64_t *timestamp)
Get the energy accumulator counter of the device with provided device index.
rsmi_status_t rsmi_dev_power_cap_default_get(uint32_t dv_ind, uint64_t *default_cap)
Get the default power cap for the device specified by dv_ind.
rsmi_status_t rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power)
Get the average power consumption of the device with provided device index.
rsmi_status_t rsmi_dev_power_get(uint32_t dv_ind, uint64_t *power, RSMI_POWER_TYPE *type)
A generic get which attempts to retrieve current socket power (also known as instant power) of the dev...
rsmi_status_t rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap)
Get the cap on power which, when reached, causes the system to take action to reduce power.
rsmi_status_t rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max, uint64_t *min)
Get the range of valid values for the power cap.
rsmi_status_t rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power)
Get the current socket power (also known as instant power) of the device index provided.
rsmi_status_t rsmi_compute_process_info_by_device_get(uint32_t pid, uint32_t dv_ind, rsmi_process_info_t *proc)
Get the info of a process on a specific device.
rsmi_status_t rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices, uint32_t *num_devices)
Get the device indices currently being used by a process.
rsmi_status_t rsmi_compute_process_info_by_pid_get(uint32_t pid, rsmi_process_info_t *proc)
Get process information about a specific process.
rsmi_status_t rsmi_compute_process_info_get(rsmi_process_info_t *procs, uint32_t *num_items)
Get process information about processes currently using GPU.
rsmi_status_t rsmi_version_get(rsmi_version_t *version)
Get the build version information for the currently running build of RSMI.
rsmi_status_t rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len)
Get the VBIOS identifier string.
rsmi_status_t rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block, uint64_t *fw_version)
Get the firmware versions for a device.
rsmi_status_t rsmi_version_str_get(rsmi_sw_component_t component, char *ver_str, uint32_t len)
Get the driver version string for the current system.
rsmi_status_t rsmi_dev_target_graphics_version_get(uint32_t dv_ind, uint64_t *gfx_version)
Get the target graphics version for a GPU device.
rsmi_status_t rsmi_dev_xgmi_error_status(uint32_t dv_ind, rsmi_xgmi_status_t *status)
Retrieve the XGMI error status for a device.
rsmi_status_t rsmi_dev_xgmi_error_reset(uint32_t dv_ind)
Reset the XGMI error status for a device.
rsmi_status_t rsmi_dev_xgmi_hive_id_get(uint32_t dv_ind, uint64_t *hive_id)
Retrieve the XGMI hive id for a device.
rsmi_status_t rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, uint32_t len)
Retrieves the current memory partition for a desired device.
rsmi_status_t rsmi_dev_memory_partition_set(uint32_t dv_ind, rsmi_memory_partition_type_t memory_partition)
Modifies a selected device's current memory partition setting.
rsmi_status_t rsmi_dev_memory_partition_capabilities_get(uint32_t dv_ind, char *memory_partition_caps, uint32_t len)
Retrieves the available memory partition capabilities for a desired device.
rsmi_activity_metric_t
Activity (Utilization) Metrics. This enum is used to identify various activity metrics.
Definition: rocm_smi.h:490
@ RSMI_ACTIVITY_UMC
memory controller
Definition: rocm_smi.h:493
@ RSMI_ACTIVITY_MM
UVD or VCN.
Definition: rocm_smi.h:494
struct rsmi_func_id_iter_handle * rsmi_func_id_iter_handle_t
Opaque handle to function-support object.
Definition: rocm_smi.h:1271
rsmi_memory_page_status_t
Reserved Memory Page States.
Definition: rocm_smi.h:685
@ RSMI_MEM_PAGE_STATUS_UNRESERVABLE
Unable to reserve this page.
Definition: rocm_smi.h:691
@ RSMI_MEM_PAGE_STATUS_PENDING
Definition: rocm_smi.h:688
@ RSMI_MEM_PAGE_STATUS_RESERVED
Definition: rocm_smi.h:686
rsmi_status_t
Error codes returned by rocm_smi_lib functions.
Definition: rocm_smi.h:88
@ RSMI_STATUS_UNEXPECTED_DATA
Definition: rocm_smi.h:125
@ RSMI_STATUS_REFCOUNT_OVERFLOW
Definition: rocm_smi.h:130
@ RSMI_STATUS_NOT_FOUND
Definition: rocm_smi.h:115
@ RSMI_STATUS_UNKNOWN_ERROR
An unknown error occurred.
Definition: rocm_smi.h:140
@ RSMI_STATUS_INIT_ERROR
Definition: rocm_smi.h:107
@ RSMI_STATUS_DRM_ERROR
Error when calling libdrm.
Definition: rocm_smi.h:136
@ RSMI_STATUS_SETTING_UNAVAILABLE
Definition: rocm_smi.h:132
@ RSMI_STATUS_INSUFFICIENT_SIZE
Definition: rocm_smi.h:117
@ RSMI_STATUS_INVALID_ARGS
Passed in arguments are not valid.
Definition: rocm_smi.h:90
@ RSMI_STATUS_NOT_SUPPORTED
Definition: rocm_smi.h:91
@ RSMI_STATUS_FAIL_LOAD_SYMBOL
Failed to load symbol.
Definition: rocm_smi.h:138
@ RSMI_STATUS_NO_DATA
Definition: rocm_smi.h:123
@ RSMI_STATUS_UNEXPECTED_SIZE
Definition: rocm_smi.h:121
@ RSMI_STATUS_AMDGPU_RESTART_ERR
Definition: rocm_smi.h:134
@ RSMI_STATUS_FILE_ERROR
Definition: rocm_smi.h:94
@ RSMI_STATUS_BUSY
Definition: rocm_smi.h:127
@ RSMI_STATUS_FAIL_LOAD_MODULE
Failed to load library.
Definition: rocm_smi.h:137
@ RSMI_STATUS_NOT_YET_IMPLEMENTED
Definition: rocm_smi.h:111
@ RSMI_STATUS_OUT_OF_RESOURCES
Definition: rocm_smi.h:102
@ RSMI_STATUS_INTERRUPT
Definition: rocm_smi.h:119
@ RSMI_STATUS_INTERNAL_EXCEPTION
An internal exception was caught.
Definition: rocm_smi.h:104
@ RSMI_STATUS_SUCCESS
Operation was successful.
Definition: rocm_smi.h:89
@ RSMI_STATUS_INPUT_OUT_OF_BOUNDS
Definition: rocm_smi.h:105
@ RSMI_STATUS_PERMISSION
Definition: rocm_smi.h:99
RSMI_UTILIZATION_COUNTER_TYPE
The utilization counter type.
Definition: rocm_smi.h:712
@ RSMI_COARSE_GRAIN_MEM_ACTIVITY
Memory Activity.
Definition: rocm_smi.h:716
@ RSMI_UTILIZATION_COUNTER_FIRST
GFX Activity.
Definition: rocm_smi.h:713
_RSMI_IO_LINK_TYPE
Types for IO Link.
Definition: rocm_smi.h:697
@ RSMI_IOLINK_TYPE_XGMI
XGMI.
Definition: rocm_smi.h:700
@ RSMI_IOLINK_TYPE_UNDEFINED
unknown type.
Definition: rocm_smi.h:698
@ RSMI_IOLINK_TYPE_PCIEXPRESS
PCI Express.
Definition: rocm_smi.h:699
@ RSMI_IOLINK_TYPE_SIZE
Max of IO Link types.
Definition: rocm_smi.h:702
@ RSMI_IOLINK_TYPE_NUMIOLINKTYPES
Number of IO Link types.
Definition: rocm_smi.h:701
rsmi_sw_component_t
Available software components.
Definition: rocm_smi.h:196
@ RSMI_SW_COMP_DRIVER
Driver.
Definition: rocm_smi.h:199
rsmi_event_group_t
Enum denoting an event group. The value of the enum is the base value for all the event enums in the ...
Definition: rocm_smi.h:219
@ RSMI_EVNT_GRP_XGMI
Data Fabric (XGMI) related events.
Definition: rocm_smi.h:220
@ RSMI_EVNT_GRP_XGMI_DATA_OUT
XGMI Outbound data.
Definition: rocm_smi.h:221
#define MAX_EVENT_NOTIFICATION_MSG_SIZE
Maximum number of characters an event notification message will be.
Definition: rocm_smi.h:342
#define RSMI_MAX_NUM_CLKS
This should match kRSMI_MAX_NUM_CLKS.
Definition: rocm_smi.h:968
#define RSMI_MAX_NUM_JPEG_ENGS
This should match kRSMI_MAX_JPEG_ENGINES.
Definition: rocm_smi.h:958
rsmi_dev_perf_level_t
PowerPlay performance levels.
Definition: rocm_smi.h:163
@ RSMI_DEV_PERF_LEVEL_LOW
Definition: rocm_smi.h:167
@ RSMI_DEV_PERF_LEVEL_MANUAL
Definition: rocm_smi.h:171
@ RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK
Definition: rocm_smi.h:176
@ RSMI_DEV_PERF_LEVEL_UNKNOWN
Unknown performance level.
Definition: rocm_smi.h:184
@ RSMI_DEV_PERF_LEVEL_DETERMINISM
Performance determinism state.
Definition: rocm_smi.h:180
@ RSMI_DEV_PERF_LEVEL_HIGH
Definition: rocm_smi.h:169
@ RSMI_DEV_PERF_LEVEL_STABLE_STD
Definition: rocm_smi.h:173
@ RSMI_DEV_PERF_LEVEL_STABLE_PEAK
Stable power state with peak clocks.
Definition: rocm_smi.h:175
@ RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK
Definition: rocm_smi.h:178
@ RSMI_DEV_PERF_LEVEL_AUTO
Performance level is "auto".
Definition: rocm_smi.h:164
rsmi_voltage_metric_t
Voltage Metrics. This enum is used to identify various voltage metrics. Corresponding values will be ...
Definition: rocm_smi.h:503
@ RSMI_VOLT_LOWEST
Historical minimum voltage.
Definition: rocm_smi.h:512
@ RSMI_VOLT_MIN_CRIT
Voltage critical min value.
Definition: rocm_smi.h:508
@ RSMI_VOLT_MAX_CRIT
Voltage critical max value.
Definition: rocm_smi.h:510
@ RSMI_VOLT_CURRENT
Voltage current value.
Definition: rocm_smi.h:504
@ RSMI_VOLT_MIN
Voltage min value.
Definition: rocm_smi.h:509
@ RSMI_VOLT_AVERAGE
Average voltage.
Definition: rocm_smi.h:511
@ RSMI_VOLT_MAX
Voltage max value.
Definition: rocm_smi.h:507
@ RSMI_VOLT_HIGHEST
Historical maximum voltage.
Definition: rocm_smi.h:513
rsmi_evt_notification_type_t
Definition: rocm_smi.h:317
@ RSMI_EVT_NOTIF_VMFAULT
VM page fault.
Definition: rocm_smi.h:319
@ RSMI_EVT_NOTIF_NONE
Unused.
Definition: rocm_smi.h:318
#define RSMI_NUM_VOLTAGE_CURVE_POINTS
The number of points that make up a voltage-frequency curve definition.
Definition: rocm_smi.h:82
#define RSMI_MAX_NUM_GFX_CLKS
This should match kRSMI_MAX_NUM_GFX_CLKS.
Definition: rocm_smi.h:978
rsmi_xgmi_status_t
XGMI Status.
Definition: rocm_smi.h:668
rsmi_compute_partition_type_t
Compute Partition. This enum is used to identify various compute partitioning settings.
Definition: rocm_smi.h:379
@ RSMI_COMPUTE_PARTITION_SPX
Definition: rocm_smi.h:381
@ RSMI_COMPUTE_PARTITION_DPX
Definition: rocm_smi.h:383
@ RSMI_COMPUTE_PARTITION_QPX
Definition: rocm_smi.h:387
@ RSMI_COMPUTE_PARTITION_TPX
Definition: rocm_smi.h:385
@ RSMI_COMPUTE_PARTITION_CPX
Definition: rocm_smi.h:389
rsmi_voltage_type_t
This enumeration is used to indicate which type of voltage reading should be obtained.
Definition: rocm_smi.h:522
@ RSMI_VOLT_TYPE_VDDGFX
Vddgfx GPU voltage.
Definition: rocm_smi.h:525
@ RSMI_VOLT_TYPE_INVALID
Invalid type.
Definition: rocm_smi.h:529
@ RSMI_VOLT_TYPE_VDDBOARD
Voltage for VDDBOARD.
Definition: rocm_smi.h:526
rsmi_temperature_metric_t
Temperature Metrics. This enum is used to identify various temperature metrics. Corresponding values ...
Definition: rocm_smi.h:425
@ RSMI_TEMP_CURRENT
Temperature current value.
Definition: rocm_smi.h:426
@ RSMI_TEMP_LOWEST
Historical minimum temperature.
Definition: rocm_smi.h:457
@ RSMI_TEMP_CRIT_MIN
Definition: rocm_smi.h:449
@ RSMI_TEMP_CRIT_MIN_HYST
Definition: rocm_smi.h:452
@ RSMI_TEMP_MIN
Temperature min value.
Definition: rocm_smi.h:430
@ RSMI_TEMP_MAX
Temperature max value.
Definition: rocm_smi.h:429
@ RSMI_TEMP_EMERGENCY
Definition: rocm_smi.h:442
@ RSMI_TEMP_MAX_HYST
Definition: rocm_smi.h:431
@ RSMI_TEMP_MIN_HYST
Definition: rocm_smi.h:434
@ RSMI_TEMP_CRITICAL_HYST
Definition: rocm_smi.h:439
@ RSMI_TEMP_EMERGENCY_HYST
Definition: rocm_smi.h:446
@ RSMI_TEMP_HIGHEST
Historical maximum temperature.
Definition: rocm_smi.h:458
@ RSMI_TEMP_CRITICAL
Definition: rocm_smi.h:437
@ RSMI_TEMP_OFFSET
Definition: rocm_smi.h:455
rsmi_ras_err_state_t
The current ECC state.
Definition: rocm_smi.h:591
@ RSMI_RAS_ERR_STATE_MULT_UC
Multiple uncorrectable errors.
Definition: rocm_smi.h:596
@ RSMI_RAS_ERR_STATE_POISON
Definition: rocm_smi.h:597
@ RSMI_RAS_ERR_STATE_DISABLED
ECC is disabled.
Definition: rocm_smi.h:593
@ RSMI_RAS_ERR_STATE_PARITY
ECC errors present, but type unknown.
Definition: rocm_smi.h:594
@ RSMI_RAS_ERR_STATE_SING_C
Single correctable error.
Definition: rocm_smi.h:595
@ RSMI_RAS_ERR_STATE_ENABLED
ECC is enabled.
Definition: rocm_smi.h:599
@ RSMI_RAS_ERR_STATE_NONE
No current errors.
Definition: rocm_smi.h:592
#define RSMI_NUM_HBM_INSTANCES
This should match kRSMI_MAX_NUM_HBM_INSTANCES.
Definition: rocm_smi.h:948
#define RSMI_MAX_NUM_XCC
This should match kRSMI_MAX_NUM_XCC; XCC - Accelerated Compute Core, the collection of compute units,...
Definition: rocm_smi.h:989
rsmi_event_type_t
Event type enum. Events belonging to a particular event group rsmi_event_group_t should begin enumera...
Definition: rocm_smi.h:231
@ RSMI_EVNT_XGMI_1_RESPONSE_TX
Definition: rocm_smi.h:258
@ RSMI_EVNT_XGMI_DATA_OUT_5
Outbound beats to neighbor 5.
Definition: rocm_smi.h:288
@ RSMI_EVNT_XGMI_1_BEATS_TX
Definition: rocm_smi.h:260
@ RSMI_EVNT_XGMI_1_NOP_TX
NOPs sent to neighbor 1.
Definition: rocm_smi.h:255
@ RSMI_EVNT_XGMI_0_NOP_TX
NOPs sent to neighbor 0.
Definition: rocm_smi.h:235
@ RSMI_EVNT_XGMI_1_REQUEST_TX
neighbor 1
Definition: rocm_smi.h:256
@ RSMI_EVNT_XGMI_DATA_OUT_3
Outbound beats to neighbor 3.
Definition: rocm_smi.h:286
@ RSMI_EVNT_XGMI_DATA_OUT_4
Outbound beats to neighbor 4.
Definition: rocm_smi.h:287
@ RSMI_EVNT_XGMI_DATA_OUT_2
Outbound beats to neighbor 2.
Definition: rocm_smi.h:285
@ RSMI_EVNT_XGMI_0_RESPONSE_TX
Definition: rocm_smi.h:238
@ RSMI_EVNT_XGMI_DATA_OUT_1
Outbound beats to neighbor 1.
Definition: rocm_smi.h:284
@ RSMI_EVNT_XGMI_0_BEATS_TX
Data beats sent to neighbor 0; Each beat represents 32 bytes.
Definition: rocm_smi.h:254
@ RSMI_EVNT_XGMI_0_REQUEST_TX
Definition: rocm_smi.h:236
rsmi_freq_ind_t
The values of this enum are used as frequency identifiers.
Definition: rocm_smi.h:621
@ RSMI_FREQ_IND_MAX
Index used for the maximum frequency value.
Definition: rocm_smi.h:623
@ RSMI_FREQ_IND_MIN
Index used for the minimum frequency value.
Definition: rocm_smi.h:622
@ RSMI_FREQ_IND_INVALID
An invalid frequency index.
Definition: rocm_smi.h:624
rsmi_memory_type_t
Types of memory.
Definition: rocm_smi.h:608
@ RSMI_MEM_TYPE_VRAM
VRAM memory.
Definition: rocm_smi.h:611
@ RSMI_MEM_TYPE_GTT
GTT memory.
Definition: rocm_smi.h:613
@ RSMI_MEM_TYPE_VIS_VRAM
VRAM memory that is visible.
Definition: rocm_smi.h:612
rsmi_power_profile_preset_masks_t
Pre-set Profile Selections. These bitmasks can be AND'd with the rsmi_power_profile_status_t....
Definition: rocm_smi.h:538
@ RSMI_PWR_PROF_PRST_LAST
Invalid power profile.
Definition: rocm_smi.h:548
@ RSMI_PWR_PROF_PRST_VIDEO_MASK
Video Power Profile.
Definition: rocm_smi.h:540
@ RSMI_PWR_PROF_PRST_VR_MASK
VR Power Profile.
Definition: rocm_smi.h:543
@ RSMI_PWR_PROF_PRST_CUSTOM_MASK
Custom Power Profile.
Definition: rocm_smi.h:539
@ RSMI_PWR_PROF_PRST_POWER_SAVING_MASK
Power Saving Profile.
Definition: rocm_smi.h:541
@ RSMI_PWR_PROF_PRST_COMPUTE_MASK
Compute Power Profile.
Definition: rocm_smi.h:542
@ RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT
Default Boot Up Profile.
Definition: rocm_smi.h:547
rsmi_gpu_block_t
This enum is used to identify different GPU blocks.
Definition: rocm_smi.h:560
@ RSMI_GPU_BLOCK_SMN
SMN block.
Definition: rocm_smi.h:574
@ RSMI_GPU_BLOCK_ATHUB
ATHUB block.
Definition: rocm_smi.h:569
@ RSMI_GPU_BLOCK_GFX
GFX block.
Definition: rocm_smi.h:567
@ RSMI_GPU_BLOCK_MMHUB
MMHUB block.
Definition: rocm_smi.h:568
@ RSMI_GPU_BLOCK_FUSE
Fuse block.
Definition: rocm_smi.h:578
@ RSMI_GPU_BLOCK_HDP
HDP block.
Definition: rocm_smi.h:571
@ RSMI_GPU_BLOCK_DF
DF block.
Definition: rocm_smi.h:573
@ RSMI_GPU_BLOCK_SEM
SEM block.
Definition: rocm_smi.h:575
@ RSMI_GPU_BLOCK_INVALID
Definition: rocm_smi.h:561
@ RSMI_GPU_BLOCK_MP1
MP1 block.
Definition: rocm_smi.h:577
@ RSMI_GPU_BLOCK_XGMI_WAFL
XGMI block.
Definition: rocm_smi.h:572
@ RSMI_GPU_BLOCK_UMC
UMC block.
Definition: rocm_smi.h:565
@ RSMI_GPU_BLOCK_LAST
for supported blocks
Definition: rocm_smi.h:580
@ RSMI_GPU_BLOCK_PCIE_BIF
PCIE_BIF block.
Definition: rocm_smi.h:570
@ RSMI_GPU_BLOCK_MP0
MP0 block.
Definition: rocm_smi.h:576
@ RSMI_GPU_BLOCK_SDMA
SDMA block.
Definition: rocm_smi.h:566
#define RSMI_MAX_NUM_VCNS
This should match kRSMI_MAX_NUM_VCNS.
Definition: rocm_smi.h:953
#define RSMI_MAX_NUM_JPEG_ENG_V1
This should match kRSMI_MAX_NUM_JPEG_ENG_V1.
Definition: rocm_smi.h:963
rsmi_fw_block_t
The values of this enum are used to identify the various firmware blocks.
Definition: rocm_smi.h:635
rsmi_init_flags_t
Initialization flags.
Definition: rocm_smi.h:149
@ RSMI_INIT_FLAG_RESRV_TEST1
Reserved for test.
Definition: rocm_smi.h:157
@ RSMI_INIT_FLAG_THRAD_ONLY_MUTEX
The mutex limit to thread.
Definition: rocm_smi.h:156
@ RSMI_INIT_FLAG_ALL_GPUS
Definition: rocm_smi.h:150
enum _RSMI_IO_LINK_TYPE RSMI_IO_LINK_TYPE
Types for IO Link.
#define RSMI_MAX_NUM_FREQUENCIES
Definition: rocm_smi.h:75
#define RSMI_MAX_NUM_XGMI_LINKS
This should match kRSMI_MAX_NUM_XGMI_LINKS.
Definition: rocm_smi.h:973
uintptr_t rsmi_event_handle_t
Handle to performance event counter.
Definition: rocm_smi.h:211
rsmi_counter_command_t
Definition: rocm_smi.h:297
@ RSMI_CNTR_CMD_START
Start the counter.
Definition: rocm_smi.h:298
@ RSMI_CNTR_CMD_STOP
Definition: rocm_smi.h:299
uint64_t rsmi_bit_field_t
Bitfield used in various RSMI calls.
Definition: rocm_smi.h:677
#define RSMI_MAX_NUM_XCP
This should match kRSMI_MAX_NUM_XCP; XCP - Accelerated Compute Processor, also referred to as the Gra...
Definition: rocm_smi.h:1001
rsmi_clk_type_t
Definition: rocm_smi.h:356
@ RSMI_CLK_TYPE_MEM
Memory clock.
Definition: rocm_smi.h:363
@ RSMI_CLK_TYPE_DCEF
Display Controller Engine clock.
Definition: rocm_smi.h:361
@ RSMI_CLK_TYPE_PCIE
PCIE clock.
Definition: rocm_smi.h:364
@ RSMI_CLK_TYPE_SOC
SOC clock.
Definition: rocm_smi.h:362
@ RSMI_CLK_TYPE_DF
Definition: rocm_smi.h:359
@ RSMI_CLK_TYPE_SYS
System clock.
Definition: rocm_smi.h:357
rsmi_memory_partition_type_t
Memory Partitions. This enum is used to identify various memory partition types.
Definition: rocm_smi.h:400
@ RSMI_MEMORY_PARTITION_NPS8
Definition: rocm_smi.h:410
@ RSMI_MEMORY_PARTITION_NPS4
Definition: rocm_smi.h:407
@ RSMI_MEMORY_PARTITION_NPS2
Definition: rocm_smi.h:404
@ RSMI_MEMORY_PARTITION_NPS1
Definition: rocm_smi.h:402
union id rsmi_func_id_value_t
This union holds the value of an rsmi_func_id_iter_handle_t. The value may be a function name,...
RSMI_POWER_TYPE
Power types.
Definition: rocm_smi.h:723
@ RSMI_AVERAGE_POWER
Average Power.
Definition: rocm_smi.h:724
@ RSMI_INVALID_POWER
Invalid / Undetected Power.
Definition: rocm_smi.h:726
@ RSMI_CURRENT_POWER
Current / Instant Power.
Definition: rocm_smi.h:725
rsmi_temperature_type_t
This enumeration is used to indicate from which part of the device a temperature reading should be ob...
Definition: rocm_smi.h:470
@ RSMI_TEMP_TYPE_HBM_2
HBM temperature instance 2.
Definition: rocm_smi.h:479
@ RSMI_TEMP_TYPE_HBM_0
HBM temperature instance 0.
Definition: rocm_smi.h:477
@ RSMI_TEMP_TYPE_HBM_1
HBM temperature instance 1.
Definition: rocm_smi.h:478
@ RSMI_TEMP_TYPE_MEMORY
VRAM temperature.
Definition: rocm_smi.h:476
@ RSMI_TEMP_TYPE_INVALID
Invalid type.
Definition: rocm_smi.h:482
@ RSMI_TEMP_TYPE_EDGE
Edge GPU temperature.
Definition: rocm_smi.h:473
@ RSMI_TEMP_TYPE_JUNCTION
Definition: rocm_smi.h:474
@ RSMI_TEMP_TYPE_HBM_3
HBM temperature instance 3.
Definition: rocm_smi.h:480
The following structures hold the gpu statistics for a device.
Definition: rocm_smi.h:1006
uint64_t gfx_below_host_limit_ppt_acc[RSMI_MAX_NUM_XCC]
Definition: rocm_smi.h:1027
The following structures hold the gpu metrics values for a device.
Definition: rocm_smi.h:927
This structure holds information about the possible activity averages. Specifically,...
Definition: rocm_smi.h:831
uint16_t average_mm_activity
UVD or VCN.
Definition: rocm_smi.h:835
uint16_t average_gfx_activity
Average graphics activity.
Definition: rocm_smi.h:833
uint16_t average_umc_activity
memory controller
Definition: rocm_smi.h:834
Definition: rocm_smi.h:306
uint64_t value
Counter value.
Definition: rocm_smi.h:307
uint64_t time_enabled
Definition: rocm_smi.h:308
uint64_t time_running
Definition: rocm_smi.h:310
Structure to hold various identifiers for a GPU device.
Definition: rocm_smi.h:1311
uint32_t drm_render_minor
The DRM render minor number of the device.
Definition: rocm_smi.h:1315
uint32_t card_index
The card index of the device.
Definition: rocm_smi.h:1313
uint64_t bdfid
The PCI Bus/Device/Function identifier (BDFID) of the device.
Definition: rocm_smi.h:1318
uint32_t partition_id
The partition ID of the device.
Definition: rocm_smi.h:1324
uint64_t kfd_gpu_id
The KFD (Kernel Fusion Driver) GPU ID of the device.
Definition: rocm_smi.h:1321
This structure holds error counts.
Definition: rocm_smi.h:1249
uint64_t correctable_err
Accumulated correctable errors.
Definition: rocm_smi.h:1250
uint64_t uncorrectable_err
Accumulated uncorrectable errors.
Definition: rocm_smi.h:1251
Definition: rocm_smi.h:347
rsmi_evt_notification_type_t event
Event type.
Definition: rocm_smi.h:349
uint32_t dv_ind
Index of device that corresponds to the event.
Definition: rocm_smi.h:348
This structure holds 2 rsmi_range_t's, one for frequency and one for voltage. These 2 ranges indicate...
Definition: rocm_smi.h:877
rsmi_range_t volt_range
The voltage range for this VDDC Curve point.
Definition: rocm_smi.h:879
rsmi_range_t freq_range
The frequency range for this VDDC Curve point.
Definition: rocm_smi.h:878
This structure holds information about clock frequencies.
Definition: rocm_smi.h:779
uint32_t current
Definition: rocm_smi.h:793
uint32_t num_supported
Definition: rocm_smi.h:788
bool has_deep_sleep
Definition: rocm_smi.h:783
Definition: rocm_smi.h:1033
This structure represents a point on the frequency-voltage plane.
Definition: rocm_smi.h:864
uint64_t frequency
Frequency coordinate (in Hz)
Definition: rocm_smi.h:865
uint64_t voltage
Voltage coordinate (in mV)
Definition: rocm_smi.h:866
Definition: rocm_smi.h:888
This structure holds the frequency-voltage values for a device.
Definition: rocm_smi.h:902
rsmi_range_t mclk_freq_limits
The range of possible MCLK values.
Definition: rocm_smi.h:907
uint32_t num_regions
The number of voltage curve regions.
Definition: rocm_smi.h:913
rsmi_range_t curr_mclk_range
Definition: rocm_smi.h:904
rsmi_range_t sclk_freq_limits
The range of possible SCLK values.
Definition: rocm_smi.h:906
rsmi_range_t curr_sclk_range
The current SCLK frequency range.
Definition: rocm_smi.h:903
rsmi_od_volt_curve_t curve
The current voltage curve.
Definition: rocm_smi.h:912
This structure holds information about the possible PCIe bandwidths. Specifically,...
Definition: rocm_smi.h:810
rsmi_frequencies_t transfer_rate
Definition: rocm_smi.h:814
This structure contains information about which power profiles are supported by the system for a give...
Definition: rocm_smi.h:756
uint32_t num_profiles
Definition: rocm_smi.h:770
rsmi_bit_field_t available_profiles
Definition: rocm_smi.h:760
rsmi_power_profile_preset_masks_t current
Definition: rocm_smi.h:765
This structure contains information specific to a process.
Definition: rocm_smi.h:1257
uint32_t cu_occupancy
Compute Unit usage in percent.
Definition: rocm_smi.h:1262
uint32_t pasid
PASID.
Definition: rocm_smi.h:1259
uint32_t process_id
Process ID.
Definition: rocm_smi.h:1258
uint64_t sdma_usage
SDMA usage in microseconds.
Definition: rocm_smi.h:1261
uint64_t vram_usage
VRAM usage.
Definition: rocm_smi.h:1260
This structure represents a range (e.g., frequencies or voltages).
Definition: rocm_smi.h:853
uint64_t upper_bound
Upper bound of range.
Definition: rocm_smi.h:855
uint64_t lower_bound
Lower bound of range.
Definition: rocm_smi.h:854
Reserved Memory Page Record.
Definition: rocm_smi.h:740
uint64_t page_size
Page size.
Definition: rocm_smi.h:742
rsmi_memory_page_status_t status
Page "reserved" status.
Definition: rocm_smi.h:743
uint64_t page_address
Start address of page.
Definition: rocm_smi.h:741
The utilization counter data.
Definition: rocm_smi.h:732
RSMI_UTILIZATION_COUNTER_TYPE type
Utilization counter type.
Definition: rocm_smi.h:733
uint64_t value
Utilization counter value.
Definition: rocm_smi.h:734
This structure holds version information.
Definition: rocm_smi.h:841
uint32_t patch
Patch, build or stepping version.
Definition: rocm_smi.h:844
const char * build
Build string.
Definition: rocm_smi.h:845
uint32_t minor
Minor version.
Definition: rocm_smi.h:843
uint32_t major
Major version.
Definition: rocm_smi.h:842
This union holds the value of an rsmi_func_id_iter_handle_t. The value may be a function name,...
Definition: rocm_smi.h:1282
rsmi_temperature_metric_t temp_metric
Definition: rocm_smi.h:1289
rsmi_memory_type_t memory_type
Definition: rocm_smi.h:1287
rsmi_event_group_t evnt_group
Definition: rocm_smi.h:1293
rsmi_clk_type_t clk_type
Definition: rocm_smi.h:1295
uint64_t id
uint64_t representation of value
Definition: rocm_smi.h:1283
rsmi_event_type_t evnt_type
Definition: rocm_smi.h:1291
rsmi_gpu_block_t gpu_block_type
Definition: rocm_smi.h:1299
rsmi_fw_block_t fw_block
Definition: rocm_smi.h:1297
const char * name
name string (applicable to functions only)
Definition: rocm_smi.h:1284