Global enum and defines#
Data Structures | |
struct | dim3 |
struct | hipLaunchParams |
struct | hipExternalMemoryHandleDesc |
struct | hipExternalMemoryBufferDesc |
struct | hipExternalSemaphoreHandleDesc |
struct | hipExternalSemaphoreSignalParams |
struct | hipExternalSemaphoreWaitParams |
Typedefs | |
typedef enum __HIP_NODISCARD hipError_t | hipError_t |
typedef void * | hipExternalMemory_t |
typedef void * | hipExternalSemaphore_t |
typedef struct _hipGraphicsResource | hipGraphicsResource |
typedef hipGraphicsResource * | hipGraphicsResource_t |
Detailed Description
Macro Definition Documentation
◆ __HIP_NODISCARD
#define __HIP_NODISCARD |
◆ hipArrayCubemap
#define hipArrayCubemap 0x04 |
◆ hipArrayDefault
#define hipArrayDefault 0x00 |
Default HIP array allocation flag.
◆ hipArrayLayered
#define hipArrayLayered 0x01 |
◆ hipArraySurfaceLoadStore
#define hipArraySurfaceLoadStore 0x02 |
◆ hipArrayTextureGather
#define hipArrayTextureGather 0x08 |
◆ hipCooperativeLaunchMultiDeviceNoPostSync
#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02 |
◆ hipCooperativeLaunchMultiDeviceNoPreSync
#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01 |
◆ hipCpuDeviceId
#define hipCpuDeviceId ((int)-1) |
◆ hipDeviceLmemResizeToMax
#define hipDeviceLmemResizeToMax 0x16 |
◆ hipDeviceMallocDefault
#define hipDeviceMallocDefault 0x0 |
◆ hipDeviceMallocFinegrained
#define hipDeviceMallocFinegrained 0x1 |
Memory is allocated in fine grained region of device.
◆ hipDeviceMapHost
#define hipDeviceMapHost 0x8 |
◆ hipDeviceScheduleAuto
#define hipDeviceScheduleAuto 0x0 |
Automatically select between Spin and Yield.
◆ hipDeviceScheduleBlockingSync
#define hipDeviceScheduleBlockingSync 0x4 |
◆ hipDeviceScheduleMask
#define hipDeviceScheduleMask 0x7 |
◆ hipDeviceScheduleSpin
#define hipDeviceScheduleSpin 0x1 |
Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power.
◆ hipDeviceScheduleYield
#define hipDeviceScheduleYield 0x2 |
Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system.
◆ hipEventBlockingSync
#define hipEventBlockingSync 0x1 |
Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
◆ hipEventDefault
#define hipEventDefault 0x0 |
Default flags.
◆ hipEventDisableTiming
#define hipEventDisableTiming 0x2 |
Disable event's capability to record timing information. May improve performance.
◆ hipEventInterprocess
#define hipEventInterprocess 0x4 |
Event can support IPC. Warning: It is not supported in HIP.
◆ hipEventReleaseToDevice
#define hipEventReleaseToDevice 0x40000000 |
Use a device-scope release when recording this event. This flag is useful to obtain more precise timings of commands between events. The flag is a no-op on CUDA platforms.
◆ hipEventReleaseToSystem
#define hipEventReleaseToSystem 0x80000000 |
Use a system-scope release when recording this event. This flag is useful to make non-coherent host memory visible to the host. The flag is a no-op on CUDA platforms.
◆ hipExtAnyOrderLaunch
#define hipExtAnyOrderLaunch 0x01 |
AnyOrderLaunch of kernels.
◆ hipExtHostRegisterCoarseGrained
#define hipExtHostRegisterCoarseGrained 0x8 |
Coarse Grained host memory lock.
◆ hipHostMallocCoherent
#define hipHostMallocCoherent 0x40000000 |
Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
◆ hipHostMallocDefault
#define hipHostMallocDefault 0x0 |
Default pinned memory allocation on the host.
◆ hipHostMallocMapped
#define hipHostMallocMapped 0x2 |
Map the allocation into the address space for the current device. The device pointer can be obtained with hipHostGetDevicePointer.
◆ hipHostMallocNonCoherent
#define hipHostMallocNonCoherent 0x80000000 |
Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
◆ hipHostMallocNumaUser
#define hipHostMallocNumaUser 0x20000000 |
Host memory allocation will follow numa policy set by user.
◆ hipHostMallocPortable
#define hipHostMallocPortable 0x1 |
Memory is considered allocated by all contexts.
◆ hipHostMallocWriteCombined
#define hipHostMallocWriteCombined 0x4 |
Allocates the memory as write-combined. On some system configurations, write-combined allocations may be transferred faster across the PCI Express bus; however, they may have low read efficiency on most CPUs. It's a good option for data transfer from host to device via mapped pinned memory.
◆ hipHostRegisterDefault
#define hipHostRegisterDefault 0x0 |
Memory is Mapped and Portable.
◆ hipHostRegisterIoMemory
#define hipHostRegisterIoMemory 0x4 |
Not supported.
◆ hipHostRegisterMapped
#define hipHostRegisterMapped 0x2 |
Map the allocation into the address space for the current device. The device pointer can be obtained with hipHostGetDevicePointer.
◆ hipHostRegisterPortable
#define hipHostRegisterPortable 0x1 |
Memory is considered registered by all contexts.
◆ hipInvalidDeviceId
#define hipInvalidDeviceId ((int)-2) |
◆ hipMallocSignalMemory
#define hipMallocSignalMemory 0x2 |
Memory represents a HSA signal.
◆ hipMemAttachGlobal
#define hipMemAttachGlobal 0x01 |
Memory can be accessed by any stream on any device.
◆ hipMemAttachHost
#define hipMemAttachHost 0x02 |
Memory cannot be accessed by any stream on any device.
◆ hipMemAttachSingle
#define hipMemAttachSingle 0x04 |
Memory can only be accessed by a single stream on the associated device.
◆ hipOccupancyDefault
#define hipOccupancyDefault 0x00 |
◆ hipStreamDefault
#define hipStreamDefault 0x00 |
Default stream creation flags. These are used with hipStreamCreate().
◆ hipStreamNonBlocking
#define hipStreamNonBlocking 0x01 |
Stream does not implicitly synchronize with null stream.
◆ hipStreamPerThread
#define hipStreamPerThread ((hipStream_t)2) |
Implicit stream per application thread.
◆ hipStreamWaitValueAnd
#define hipStreamWaitValueAnd 0x2 |
◆ hipStreamWaitValueEq
#define hipStreamWaitValueEq 0x1 |
◆ hipStreamWaitValueGte
#define hipStreamWaitValueGte 0x0 |
◆ hipStreamWaitValueNor
#define hipStreamWaitValueNor 0x3 |
Typedef Documentation
◆ hipError_t
◆ hipExternalMemory_t
typedef void* hipExternalMemory_t |
◆ hipExternalSemaphore_t
typedef void* hipExternalSemaphore_t |
◆ hipGraphicsResource
typedef struct _hipGraphicsResource hipGraphicsResource |
◆ hipGraphicsResource_t
typedef hipGraphicsResource* hipGraphicsResource_t |
Enumeration Type Documentation
◆ hipComputeMode
enum hipComputeMode |
◆ hipDeviceAttribute_t
enum hipDeviceAttribute_t |
Enumerator | |
---|---|
hipDeviceAttributeCudaCompatibleBegin | |
hipDeviceAttributeEccEnabled | Whether ECC support is enabled. |
hipDeviceAttributeAccessPolicyMaxWindowSize | Cuda only. The maximum size of the window policy in bytes. |
hipDeviceAttributeAsyncEngineCount | Cuda only. Asynchronous engines number. |
hipDeviceAttributeCanMapHostMemory | Whether host memory can be mapped into device address space. |
hipDeviceAttributeCanUseHostPointerForRegisteredMem | Cuda only. Device can access host registered memory at the same virtual address as the CPU |
hipDeviceAttributeClockRate | Peak clock frequency in kilohertz. |
hipDeviceAttributeComputeMode | Compute mode that device is currently in. |
hipDeviceAttributeComputePreemptionSupported | Cuda only. Device supports Compute Preemption. |
hipDeviceAttributeConcurrentKernels | Device can possibly execute multiple kernels concurrently. |
hipDeviceAttributeConcurrentManagedAccess | Device can coherently access managed memory concurrently with the CPU. |
hipDeviceAttributeCooperativeLaunch | Support cooperative launch. |
hipDeviceAttributeCooperativeMultiDeviceLaunch | Support cooperative launch on multiple devices. |
hipDeviceAttributeDeviceOverlap | Cuda only. Device can concurrently copy memory and execute a kernel. Deprecated. Use asyncEngineCount instead. |
hipDeviceAttributeDirectManagedMemAccessFromHost | Host can directly access managed memory on the device without migration |
hipDeviceAttributeGlobalL1CacheSupported | Cuda only. Device supports caching globals in L1. |
hipDeviceAttributeHostNativeAtomicSupported | Cuda only. Link between the device and the host supports native atomic operations. |
hipDeviceAttributeIntegrated | Device is integrated GPU. |
hipDeviceAttributeIsMultiGpuBoard | Multiple GPU devices. |
hipDeviceAttributeKernelExecTimeout | Run time limit for kernels executed on the device. |
hipDeviceAttributeL2CacheSize | Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. |
hipDeviceAttributeLocalL1CacheSupported | Caching locals in L1 is supported. |
hipDeviceAttributeLuid | Cuda only. 8-byte locally unique identifier. Undefined on TCC and non-Windows platforms. |
hipDeviceAttributeLuidDeviceNodeMask | Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms. |
hipDeviceAttributeComputeCapabilityMajor | Major compute capability version number. |
hipDeviceAttributeManagedMemory | Device supports allocating managed memory on this system. |
hipDeviceAttributeMaxBlocksPerMultiProcessor | Cuda only. Maximum number of blocks per multiprocessor. |
hipDeviceAttributeMaxBlockDimX | Max block size in width. |
hipDeviceAttributeMaxBlockDimY | Max block size in height. |
hipDeviceAttributeMaxBlockDimZ | Max block size in depth. |
hipDeviceAttributeMaxGridDimX | Max grid size in width. |
hipDeviceAttributeMaxGridDimY | Max grid size in height. |
hipDeviceAttributeMaxGridDimZ | Max grid size in depth. |
hipDeviceAttributeMaxSurface1D | Maximum size of 1D surface. |
hipDeviceAttributeMaxSurface1DLayered | Cuda only. Maximum dimensions of 1D layered surface. |
hipDeviceAttributeMaxSurface2D | Maximum dimension (width, height) of 2D surface. |
hipDeviceAttributeMaxSurface2DLayered | Cuda only. Maximum dimensions of 2D layered surface. |
hipDeviceAttributeMaxSurface3D | Maximum dimension (width, height, depth) of 3D surface. |
hipDeviceAttributeMaxSurfaceCubemap | Cuda only. Maximum dimensions of Cubemap surface. |
hipDeviceAttributeMaxSurfaceCubemapLayered | Cuda only. Maximum dimension of Cubemap layered surface. |
hipDeviceAttributeMaxTexture1DWidth | Maximum size of 1D texture. |
hipDeviceAttributeMaxTexture1DLayered | Cuda only. Maximum dimensions of 1D layered texture. |
hipDeviceAttributeMaxTexture1DLinear | Maximum number of elements allocatable in a 1D linear texture. Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda. |
hipDeviceAttributeMaxTexture1DMipmap | Cuda only. Maximum size of 1D mipmapped texture. |
hipDeviceAttributeMaxTexture2DWidth | Maximum dimension width of 2D texture. |
hipDeviceAttributeMaxTexture2DHeight | Maximum dimension height of 2D texture. |
hipDeviceAttributeMaxTexture2DGather | Cuda only. Maximum dimensions of 2D texture if gather operations performed. |
hipDeviceAttributeMaxTexture2DLayered | Cuda only. Maximum dimensions of 2D layered texture. |
hipDeviceAttributeMaxTexture2DLinear | Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory. |
hipDeviceAttributeMaxTexture2DMipmap | Cuda only. Maximum dimensions of 2D mipmapped texture. |
hipDeviceAttributeMaxTexture3DWidth | Maximum dimension width of 3D texture. |
hipDeviceAttributeMaxTexture3DHeight | Maximum dimension height of 3D texture. |
hipDeviceAttributeMaxTexture3DDepth | Maximum dimension depth of 3D texture. |
hipDeviceAttributeMaxTexture3DAlt | Cuda only. Maximum dimensions of alternate 3D texture. |
hipDeviceAttributeMaxTextureCubemap | Cuda only. Maximum dimensions of Cubemap texture. |
hipDeviceAttributeMaxTextureCubemapLayered | Cuda only. Maximum dimensions of Cubemap layered texture. |
hipDeviceAttributeMaxThreadsDim | Maximum dimension of a block. |
hipDeviceAttributeMaxThreadsPerBlock | Maximum number of threads per block. |
hipDeviceAttributeMaxThreadsPerMultiProcessor | Maximum resident threads per multiprocessor. |
hipDeviceAttributeMaxPitch | Maximum pitch in bytes allowed by memory copies. |
hipDeviceAttributeMemoryBusWidth | Global memory bus width in bits. |
hipDeviceAttributeMemoryClockRate | Peak memory clock frequency in kilohertz. |
hipDeviceAttributeComputeCapabilityMinor | Minor compute capability version number. |
hipDeviceAttributeMultiGpuBoardGroupID | Cuda only. Unique ID of device group on the same multi-GPU board. |
hipDeviceAttributeMultiprocessorCount | Number of multiprocessors on the device. |
hipDeviceAttributeName | Device name. |
hipDeviceAttributePageableMemoryAccess | Device supports coherently accessing pageable memory without calling hipHostRegister on it |
hipDeviceAttributePageableMemoryAccessUsesHostPageTables | Device accesses pageable memory via the host's page tables. |
hipDeviceAttributePciBusId | PCI Bus ID. |
hipDeviceAttributePciDeviceId | PCI Device ID. |
hipDeviceAttributePciDomainID | PCI Domain ID. |
hipDeviceAttributePersistingL2CacheMaxSize | Cuda11 only. Maximum l2 persisting lines capacity in bytes. |
hipDeviceAttributeMaxRegistersPerBlock | 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. |
hipDeviceAttributeMaxRegistersPerMultiprocessor | 32-bit registers available per multiprocessor. |
hipDeviceAttributeReservedSharedMemPerBlock | Cuda11 only. Shared memory reserved by CUDA driver per block. |
hipDeviceAttributeMaxSharedMemoryPerBlock | Maximum shared memory available per block in bytes. |
hipDeviceAttributeSharedMemPerBlockOptin | Cuda only. Maximum shared memory per block usable by special opt in. |
hipDeviceAttributeSharedMemPerMultiprocessor | Cuda only. Shared memory available per multiprocessor. |
hipDeviceAttributeSingleToDoublePrecisionPerfRatio | Cuda only. Performance ratio of single precision to double precision. |
hipDeviceAttributeStreamPrioritiesSupported | Cuda only. Whether to support stream priorities. |
hipDeviceAttributeSurfaceAlignment | Cuda only. Alignment requirement for surfaces. |
hipDeviceAttributeTccDriver | Cuda only. Whether device is a Tesla device using TCC driver. |
hipDeviceAttributeTextureAlignment | Alignment requirement for textures. |
hipDeviceAttributeTexturePitchAlignment | Pitch alignment requirement for 2D texture references bound to pitched memory. |
hipDeviceAttributeTotalConstantMemory | Constant memory size in bytes. |
hipDeviceAttributeTotalGlobalMem | Global memory available on device. |
hipDeviceAttributeUnifiedAddressing | Cuda only. A unified address space shared with the host. |
hipDeviceAttributeUuid | Cuda only. Unique ID in 16 bytes. |
hipDeviceAttributeWarpSize | Warp size in threads. |
hipDeviceAttributeCudaCompatibleEnd | |
hipDeviceAttributeAmdSpecificBegin | |
hipDeviceAttributeClockInstructionRate | Frequency in kHz of the timer used by the device-side "clock*". |
hipDeviceAttributeArch | Device architecture. |
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor | Maximum shared memory per multiprocessor. |
hipDeviceAttributeGcnArch | Device gcn architecture. |
hipDeviceAttributeGcnArchName | Device gcnArch name in 256 bytes. |
hipDeviceAttributeHdpMemFlushCntl | Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register. |
hipDeviceAttributeHdpRegFlushCntl | Address of the HDP_REG_COHERENCY_FLUSH_CNTL register. |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc | Supports cooperative launch on multiple devices with unmatched functions |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim | Supports cooperative launch on multiple devices with unmatched grid dimensions |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim | Supports cooperative launch on multiple devices with unmatched block dimensions |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem | Supports cooperative launch on multiple devices with unmatched shared memories |
hipDeviceAttributeIsLargeBar | Whether it is LargeBar. |
hipDeviceAttributeAsicRevision | Revision of the GPU in this device. |
hipDeviceAttributeCanUseStreamWaitValue | '1' if Device supports hipStreamWaitValue32() and hipStreamWaitValue64(), '0' otherwise. |
hipDeviceAttributeImageSupport | '1' if Device supports image, '0' otherwise. |
hipDeviceAttributePhysicalMultiProcessorCount | All available physical compute units for the device |
hipDeviceAttributeAmdSpecificEnd | |
hipDeviceAttributeVendorSpecificBegin |
◆ hipExternalMemoryHandleType
◆ hipExternalSemaphoreHandleType
◆ hipFuncAttribute
enum hipFuncAttribute |
◆ hipFuncCache_t
enum hipFuncCache_t |
- Warning
- On AMD devices and some Nvidia devices, these hints and controls are ignored.
◆ hipGLDeviceList
enum hipGLDeviceList |
◆ hipGraphicsRegisterFlags
◆ hipJitOption
enum hipJitOption |
◆ hipMemoryAdvise
enum hipMemoryAdvise |
◆ hipMemRangeAttribute
enum hipMemRangeAttribute |
Enumerator | |
---|---|
hipMemRangeAttributeReadMostly | Whether the range will mostly be read and only occasionally be written to |
hipMemRangeAttributePreferredLocation | The preferred location of the range. |
hipMemRangeAttributeAccessedBy | Memory range has hipMemAdviseSetAccessedBy set for the specified device |
hipMemRangeAttributeLastPrefetchLocation | The last location to which the range was prefetched |
hipMemRangeAttributeCoherencyMode | Returns coherency mode hipMemRangeCoherencyMode for the range |
◆ hipMemRangeCoherencyMode
◆ hipSharedMemConfig
enum hipSharedMemConfig |
- Warning
- On AMD devices and some Nvidia devices, these hints and controls are ignored.