/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/util_allocator.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/util_allocator.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/util_allocator.hpp Source File
util_allocator.hpp
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2019-2020, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_UTIL_ALLOCATOR_HPP_
31 #define HIPCUB_ROCPRIM_UTIL_ALLOCATOR_HPP_
32 
33 #include "../../config.hpp"
34 
35 #include <set>
36 #include <map>
37 #include <mutex>
38 
39 #include <math.h>
40 #include <stdio.h>
41 
42 BEGIN_HIPCUB_NAMESPACE
43 
44 #define _HipcubLog(format, ...) printf(format, __VA_ARGS__);
45 
46 // Hipified version of cub/util_allocator.cuh
47 
48 struct CachingDeviceAllocator
49 {
50  //---------------------------------------------------------------------
51  // Constants
52  //---------------------------------------------------------------------
53 
55  static const unsigned int INVALID_BIN = (unsigned int) -1;
56 
58  static const size_t INVALID_SIZE = (size_t) -1;
59 
61  static const int INVALID_DEVICE_ORDINAL = -1;
62 
63  //---------------------------------------------------------------------
64  // Type definitions and helper types
65  //---------------------------------------------------------------------
66 
71  {
72  void* d_ptr; // Device pointer
73  size_t bytes; // Size of allocation in bytes
74  unsigned int bin; // Bin enumeration
75  int device; // device ordinal
76  hipStream_t associated_stream; // Associated associated_stream
77  hipEvent_t ready_event; // Signal when associated stream has run to the point at which this block was freed
78 
79  // Constructor (suitable for searching maps for a specific block, given its pointer and device)
80  BlockDescriptor(void *d_ptr, int device) :
81  d_ptr(d_ptr),
82  bytes(0),
83  bin(INVALID_BIN),
84  device(device),
85  associated_stream(0),
86  ready_event(0)
87  {}
88 
89  // Constructor (suitable for searching maps for a range of suitable blocks, given a device)
90  BlockDescriptor(int device) :
91  d_ptr(NULL),
92  bytes(0),
93  bin(INVALID_BIN),
94  device(device),
95  associated_stream(0),
96  ready_event(0)
97  {}
98 
99  // Comparison functor for comparing device pointers
100  static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b)
101  {
102  if (a.device == b.device)
103  return (a.d_ptr < b.d_ptr);
104  else
105  return (a.device < b.device);
106  }
107 
108  // Comparison functor for comparing allocation sizes
109  static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b)
110  {
111  if (a.device == b.device)
112  return (a.bytes < b.bytes);
113  else
114  return (a.device < b.device);
115  }
116  };
117 
119  typedef bool (*Compare)(const BlockDescriptor &, const BlockDescriptor &);
120 
121  class TotalBytes {
122  public:
123  size_t free;
124  size_t live;
125  TotalBytes() { free = live = 0; }
126  };
127 
129  typedef std::multiset<BlockDescriptor, Compare> CachedBlocks;
130 
132  typedef std::multiset<BlockDescriptor, Compare> BusyBlocks;
133 
135  typedef std::map<int, TotalBytes> GpuCachedBytes;
136 
137 
138  //---------------------------------------------------------------------
139  // Utility functions
140  //---------------------------------------------------------------------
141 
145  static unsigned int IntPow(
146  unsigned int base,
147  unsigned int exp)
148  {
149  unsigned int retval = 1;
150  while (exp > 0)
151  {
152  if (exp & 1) {
153  retval = retval * base; // multiply the result by the current base
154  }
155  base = base * base; // square the base
156  exp = exp >> 1; // divide the exponent in half
157  }
158  return retval;
159  }
160 
161 
166  unsigned int &power,
167  size_t &rounded_bytes,
168  unsigned int base,
169  size_t value)
170  {
171  power = 0;
172  rounded_bytes = 1;
173 
174  if (value * base < value)
175  {
176  // Overflow
177  power = sizeof(size_t) * 8;
178  rounded_bytes = size_t(0) - 1;
179  return;
180  }
181 
182  while (rounded_bytes < value)
183  {
184  rounded_bytes *= base;
185  power++;
186  }
187  }
188 
189 
190  //---------------------------------------------------------------------
191  // Fields
192  //---------------------------------------------------------------------
193 
194  std::mutex mutex;
195 
196  unsigned int bin_growth;
197  unsigned int min_bin;
198  unsigned int max_bin;
199 
200  size_t min_bin_bytes;
201  size_t max_bin_bytes;
203 
204  const bool skip_cleanup;
205  bool debug;
206 
210 
211  //---------------------------------------------------------------------
212  // Methods
213  //---------------------------------------------------------------------
214 
219  unsigned int bin_growth,
220  unsigned int min_bin = 1,
221  unsigned int max_bin = INVALID_BIN,
222  size_t max_cached_bytes = INVALID_SIZE,
223  bool skip_cleanup = false,
224  bool debug = false)
225  :
226  bin_growth(bin_growth),
227  min_bin(min_bin),
228  max_bin(max_bin),
229  min_bin_bytes(IntPow(bin_growth, min_bin)),
230  max_bin_bytes(IntPow(bin_growth, max_bin)),
231  max_cached_bytes(max_cached_bytes),
232  skip_cleanup(skip_cleanup),
233  debug(debug),
234  cached_blocks(BlockDescriptor::SizeCompare),
235  live_blocks(BlockDescriptor::PtrCompare)
236  {}
237 
238 
253  bool skip_cleanup = false,
254  bool debug = false)
255  :
256  bin_growth(8),
257  min_bin(3),
258  max_bin(7),
259  min_bin_bytes(IntPow(bin_growth, min_bin)),
260  max_bin_bytes(IntPow(bin_growth, max_bin)),
261  max_cached_bytes((max_bin_bytes * 3) - 1),
262  skip_cleanup(skip_cleanup),
263  debug(debug),
264  cached_blocks(BlockDescriptor::SizeCompare),
265  live_blocks(BlockDescriptor::PtrCompare)
266  {}
267 
268 
275  hipError_t SetMaxCachedBytes(
276  size_t max_cached_bytes)
277  {
278  // Lock
279  mutex.lock();
280 
281  if (debug) _HipcubLog("Changing max_cached_bytes (%lld -> %lld)\n", (long long) this->max_cached_bytes, (long long) max_cached_bytes);
282 
283  this->max_cached_bytes = max_cached_bytes;
284 
285  // Unlock
286  mutex.unlock();
287 
288  return hipSuccess;
289  }
290 
291 
299  hipError_t DeviceAllocate(
300  int device,
301  void **d_ptr,
302  size_t bytes,
303  hipStream_t active_stream = 0)
304  {
305  *d_ptr = NULL;
306  int entrypoint_device = INVALID_DEVICE_ORDINAL;
307  hipError_t error = hipSuccess;
308 
309  if (device == INVALID_DEVICE_ORDINAL)
310  {
311  if (HipcubDebug(error = hipGetDevice(&entrypoint_device))) return error;
312  device = entrypoint_device;
313  }
314 
315  // Create a block descriptor for the requested allocation
316  bool found = false;
317  BlockDescriptor search_key(device);
318  search_key.associated_stream = active_stream;
319  NearestPowerOf(search_key.bin, search_key.bytes, bin_growth, bytes);
320 
321  if (search_key.bin > max_bin)
322  {
323  // Bin is greater than our maximum bin: allocate the request
324  // exactly and give out-of-bounds bin. It will not be cached
325  // for reuse when returned.
326  search_key.bin = INVALID_BIN;
327  search_key.bytes = bytes;
328  }
329  else
330  {
331  // Search for a suitable cached allocation: lock
332  mutex.lock();
333 
334  if (search_key.bin < min_bin)
335  {
336  // Bin is less than minimum bin: round up
337  search_key.bin = min_bin;
338  search_key.bytes = min_bin_bytes;
339  }
340 
341  // Iterate through the range of cached blocks on the same device in the same bin
342  CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key);
343  while ((block_itr != cached_blocks.end())
344  && (block_itr->device == device)
345  && (block_itr->bin == search_key.bin))
346  {
347  // To prevent races with reusing blocks returned by the host but still
348  // in use by the device, only consider cached blocks that are
349  // either (from the active stream) or (from an idle stream)
350  if ((active_stream == block_itr->associated_stream) ||
351  (hipEventQuery(block_itr->ready_event) != hipErrorNotReady))
352  {
353  // Reuse existing cache block. Insert into live blocks.
354  found = true;
355  search_key = *block_itr;
356  search_key.associated_stream = active_stream;
357  live_blocks.insert(search_key);
358 
359  // Remove from free blocks
360  cached_bytes[device].free -= search_key.bytes;
361  cached_bytes[device].live += search_key.bytes;
362 
363  if (debug) _HipcubLog("\tDevice %d reused cached block at %p (%lld bytes) for stream %lld (previously associated with stream %lld).\n",
364  device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) block_itr->associated_stream);
365 
366  cached_blocks.erase(block_itr);
367 
368  break;
369  }
370  block_itr++;
371  }
372 
373  // Done searching: unlock
374  mutex.unlock();
375  }
376 
377  // Allocate the block if necessary
378  if (!found)
379  {
380  // Set runtime's current device to specified device (entrypoint may not be set)
381  if (device != entrypoint_device)
382  {
383  if (HipcubDebug(error = hipGetDevice(&entrypoint_device))) return error;
384  if (HipcubDebug(error = hipSetDevice(device))) return error;
385  }
386 
387  // Attempt to allocate
388  if (HipcubDebug(error = hipMalloc(&search_key.d_ptr, search_key.bytes)) == hipErrorMemoryAllocation)
389  {
390  // The allocation attempt failed: free all cached blocks on device and retry
391  if (debug) _HipcubLog("\tDevice %d failed to allocate %lld bytes for stream %lld, retrying after freeing cached allocations",
392  device, (long long) search_key.bytes, (long long) search_key.associated_stream);
393 
394  error = hipGetLastError(); // Reset error
395  error = hipSuccess; // Reset the error we will return
396 
397  // Lock
398  mutex.lock();
399 
400  // Iterate the range of free blocks on the same device
401  BlockDescriptor free_key(device);
402  CachedBlocks::iterator block_itr = cached_blocks.lower_bound(free_key);
403 
404  while ((block_itr != cached_blocks.end()) && (block_itr->device == device))
405  {
406  // No need to worry about synchronization with the device: hipFree is
407  // blocking and will synchronize across all kernels executing
408  // on the current device
409 
410  // Free device memory and destroy stream event.
411  if (HipcubDebug(error = hipFree(block_itr->d_ptr))) break;
412  if (HipcubDebug(error = hipEventDestroy(block_itr->ready_event))) break;
413 
414  // Reduce balance and erase entry
415  cached_bytes[device].free -= block_itr->bytes;
416 
417  if (debug) _HipcubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
418  device, (long long) block_itr->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live);
419 
420  cached_blocks.erase(block_itr);
421 
422  block_itr++;
423  }
424 
425  // Unlock
426  mutex.unlock();
427 
428  // Return under error
429  if (error) return error;
430 
431  // Try to allocate again
432  if (HipcubDebug(error = hipMalloc(&search_key.d_ptr, search_key.bytes))) return error;
433  }
434 
435  // Create ready event
436  if (HipcubDebug(error = hipEventCreateWithFlags(&search_key.ready_event, hipEventDisableTiming)))
437  return error;
438 
439  // Insert into live blocks
440  mutex.lock();
441  live_blocks.insert(search_key);
442  cached_bytes[device].live += search_key.bytes;
443  mutex.unlock();
444 
445  if (debug) _HipcubLog("\tDevice %d allocated new device block at %p (%lld bytes associated with stream %lld).\n",
446  device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream);
447 
448  // Attempt to revert back to previous device if necessary
449  if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device))
450  {
451  if (HipcubDebug(error = hipSetDevice(entrypoint_device))) return error;
452  }
453  }
454 
455  // Copy device pointer to output parameter
456  *d_ptr = search_key.d_ptr;
457 
458  if (debug) _HipcubLog("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n",
459  (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live);
460 
461  return error;
462  }
463 
464 
472  hipError_t DeviceAllocate(
473  void **d_ptr,
474  size_t bytes,
475  hipStream_t active_stream = 0)
476  {
477  return DeviceAllocate(INVALID_DEVICE_ORDINAL, d_ptr, bytes, active_stream);
478  }
479 
480 
488  hipError_t DeviceFree(
489  int device,
490  void* d_ptr)
491  {
492  int entrypoint_device = INVALID_DEVICE_ORDINAL;
493  hipError_t error = hipSuccess;
494 
495  if (device == INVALID_DEVICE_ORDINAL)
496  {
497  if (HipcubDebug(error = hipGetDevice(&entrypoint_device)))
498  return error;
499  device = entrypoint_device;
500  }
501 
502  // Lock
503  mutex.lock();
504 
505  // Find corresponding block descriptor
506  bool recached = false;
507  BlockDescriptor search_key(d_ptr, device);
508  BusyBlocks::iterator block_itr = live_blocks.find(search_key);
509  if (block_itr != live_blocks.end())
510  {
511  // Remove from live blocks
512  search_key = *block_itr;
513  live_blocks.erase(block_itr);
514  cached_bytes[device].live -= search_key.bytes;
515 
516  // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold
517  if ((search_key.bin != INVALID_BIN) && (cached_bytes[device].free + search_key.bytes <= max_cached_bytes))
518  {
519  // Insert returned allocation into free blocks
520  recached = true;
521  cached_blocks.insert(search_key);
522  cached_bytes[device].free += search_key.bytes;
523 
524  if (debug) _HipcubLog("\tDevice %d returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
525  device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(),
526  (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live);
527  }
528  }
529 
530  // First set to specified device (entrypoint may not be set)
531  if (device != entrypoint_device)
532  {
533  if (HipcubDebug(error = hipGetDevice(&entrypoint_device))) return error;
534  if (HipcubDebug(error = hipSetDevice(device))) return error;
535  }
536 
537  if (recached)
538  {
539  // Insert the ready event in the associated stream (must have current device set properly)
540  if (HipcubDebug(error = hipEventRecord(search_key.ready_event, search_key.associated_stream))) return error;
541  }
542 
543  // Unlock
544  mutex.unlock();
545 
546  if (!recached)
547  {
548  // Free the allocation from the runtime and cleanup the event.
549  if (HipcubDebug(error = hipFree(d_ptr))) return error;
550  if (HipcubDebug(error = hipEventDestroy(search_key.ready_event))) return error;
551 
552  if (debug) _HipcubLog("\tDevice %d freed %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
553  device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live);
554  }
555 
556  // Reset device
557  if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device))
558  {
559  if (HipcubDebug(error = hipSetDevice(entrypoint_device))) return error;
560  }
561 
562  return error;
563  }
564 
565 
573  hipError_t DeviceFree(
574  void* d_ptr)
575  {
576  return DeviceFree(INVALID_DEVICE_ORDINAL, d_ptr);
577  }
578 
579 
583  hipError_t FreeAllCached()
584  {
585  hipError_t error = hipSuccess;
586  int entrypoint_device = INVALID_DEVICE_ORDINAL;
587  int current_device = INVALID_DEVICE_ORDINAL;
588 
589  mutex.lock();
590 
591  while (!cached_blocks.empty())
592  {
593  // Get first block
594  CachedBlocks::iterator begin = cached_blocks.begin();
595 
596  // Get entry-point device ordinal if necessary
597  if (entrypoint_device == INVALID_DEVICE_ORDINAL)
598  {
599  if (HipcubDebug(error = hipGetDevice(&entrypoint_device))) break;
600  }
601 
602  // Set current device ordinal if necessary
603  if (begin->device != current_device)
604  {
605  if (HipcubDebug(error = hipSetDevice(begin->device))) break;
606  current_device = begin->device;
607  }
608 
609  // Free device memory
610  if (HipcubDebug(error = hipFree(begin->d_ptr))) break;
611  if (HipcubDebug(error = hipEventDestroy(begin->ready_event))) break;
612 
613  // Reduce balance and erase entry
614  cached_bytes[current_device].free -= begin->bytes;
615 
616  if (debug) _HipcubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
617  current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].live);
618 
619  cached_blocks.erase(begin);
620  }
621 
622  mutex.unlock();
623 
624  // Attempt to revert back to entry-point device if necessary
625  if (entrypoint_device != INVALID_DEVICE_ORDINAL)
626  {
627  if (HipcubDebug(error = hipSetDevice(entrypoint_device))) return error;
628  }
629 
630  return error;
631  }
632 
633 
638  {
639  if (!skip_cleanup)
640  FreeAllCached();
641  }
642 
643 };
644 
645 END_HIPCUB_NAMESPACE
646 
647 #endif // HIPCUB_ROCPRIM_UTIL_ALLOCATOR_HPP_
Definition: util_allocator.hpp:121
Definition: util_allocator.hpp:71
void NearestPowerOf(unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value)
Definition: util_allocator.hpp:165
unsigned int min_bin
Geometric growth factor for bin-sizes.
Definition: util_allocator.hpp:197
hipError_t DeviceFree(int device, void *d_ptr)
Frees a live allocation of device memory on the specified device, returning it to the allocator.
Definition: util_allocator.hpp:488
std::multiset< BlockDescriptor, Compare > CachedBlocks
Set type for cached blocks (ordered by size)
Definition: util_allocator.hpp:129
unsigned int bin_growth
Mutex for thread-safety.
Definition: util_allocator.hpp:196
size_t max_bin_bytes
Minimum bin size.
Definition: util_allocator.hpp:201
CachedBlocks cached_blocks
Map of device ordinal to aggregate cached bytes on that device.
Definition: util_allocator.hpp:208
GpuCachedBytes cached_bytes
Whether or not to print (de)allocation events to stdout.
Definition: util_allocator.hpp:207
CachingDeviceAllocator(unsigned int bin_growth, unsigned int min_bin=1, unsigned int max_bin=INVALID_BIN, size_t max_cached_bytes=INVALID_SIZE, bool skip_cleanup=false, bool debug=false)
Set of live device allocations currently in use.
Definition: util_allocator.hpp:218
size_t min_bin_bytes
Maximum bin enumeration.
Definition: util_allocator.hpp:200
hipError_t DeviceFree(void *d_ptr)
Frees a live allocation of device memory on the current device, returning it to the allocator.
Definition: util_allocator.hpp:573
bool debug
Whether or not to skip a call to FreeAllCached() when destructor is called. (The CUDA runtime may hav...
Definition: util_allocator.hpp:205
unsigned int max_bin
Minimum bin enumeration.
Definition: util_allocator.hpp:198
std::map< int, TotalBytes > GpuCachedBytes
Map type of device ordinals to the number of cached bytes cached by each device.
Definition: util_allocator.hpp:135
std::multiset< BlockDescriptor, Compare > BusyBlocks
Set type for live blocks (ordered by ptr)
Definition: util_allocator.hpp:132
CachingDeviceAllocator(bool skip_cleanup=false, bool debug=false)
Default constructor.
Definition: util_allocator.hpp:252
hipError_t FreeAllCached()
Frees all cached device allocations on all devices.
Definition: util_allocator.hpp:583
static unsigned int IntPow(unsigned int base, unsigned int exp)
Definition: util_allocator.hpp:145
hipError_t SetMaxCachedBytes(size_t max_cached_bytes)
Sets the limit on the number bytes this allocator is allowed to cache per device.
Definition: util_allocator.hpp:275
virtual ~CachingDeviceAllocator()
Destructor.
Definition: util_allocator.hpp:637
BusyBlocks live_blocks
Set of cached device allocations available for reuse.
Definition: util_allocator.hpp:209
size_t max_cached_bytes
Maximum bin size.
Definition: util_allocator.hpp:202
hipError_t DeviceAllocate(int device, void **d_ptr, size_t bytes, hipStream_t active_stream=0)
Provides a suitable allocation of device memory for the given size on the specified device.
Definition: util_allocator.hpp:299
const bool skip_cleanup
Maximum aggregate cached bytes per device.
Definition: util_allocator.hpp:204
hipError_t DeviceAllocate(void **d_ptr, size_t bytes, hipStream_t active_stream=0)
Provides a suitable allocation of device memory for the given size on the current device.
Definition: util_allocator.hpp:472