/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/device_memory.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/device_memory.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/device_memory.hpp Source File
device_memory.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <hip/hip_runtime.h>
7 #include <stdint.h>
8 #include <stdexcept>
11 
12 namespace ck_tile {
13 template <typename T>
14 __global__ void set_buffer_value(T* p, T x, uint64_t buffer_element_size)
15 {
16  for(uint64_t i = threadIdx.x; i < buffer_element_size; i += blockDim.x)
17  {
18  p[i] = x;
19  }
20 }
21 
50 struct DeviceMem
51 
52 {
53  DeviceMem() : mpDeviceBuf(nullptr), mMemSize(0) {}
54  DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
55  {
56  if(mMemSize != 0)
57  {
58  HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
59  }
60  else
61  {
62  mpDeviceBuf = nullptr;
63  }
64  }
65  template <typename T>
66  DeviceMem(const HostTensor<T>& t) : mMemSize(t.get_element_space_size_in_bytes())
67  {
68  if(mMemSize != 0)
69  {
70  HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
71  }
72  else
73  {
74  mpDeviceBuf = nullptr;
75  }
76  ToDevice(t.data());
77  }
78  void Realloc(std::size_t mem_size)
79  {
80  if(mpDeviceBuf)
81  {
82  HIP_CHECK_ERROR(hipFree(mpDeviceBuf));
83  }
84  mMemSize = mem_size;
85  if(mMemSize != 0)
86  {
87  HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
88  }
89  else
90  {
91  mpDeviceBuf = nullptr;
92  }
93  }
94  void* GetDeviceBuffer() const { return mpDeviceBuf; }
95  std::size_t GetBufferSize() const { return mMemSize; }
96  void ToDevice(const void* p) const
97  {
98  if(mpDeviceBuf)
99  {
101  hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
102  }
103  // else
104  // {
105  // throw std::runtime_error("ToDevice with an empty pointer");
106  // }
107  }
108  void ToDevice(const void* p, const std::size_t cpySize) const
109  {
110  if(mpDeviceBuf)
111  {
113  hipMemcpy(mpDeviceBuf, const_cast<void*>(p), cpySize, hipMemcpyHostToDevice));
114  }
115  }
116  void FromDevice(void* p) const
117  {
118  if(mpDeviceBuf)
119  {
120  HIP_CHECK_ERROR(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
121  }
122  // else
123  // {
124  // throw std::runtime_error("FromDevice with an empty pointer");
125  // }
126  }
127  void FromDevice(void* p, const std::size_t cpySize) const
128  {
129  if(mpDeviceBuf)
130  {
131  HIP_CHECK_ERROR(hipMemcpy(p, mpDeviceBuf, cpySize, hipMemcpyDeviceToHost));
132  }
133  }
134 
135  // construct a host tensor with type T
136  template <typename T>
137  HostTensor<T> ToHost(std::size_t cpySize)
138  {
139  // TODO: host tensor could be slightly larger than the device tensor
140  // we just copy all data from GPU buffer
141  std::size_t host_elements = (cpySize + sizeof(T) - 1) / sizeof(T);
142  HostTensor<T> h_({host_elements});
143  if(mpDeviceBuf)
144  {
145  HIP_CHECK_ERROR(hipMemcpy(h_.data(), mpDeviceBuf, cpySize, hipMemcpyDeviceToHost));
146  }
147  return h_;
148  }
149  template <typename T>
151  {
152  return ToHost<T>(mMemSize);
153  }
154 
155  void SetZero() const
156  {
157  if(mpDeviceBuf)
158  {
159  HIP_CHECK_ERROR(hipMemset(mpDeviceBuf, 0, mMemSize));
160  }
161  }
162  template <typename T>
163  void SetValue(T x) const
164  {
165  if(mpDeviceBuf)
166  {
167  if(mMemSize % sizeof(T) != 0)
168  {
169  throw std::runtime_error("wrong! not entire DeviceMem will be set");
170  }
171 
172  // TODO: call a gpu kernel to set the value (?)
173  set_buffer_value<T><<<1, 1024>>>(static_cast<T*>(mpDeviceBuf), x, mMemSize / sizeof(T));
174  }
175  }
177  {
178  if(mpDeviceBuf)
179  {
180  try
181  {
182  HIP_CHECK_ERROR(hipFree(mpDeviceBuf));
183  }
184  catch(std::runtime_error& re)
185  {
186  std::cerr << re.what() << std::endl;
187  }
188  }
189  }
190 
191  void* mpDeviceBuf;
192  std::size_t mMemSize;
193 };
194 
195 } // namespace ck_tile
#define HIP_CHECK_ERROR(retval_or_funcall)
Definition: hip_check_error.hpp:21
Definition: cluster_descriptor.hpp:13
__global__ void set_buffer_value(T *p, T x, uint64_t buffer_element_size)
Definition: device_memory.hpp:14
unsigned __int64 uint64_t
Definition: stdint.h:136
Manages device memory allocation and host-device data transfers.
Definition: device_memory.hpp:52
DeviceMem()
Definition: device_memory.hpp:53
DeviceMem(std::size_t mem_size)
Definition: device_memory.hpp:54
void SetValue(T x) const
Definition: device_memory.hpp:163
void ToDevice(const void *p, const std::size_t cpySize) const
Definition: device_memory.hpp:108
void Realloc(std::size_t mem_size)
Definition: device_memory.hpp:78
std::size_t mMemSize
size of device buffer in bytes
Definition: device_memory.hpp:192
DeviceMem(const HostTensor< T > &t)
Definition: device_memory.hpp:66
void * GetDeviceBuffer() const
Definition: device_memory.hpp:94
void FromDevice(void *p) const
Definition: device_memory.hpp:116
void SetZero() const
Definition: device_memory.hpp:155
void FromDevice(void *p, const std::size_t cpySize) const
Definition: device_memory.hpp:127
HostTensor< T > ToHost()
Definition: device_memory.hpp:150
std::size_t GetBufferSize() const
Definition: device_memory.hpp:95
~DeviceMem()
Definition: device_memory.hpp:176
HostTensor< T > ToHost(std::size_t cpySize)
Definition: device_memory.hpp:137
void * mpDeviceBuf
pointer to device buffer
Definition: device_memory.hpp:191
void ToDevice(const void *p) const
Definition: device_memory.hpp:96
Definition: host_tensor.hpp:336
Data::pointer data()
Definition: host_tensor.hpp:591