/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/host_utility/kernel_launch.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/host_utility/kernel_launch.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/host_utility/kernel_launch.hpp Source File
kernel_launch.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <hip/hip_runtime.h>
7 
8 #include "ck/ck.hpp"
9 #include "ck/stream_config.hpp"
11 
12 template <typename... Args, typename F>
13 float launch_and_time_kernel(const StreamConfig& stream_config,
14  F kernel,
15  dim3 grid_dim,
16  dim3 block_dim,
17  std::size_t lds_byte,
18  Args... args)
19 {
20 #if CK_TIME_KERNEL
21  if(stream_config.time_kernel_)
22  {
23  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
24  {
25  printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
26  __func__,
27  grid_dim.x,
28  grid_dim.y,
29  grid_dim.z,
30  block_dim.x,
31  block_dim.y,
32  block_dim.z);
33 
34  printf("Warm up %d times\n", stream_config.cold_niters_);
35  }
36  // warm up
37  for(int i = 0; i < stream_config.cold_niters_; ++i)
38  {
39  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
40  hip_check_error(hipGetLastError());
41  }
42 
43  const int nrepeat = stream_config.nrepeat_;
44  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
45  {
46  printf("Start running %d times...\n", nrepeat);
47  }
48  hipEvent_t start, stop;
49 
50  hip_check_error(hipEventCreate(&start));
51  hip_check_error(hipEventCreate(&stop));
52 
53  hip_check_error(hipDeviceSynchronize());
54  hip_check_error(hipEventRecord(start, stream_config.stream_id_));
55 
56  for(int i = 0; i < nrepeat; ++i)
57  {
58  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
59  hip_check_error(hipGetLastError());
60  }
61 
62  hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
63  hip_check_error(hipEventSynchronize(stop));
64 
65  float total_time = 0;
66 
67  hip_check_error(hipEventElapsedTime(&total_time, start, stop));
68 
69  hip_check_error(hipEventDestroy(start));
70  hip_check_error(hipEventDestroy(stop));
71 
72  return total_time / nrepeat;
73  }
74  else
75  {
76  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
77  hip_check_error(hipGetLastError());
78 
79  return 0;
80  }
81 #else
82  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
83  hip_check_error(hipGetLastError());
84 
85  return 0;
86 #endif
87 }
88 
89 template <typename... Args, typename F, typename PreProcessFunc>
91  PreProcessFunc preprocess,
92  F kernel,
93  dim3 grid_dim,
94  dim3 block_dim,
95  std::size_t lds_byte,
96  Args... args)
97 {
98 #if CK_TIME_KERNEL
99  if(stream_config.time_kernel_)
100  {
101  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
102  {
103  printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
104  __func__,
105  grid_dim.x,
106  grid_dim.y,
107  grid_dim.z,
108  block_dim.x,
109  block_dim.y,
110  block_dim.z);
111 
112  printf("Warm up %d times\n", stream_config.cold_niters_);
113  }
114  // warm up
115  preprocess();
116  for(int i = 0; i < stream_config.cold_niters_; ++i)
117  {
118  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
119  hip_check_error(hipGetLastError());
120  }
121 
122  const int nrepeat = stream_config.nrepeat_;
123  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
124  {
125  printf("Start running %d times...\n", nrepeat);
126  }
127  hipEvent_t start, stop;
128 
129  hip_check_error(hipEventCreate(&start));
130  hip_check_error(hipEventCreate(&stop));
131 
132  hip_check_error(hipDeviceSynchronize());
133  hip_check_error(hipEventRecord(start, stream_config.stream_id_));
134 
135  for(int i = 0; i < nrepeat; ++i)
136  {
137  preprocess();
138  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
139  hip_check_error(hipGetLastError());
140  }
141 
142  hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
143  hip_check_error(hipEventSynchronize(stop));
144 
145  float total_time = 0;
146 
147  hip_check_error(hipEventElapsedTime(&total_time, start, stop));
148 
149  hip_check_error(hipEventDestroy(start));
150  hip_check_error(hipEventDestroy(stop));
151 
152  return total_time / nrepeat;
153  }
154  else
155  {
156  preprocess();
157  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
158  hip_check_error(hipGetLastError());
159 
160  return 0;
161  }
162 #else
163  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
164  hip_check_error(hipGetLastError());
165 
166  return 0;
167 #endif
168 }
#define CK_ENV(name)
Definition: env.hpp:128
void hip_check_error(hipError_t x)
Definition: hip_check_error.hpp:10
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:13
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:90
bool EnvIsEnabled(EnvVar)
Definition: env.hpp:139
Definition: stream_config.hpp:10
int cold_niters_
Definition: stream_config.hpp:14
bool time_kernel_
Definition: stream_config.hpp:12
int nrepeat_
Definition: stream_config.hpp:15
hipStream_t stream_id_
Definition: stream_config.hpp:11