6 #include <hip/hip_runtime.h>
15 template <
typename... Args,
typename F>
28 printf(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
37 printf(
"Warm up %d times\n", stream_config.
cold_niters_);
42 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
46 const int nrepeat = stream_config.
nrepeat_;
49 printf(
"Start running %d times...\n", nrepeat);
51 hipEvent_t start, stop;
59 for(
int i = 0; i < nrepeat; ++i)
61 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
75 return total_time / nrepeat;
79 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
85 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
92 template <
typename... Args,
typename F,
typename PreProcessFunc>
94 PreProcessFunc preprocess,
106 printf(
"%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
115 printf(
"Warm up %d times\n", stream_config.
cold_niters_);
121 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
125 const int nrepeat = stream_config.
nrepeat_;
128 printf(
"Start running %d times...\n", nrepeat);
130 hipEvent_t start, stop;
138 for(
int i = 0; i < nrepeat; ++i)
141 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
148 float total_time = 0;
155 return total_time / nrepeat;
160 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
166 kernel<<<grid_dim, block_dim, lds_byte, stream_config.
stream_id_>>>(args...);
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:93
void hip_check_error(hipError_t x)
Definition: hip_check_error.hpp:12
bool EnvIsEnabled(EnvVar)
Definition: env.hpp:139
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:16
Definition: stream_config.hpp:10
int cold_niters_
Definition: stream_config.hpp:14
bool time_kernel_
Definition: stream_config.hpp:12
int nrepeat_
Definition: stream_config.hpp:15
hipStream_t stream_id_
Definition: stream_config.hpp:11
#define CK_ENV(name)
Definition: env.hpp:128