8 #include <hip/hip_runtime.h>
12 template <
typename ADataType,
typename BDataType>
18 std::size_t rotating_count_,
23 rotating_count(rotating_count_),
27 p_a_grids.push_back(a_ptr);
28 p_b_grids.push_back(b_ptr);
29 for(
size_t i = 1; i < rotating_count; i++)
35 const_cast<void*
>(p_a_grids[0]),
37 hipMemcpyDeviceToDevice));
38 p_a_grids.push_back(pADeviceBuf);
45 const_cast<void*
>(p_b_grids[0]),
47 hipMemcpyDeviceToDevice));
48 p_b_grids.push_back(pBDeviceBuf);
54 if(rotating_count > 1)
56 std::size_t idx = iter++ % rotating_count;
57 a_ptr = p_a_grids[idx];
58 b_ptr = p_b_grids[idx];
63 std::cout <<
"RotatingMemWrapper: { size_a: " << size_a <<
", size_b: " << size_b
64 <<
", rotating_count: " << rotating_count <<
"}" << std::endl;
68 if(rotating_count > 1)
75 for(
size_t i = 1; i < rotating_count; i++)
87 std::size_t rotating_count = 1;
88 std::size_t size_a = 0;
89 std::size_t size_b = 0;
90 std::vector<const void*> p_a_grids;
91 std::vector<const void*> p_b_grids;
95 hipDeviceProp_t deviceProps;
97 int32_t gpu_block3 = deviceProps.multiProcessorCount * 60;
99 ck_tile::flush_cache<<<dim3(gpu_block3), dim3(64), 0,
nullptr>>>();
#define HIP_CHECK_ERROR(retval_or_funcall)
Definition: hip_check_error.hpp:21
Definition: cluster_descriptor.hpp:13
CK_TILE_HOST void hip_check_error(hipError_t x)
Definition: hip_check_error.hpp:13
int32_t int32_t
Definition: integer.hpp:10
void flush_icache()
Definition: rotating_buffers.hpp:93
Definition: rotating_buffers.hpp:14
void Print()
Definition: rotating_buffers.hpp:61
void Next()
Definition: rotating_buffers.hpp:52
RotatingMemWrapper(const void *a_ptr_, const void *b_ptr_, std::size_t rotating_count_, std::size_t size_a_, std::size_t size_b_)
Definition: rotating_buffers.hpp:16
RotatingMemWrapper()=delete
~RotatingMemWrapper() noexcept
Definition: rotating_buffers.hpp:66