25 #include <hip/hip_runtime.h>
32 template<
typename T,
typename SrcWrapper>
34 extern __shared__ __align__(
sizeof(T))
unsigned char smem[];
35 T *local_histogram =
reinterpret_cast<T *
>(smem);
37 const auto z_idx = blockIdx.z;
38 const auto gid = blockIdx.x * blockDim.x + threadIdx.x;
39 const auto x_idx = gid % input.width();
40 const auto y_idx = gid / input.width();
43 const auto tid = threadIdx.x;
45 local_histogram[tid] = 0;
49 if (gid < input.height() * input.width()) {
50 atomicAdd(&local_histogram[input.at(z_idx, y_idx, x_idx, 0).x], 1);
54 const auto hist_val = local_histogram[tid];
58 atomicAdd(&histogram.
at(z_idx, tid, 0), hist_val);
// Masked histogram kernel body: same block-local counting scheme as an unmasked
// histogram, but an element is only counted when the matching mask element is
// non-zero.
//
// NOTE(review): this listing is incomplete. The embedded source numbers jump
// 62 -> 64, 75 -> 79, 80 -> 82, 82 -> 88 and 88 -> 92, so the kernel signature,
// the opening `atomicAdd(` and its increment argument, any barriers and all
// closing braces are not visible here. Restore them from the original header
// before compiling; nothing below is valid C++ as it stands.
62 template <
typename T,
typename SrcWrapper,
typename MaskWrapper>
// Dynamic shared memory declared as raw bytes, then viewed as an array of T.
64 extern __shared__ __align__(
sizeof(T))
unsigned char smem[];
65 T *local_histogram =
reinterpret_cast<T *
>(smem);
// blockIdx.z is used as the first index into input, mask and histogram
// (presumably the sample/batch index -- TODO confirm against the launch site).
67 const auto z_idx = blockIdx.z;
// Flat element index of this thread, split into column and row by the width.
68 const auto gid = blockIdx.x * blockDim.x + threadIdx.x;
69 const auto x_idx = gid % input.width();
70 const auto y_idx = gid / input.width();
73 const auto tid = threadIdx.x;
// Each thread clears the shared bin it owns; this assumes blockDim.x equals the
// number of bins -- TODO confirm.
// NOTE(review): a __syncthreads() is needed after this store and before the
// shared-memory atomics; presumably it sits in the omitted lines -- verify.
75 local_histogram[tid] = 0;
// Threads past the last element of the image do not count anything.
79 if (gid < input.height() * input.width()) {
// Only elements whose mask value is non-zero contribute to the histogram.
80 if (mask.at(z_idx, y_idx, x_idx, 0) != 0) {
// The `.x` component of the input element selects the shared bin to update.
82 &local_histogram[input.at(z_idx, y_idx, x_idx, 0).x],
// NOTE(review): a __syncthreads() is needed before this read so that all
// block-local counts are complete -- verify it exists in the omitted lines.
88 const auto hist_val = local_histogram[tid];
// Flush this thread's bin of the block-local histogram into the output.
92 atomicAdd(&histogram.
at(z_idx, tid, 0), hist_val);
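// Cross-reference notes carried over from the generated source listing:
//   - (symbol name not captured) -- Definition: generic_tensor_wrapper.hpp:28
//   - __device__ __host__ T & at(ARGS... idx) -- Definition: generic_tensor_wrapper.hpp:48
//   - __global__ void histogram_kernel(SrcWrapper input, roccv::GenericTensorWrapper< T > histogram) -- Definition: histogram_device.hpp:33
//   - (symbol name not captured) -- Definition: non_max_suppression_helpers.hpp:26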