/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-roccv/checkouts/latest/include/kernels/device/non_max_suppression_device.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-roccv/checkouts/latest/include/kernels/device/non_max_suppression_device.hpp Source File

3 min read time

Applies to Linux

rocCV: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-roccv/checkouts/latest/include/kernels/device/non_max_suppression_device.hpp Source File
non_max_suppression_device.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  */
21 
22 #pragma once
23 
24 #include <hip/hip_runtime.h>
25 
26 #include "core/tensor.hpp"
29 
30 namespace Kernels {
31 namespace Device {
34  roccv::GenericTensorWrapper<float> scores, int numBoxes, float scoresThreshold,
35  float iouThreshold) {
36  const int boxAIdx = blockDim.x * blockIdx.x + threadIdx.x;
37  if (boxAIdx >= numBoxes) return;
38 
39  const int batchIdx = blockIdx.z;
40  const float scoreA = scores.at(batchIdx, boxAIdx);
41  uint8_t& dst = output.at(batchIdx, boxAIdx);
42 
43  if (scoreA < scoresThreshold) {
44  dst = 0u;
45  return;
46  }
47 
48  const short4 boxA = input.at(batchIdx, boxAIdx);
49  bool discard = false;
50 
51  for (int boxBIdx = 0; boxBIdx < numBoxes; boxBIdx++) {
52  if (boxBIdx == boxAIdx) continue;
53 
54  const short4 boxB = input.at(batchIdx, boxBIdx);
55  if (ComputeIoU(boxA, boxB) > iouThreshold) {
56  const float scoreB = scores.at(batchIdx, boxBIdx);
57  if (scoreA < scoreB || (scoreA == scoreB && ComputeArea(boxA) < ComputeArea(boxB))) {
58  discard = true;
59  break;
60  }
61  }
62  }
63 
64  dst = discard ? 0u : 1u;
65 }
66 } // namespace Device
67 } // namespace Kernels
Definition: generic_tensor_wrapper.hpp:28
__device__ __host__ T & at(ARGS... idx)
Definition: generic_tensor_wrapper.hpp:48
__global__ void non_maximum_suppression(roccv::GenericTensorWrapper< short4 > input, roccv::GenericTensorWrapper< uint8_t > output, roccv::GenericTensorWrapper< float > scores, int numBoxes, float scoresThreshold, float iouThreshold)
Definition: non_max_suppression_device.hpp:32
Definition: non_max_suppression_helpers.hpp:26
__device__ __host__ float ComputeArea(const short4 &box)
Definition: non_max_suppression_helpers.hpp:27
__device__ __host__ float ComputeIoU(const short4 &boxA, const short4 &boxB)
Definition: non_max_suppression_helpers.hpp:29