/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/thread/thread_scan.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/thread/thread_scan.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/thread/thread_scan.hpp Source File
thread_scan.hpp
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIBCUB_ROCPRIM_THREAD_THREAD_SCAN_HPP_
31 #define HIBCUB_ROCPRIM_THREAD_THREAD_SCAN_HPP_
32 
33 
34 #include "../../../config.hpp"
35 #include "../util_type.hpp"
36 
37 BEGIN_HIPCUB_NAMESPACE
38 
40 namespace internal {
41 
52  template <
53  int LENGTH,
54  typename T,
55  typename ScanOp>
56  __device__ __forceinline__ T ThreadScanExclusive(
57  T inclusive,
58  T exclusive,
59  T *input,
60  T *output,
61  ScanOp scan_op,
62  Int2Type<LENGTH> /*length*/)
63  {
64  #pragma unroll
65  for (int i = 0; i < LENGTH; ++i)
66  {
67  inclusive = scan_op(exclusive, input[i]);
68  output[i] = exclusive;
69  exclusive = inclusive;
70  }
71 
72  return inclusive;
73  }
74 
75  #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
76 
84  template <
85  int LENGTH,
86  typename T,
87  typename ScanOp>
88  __device__ __forceinline__ T ThreadScanExclusive(
89  T *input,
90  T *output,
91  ScanOp scan_op,
92  T prefix,
93  bool apply_prefix = true)
94  {
95  T inclusive = input[0];
96  if (apply_prefix)
97  {
98  inclusive = scan_op(prefix, inclusive);
99  }
100  output[0] = prefix;
101  T exclusive = inclusive;
102 
103  return ThreadScanExclusive(inclusive, exclusive, input + 1, output + 1, scan_op, Int2Type<LENGTH - 1>());
104  }
105 
113  template <
114  int LENGTH,
115  typename T,
116  typename ScanOp>
117  __device__ __forceinline__ T ThreadScanExclusive(
118  T (&input)[LENGTH],
119  T (&output)[LENGTH],
120  ScanOp scan_op,
121  T prefix,
122  bool apply_prefix = true)
123  {
124  return ThreadScanExclusive<LENGTH>((T*) input, (T*) output, scan_op, prefix, apply_prefix);
125  }
126 
127  #endif
128 
129  template <
130  int LENGTH,
131  typename T,
132  typename ScanOp>
133  __device__ __forceinline__ T ThreadScanInclusive(
134  T inclusive,
135  T *input,
136  T *output,
137  ScanOp scan_op,
138  Int2Type<LENGTH> /*length*/)
139  {
140  #pragma unroll
141  for (int i = 0; i < LENGTH; ++i)
142  {
143  inclusive = scan_op(inclusive, input[i]);
144  output[i] = inclusive;
145  }
146 
147  return inclusive;
148  }
149 
150 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
151 
159  template <
160  int LENGTH,
161  typename T,
162  typename ScanOp>
163  __device__ __forceinline__ T ThreadScanInclusive(
164  T *input,
165  T *output,
166  ScanOp scan_op)
167  {
168  T inclusive = input[0];
169  output[0] = inclusive;
170 
171  // Continue scan
172  return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type<LENGTH - 1>());
173  }
174 
182  template <
183  int LENGTH,
184  typename T,
185  typename ScanOp>
186  __device__ __forceinline__ T ThreadScanInclusive(
187  T (&input)[LENGTH],
188  T (&output)[LENGTH],
189  ScanOp scan_op)
190  {
191  return ThreadScanInclusive<LENGTH>((T*) input, (T*) output, scan_op);
192  }
193 
201  template <
202  int LENGTH,
203  typename T,
204  typename ScanOp>
205  __device__ __forceinline__ T ThreadScanInclusive(
206  T *input,
207  T *output,
208  ScanOp scan_op,
209  T prefix,
210  bool apply_prefix = true)
211  {
212  T inclusive = input[0];
213  if (apply_prefix)
214  {
215  inclusive = scan_op(prefix, inclusive);
216  }
217  output[0] = inclusive;
218 
219  // Continue scan
220  return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type<LENGTH - 1>());
221  }
222 
230  template <
231  int LENGTH,
232  typename T,
233  typename ScanOp>
234  __device__ __forceinline__ T ThreadScanInclusive(
235  T (&input)[LENGTH],
236  T (&output)[LENGTH],
237  ScanOp scan_op,
238  T prefix,
239  bool apply_prefix = true)
240  {
241  return ThreadScanInclusive<LENGTH>((T*) input, (T*) output, scan_op, prefix, apply_prefix);
242  }
243 
244  #endif
245 
247  // end group UtilModule
249 
250 
251  } // internal namespace
252 
253  END_HIPCUB_NAMESPACE
254 
255  #endif // HIBCUB_ROCPRIM_THREAD_THREAD_SCAN_HPP_
__device__ __forceinline__ T ThreadScanInclusive(T inclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)
Definition: thread_scan.hpp:133
__device__ __forceinline__ T ThreadScanExclusive(T inclusive, T exclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)
Definition: thread_scan.hpp:56
Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB install...
Definition: thread_reduce.hpp:36
Definition: util_type.hpp:144