/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.4.2/hipcub/include/hipcub/backend/rocprim/block/block_scan.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.4.2/hipcub/include/hipcub/backend/rocprim/block/block_scan.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.4.2/hipcub/include/hipcub/backend/rocprim/block/block_scan.hpp Source File
block_scan.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_
31 #define HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_
32 
33 #include <type_traits>
34 
35 #include "../../../config.hpp"
36 
37 #include "../thread/thread_operators.hpp"
38 
39 #include <rocprim/block/block_scan.hpp>
40 
41 BEGIN_HIPCUB_NAMESPACE
42 
43 namespace detail
44 {
45  inline constexpr
46  typename std::underlying_type<::rocprim::block_scan_algorithm>::type
47  to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm v)
48  {
49  using utype = std::underlying_type<::rocprim::block_scan_algorithm>::type;
50  return static_cast<utype>(v);
51  }
52 }
53 
54 enum BlockScanAlgorithm
55 {
56  BLOCK_SCAN_RAKING
57  = detail::to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm::reduce_then_scan),
58  BLOCK_SCAN_RAKING_MEMOIZE
59  = detail::to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm::reduce_then_scan),
60  BLOCK_SCAN_WARP_SCANS
61  = detail::to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm::using_warp_scan)
62 };
63 
64 template<
65  typename T,
66  int BLOCK_DIM_X,
67  BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING,
68  int BLOCK_DIM_Y = 1,
69  int BLOCK_DIM_Z = 1,
70  int ARCH = HIPCUB_ARCH /* ignored */
71 >
72 class BlockScan
73  : private ::rocprim::block_scan<
74  T,
75  BLOCK_DIM_X,
76  static_cast<::rocprim::block_scan_algorithm>(ALGORITHM),
77  BLOCK_DIM_Y,
78  BLOCK_DIM_Z
79  >
80 {
81  static_assert(
82  BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0,
83  "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0"
84  );
85 
86  using base_type =
87  typename ::rocprim::block_scan<
88  T,
89  BLOCK_DIM_X,
90  static_cast<::rocprim::block_scan_algorithm>(ALGORITHM),
91  BLOCK_DIM_Y,
92  BLOCK_DIM_Z
93  >;
94 
95  // Reference to temporary storage (usually shared memory)
96  typename base_type::storage_type& temp_storage_;
97 
98 public:
99  using TempStorage = typename base_type::storage_type;
100 
101  HIPCUB_DEVICE inline
102  BlockScan() : temp_storage_(private_storage())
103  {
104  }
105 
106  HIPCUB_DEVICE inline
107  BlockScan(TempStorage& temp_storage) : temp_storage_(temp_storage)
108  {
109  }
110 
111  HIPCUB_DEVICE inline
112  void InclusiveSum(T input, T& output)
113  {
114  base_type::inclusive_scan(input, output, temp_storage_);
115  }
116 
117  HIPCUB_DEVICE inline
118  void InclusiveSum(T input, T& output, T& block_aggregate)
119  {
120  base_type::inclusive_scan(input, output, block_aggregate, temp_storage_);
121  }
122 
123  template<typename BlockPrefixCallbackOp>
124  HIPCUB_DEVICE inline
125  void InclusiveSum(T input, T& output, BlockPrefixCallbackOp& block_prefix_callback_op)
126  {
127  base_type::inclusive_scan(
128  input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum()
129  );
130  }
131 
132  template<int ITEMS_PER_THREAD>
133  HIPCUB_DEVICE inline
134  void InclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])
135  {
136  base_type::inclusive_scan(input, output, temp_storage_);
137  }
138 
139  template<int ITEMS_PER_THREAD>
140  HIPCUB_DEVICE inline
141  void InclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
142  T& block_aggregate)
143  {
144  base_type::inclusive_scan(input, output, block_aggregate, temp_storage_);
145  }
146 
147  template<int ITEMS_PER_THREAD, typename BlockPrefixCallbackOp>
148  HIPCUB_DEVICE inline
149  void InclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
150  BlockPrefixCallbackOp& block_prefix_callback_op)
151  {
152  base_type::inclusive_scan(
153  input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum()
154  );
155  }
156 
157  template<typename ScanOp>
158  HIPCUB_DEVICE inline
159  void InclusiveScan(T input, T& output, ScanOp scan_op)
160  {
161  base_type::inclusive_scan(input, output, temp_storage_, scan_op);
162  }
163 
164  template<typename ScanOp>
165  HIPCUB_DEVICE inline
166  void InclusiveScan(T input, T& output, ScanOp scan_op, T& block_aggregate)
167  {
168  base_type::inclusive_scan(input, output, block_aggregate, temp_storage_, scan_op);
169  }
170 
171  template<typename ScanOp, typename BlockPrefixCallbackOp>
172  HIPCUB_DEVICE inline
173  void InclusiveScan(T input, T& output, ScanOp scan_op, BlockPrefixCallbackOp& block_prefix_callback_op)
174  {
175  base_type::inclusive_scan(
176  input, output, temp_storage_, block_prefix_callback_op, scan_op
177  );
178  }
179 
180  template<int ITEMS_PER_THREAD, typename ScanOp>
181  HIPCUB_DEVICE inline
182  void InclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op)
183  {
184  base_type::inclusive_scan(input, output, temp_storage_, scan_op);
185  }
186 
187  template<int ITEMS_PER_THREAD, typename ScanOp>
188  HIPCUB_DEVICE inline
189  void InclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
190  ScanOp scan_op, T& block_aggregate)
191  {
192  base_type::inclusive_scan(input, output, block_aggregate, temp_storage_, scan_op);
193  }
194 
195  template<int ITEMS_PER_THREAD, typename ScanOp, typename BlockPrefixCallbackOp>
196  HIPCUB_DEVICE inline
197  void InclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
198  ScanOp scan_op, BlockPrefixCallbackOp& block_prefix_callback_op)
199  {
200  base_type::inclusive_scan(
201  input, output, temp_storage_, block_prefix_callback_op, scan_op
202  );
203  }
204 
205  HIPCUB_DEVICE inline
206  void ExclusiveSum(T input, T& output)
207  {
208  base_type::exclusive_scan(input, output, T(0), temp_storage_);
209  }
210 
211  HIPCUB_DEVICE inline
212  void ExclusiveSum(T input, T& output, T& block_aggregate)
213  {
214  base_type::exclusive_scan(input, output, T(0), block_aggregate, temp_storage_);
215  }
216 
217  template<typename BlockPrefixCallbackOp>
218  HIPCUB_DEVICE inline
219  void ExclusiveSum(T input, T& output, BlockPrefixCallbackOp& block_prefix_callback_op)
220  {
221  base_type::exclusive_scan(
222  input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum()
223  );
224  }
225 
226  template<int ITEMS_PER_THREAD>
227  HIPCUB_DEVICE inline
228  void ExclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])
229  {
230  base_type::exclusive_scan(input, output, T(0), temp_storage_);
231  }
232 
233  template<int ITEMS_PER_THREAD>
234  HIPCUB_DEVICE inline
235  void ExclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
236  T& block_aggregate)
237  {
238  base_type::exclusive_scan(input, output, T(0), block_aggregate, temp_storage_);
239  }
240 
241  template<int ITEMS_PER_THREAD, typename BlockPrefixCallbackOp>
242  HIPCUB_DEVICE inline
243  void ExclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
244  BlockPrefixCallbackOp& block_prefix_callback_op)
245  {
246  base_type::exclusive_scan(
247  input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum()
248  );
249  }
250 
251  template<typename ScanOp>
252  HIPCUB_DEVICE inline
253  void ExclusiveScan(T input, T& output, T initial_value, ScanOp scan_op)
254  {
255  base_type::exclusive_scan(input, output, initial_value, temp_storage_, scan_op);
256  }
257 
258  template<typename ScanOp>
259  HIPCUB_DEVICE inline
260  void ExclusiveScan(T input, T& output, T initial_value,
261  ScanOp scan_op, T& block_aggregate)
262  {
263  base_type::exclusive_scan(
264  input, output, initial_value, block_aggregate, temp_storage_, scan_op
265  );
266  }
267 
268  template<typename ScanOp, typename BlockPrefixCallbackOp>
269  HIPCUB_DEVICE inline
270  void ExclusiveScan(T input, T& output, ScanOp scan_op,
271  BlockPrefixCallbackOp& block_prefix_callback_op)
272  {
273  base_type::exclusive_scan(
274  input, output, temp_storage_, block_prefix_callback_op, scan_op
275  );
276  }
277 
278  template<int ITEMS_PER_THREAD, typename ScanOp>
279  HIPCUB_DEVICE inline
280  void ExclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
281  T initial_value, ScanOp scan_op)
282  {
283  base_type::exclusive_scan(input, output, initial_value, temp_storage_, scan_op);
284  }
285 
286  template<int ITEMS_PER_THREAD, typename ScanOp>
287  HIPCUB_DEVICE inline
288  void ExclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
289  T initial_value, ScanOp scan_op, T& block_aggregate)
290  {
291  base_type::exclusive_scan(
292  input, output, initial_value, block_aggregate, temp_storage_, scan_op
293  );
294  }
295 
296  template<int ITEMS_PER_THREAD, typename ScanOp, typename BlockPrefixCallbackOp>
297  HIPCUB_DEVICE inline
298  void ExclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD],
299  ScanOp scan_op, BlockPrefixCallbackOp& block_prefix_callback_op)
300  {
301  base_type::exclusive_scan(
302  input, output, temp_storage_, block_prefix_callback_op, scan_op
303  );
304  }
305 
306 private:
307  HIPCUB_DEVICE inline
308  TempStorage& private_storage()
309  {
310  HIPCUB_SHARED_MEMORY TempStorage private_storage;
311  return private_storage;
312  }
313 };
314 
315 END_HIPCUB_NAMESPACE
316 
317 #endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_
Definition: block_scan.hpp:80
Definition: thread_operators.hpp:76