/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/device/device_segmented_reduce.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/device/device_segmented_reduce.hpp Source File

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/device/device_segmented_reduce.hpp Source File
device_segmented_reduce.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_
31 #define HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_
32 
33 #include <limits>
34 #include <iterator>
35 
36 #include "../../../config.hpp"
37 
38 #include "../thread/thread_operators.hpp"
39 #include "../iterator/arg_index_input_iterator.hpp"
40 
41 #include <rocprim/device/device_segmented_reduce.hpp>
42 
43 BEGIN_HIPCUB_NAMESPACE
44 
45 struct DeviceSegmentedReduce
46 {
47  template<
48  typename InputIteratorT,
49  typename OutputIteratorT,
50  typename OffsetIteratorT,
51  typename ReductionOp,
52  typename T
53  >
54  HIPCUB_RUNTIME_FUNCTION static
55  hipError_t Reduce(void * d_temp_storage,
56  size_t& temp_storage_bytes,
57  InputIteratorT d_in,
58  OutputIteratorT d_out,
59  int num_segments,
60  OffsetIteratorT d_begin_offsets,
61  OffsetIteratorT d_end_offsets,
62  ReductionOp reduction_op,
63  T initial_value,
64  hipStream_t stream = 0,
65  bool debug_synchronous = false)
66  {
67  return ::rocprim::segmented_reduce(
68  d_temp_storage, temp_storage_bytes,
69  d_in, d_out,
70  num_segments, d_begin_offsets, d_end_offsets,
71  ::hipcub::detail::convert_result_type<InputIteratorT, OutputIteratorT>(reduction_op),
72  initial_value,
73  stream, debug_synchronous
74  );
75  }
76 
77  template<
78  typename InputIteratorT,
79  typename OutputIteratorT,
80  typename OffsetIteratorT
81  >
82  HIPCUB_RUNTIME_FUNCTION static
83  hipError_t Sum(void * d_temp_storage,
84  size_t& temp_storage_bytes,
85  InputIteratorT d_in,
86  OutputIteratorT d_out,
87  int num_segments,
88  OffsetIteratorT d_begin_offsets,
89  OffsetIteratorT d_end_offsets,
90  hipStream_t stream = 0,
91  bool debug_synchronous = false)
92  {
93  using input_type = typename std::iterator_traits<InputIteratorT>::value_type;
94 
95  return Reduce(
96  d_temp_storage, temp_storage_bytes,
97  d_in, d_out,
98  num_segments, d_begin_offsets, d_end_offsets,
99  ::hipcub::Sum(), input_type(),
100  stream, debug_synchronous
101  );
102  }
103 
104  template<
105  typename InputIteratorT,
106  typename OutputIteratorT,
107  typename OffsetIteratorT
108  >
109  HIPCUB_RUNTIME_FUNCTION static
110  hipError_t Min(void * d_temp_storage,
111  size_t& temp_storage_bytes,
112  InputIteratorT d_in,
113  OutputIteratorT d_out,
114  int num_segments,
115  OffsetIteratorT d_begin_offsets,
116  OffsetIteratorT d_end_offsets,
117  hipStream_t stream = 0,
118  bool debug_synchronous = false)
119  {
120  using input_type = typename std::iterator_traits<InputIteratorT>::value_type;
121 
122  return Reduce(
123  d_temp_storage, temp_storage_bytes,
124  d_in, d_out,
125  num_segments, d_begin_offsets, d_end_offsets,
126  ::hipcub::Min(), std::numeric_limits<input_type>::max(),
127  stream, debug_synchronous
128  );
129  }
130 
131  template<
132  typename InputIteratorT,
133  typename OutputIteratorT,
134  typename OffsetIteratorT
135  >
136  HIPCUB_RUNTIME_FUNCTION static
137  hipError_t ArgMin(void * d_temp_storage,
138  size_t& temp_storage_bytes,
139  InputIteratorT d_in,
140  OutputIteratorT d_out,
141  int num_segments,
142  OffsetIteratorT d_begin_offsets,
143  OffsetIteratorT d_end_offsets,
144  hipStream_t stream = 0,
145  bool debug_synchronous = false)
146  {
147  using OffsetT = int;
148  using T = typename std::iterator_traits<InputIteratorT>::value_type;
149  using O = typename std::iterator_traits<OutputIteratorT>::value_type;
150  using OutputTupleT = typename std::conditional<
151  std::is_same<O, void>::value,
152  KeyValuePair<OffsetT, T>,
153  O
154  >::type;
155 
156  using OutputValueT = typename OutputTupleT::Value;
157  using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
158 
159  IteratorT d_indexed_in(d_in);
160  const OutputTupleT init(1, std::numeric_limits<T>::max());
161 
162  return Reduce(
163  d_temp_storage, temp_storage_bytes,
164  d_indexed_in, d_out,
165  num_segments, d_begin_offsets, d_end_offsets,
166  ::hipcub::ArgMin(), init,
167  stream, debug_synchronous
168  );
169  }
170 
171  template<
172  typename InputIteratorT,
173  typename OutputIteratorT,
174  typename OffsetIteratorT
175  >
176  HIPCUB_RUNTIME_FUNCTION static
177  hipError_t Max(void * d_temp_storage,
178  size_t& temp_storage_bytes,
179  InputIteratorT d_in,
180  OutputIteratorT d_out,
181  int num_segments,
182  OffsetIteratorT d_begin_offsets,
183  OffsetIteratorT d_end_offsets,
184  hipStream_t stream = 0,
185  bool debug_synchronous = false)
186  {
187  using input_type = typename std::iterator_traits<InputIteratorT>::value_type;
188 
189  return Reduce(
190  d_temp_storage, temp_storage_bytes,
191  d_in, d_out,
192  num_segments, d_begin_offsets, d_end_offsets,
193  ::hipcub::Max(), std::numeric_limits<input_type>::lowest(),
194  stream, debug_synchronous
195  );
196  }
197 
198  template<
199  typename InputIteratorT,
200  typename OutputIteratorT,
201  typename OffsetIteratorT
202  >
203  HIPCUB_RUNTIME_FUNCTION static
204  hipError_t ArgMax(void * d_temp_storage,
205  size_t& temp_storage_bytes,
206  InputIteratorT d_in,
207  OutputIteratorT d_out,
208  int num_segments,
209  OffsetIteratorT d_begin_offsets,
210  OffsetIteratorT d_end_offsets,
211  hipStream_t stream = 0,
212  bool debug_synchronous = false)
213  {
214  using OffsetT = int;
215  using T = typename std::iterator_traits<InputIteratorT>::value_type;
216  using O = typename std::iterator_traits<OutputIteratorT>::value_type;
217  using OutputTupleT = typename std::conditional<
218  std::is_same<O, void>::value,
219  KeyValuePair<OffsetT, T>,
220  O
221  >::type;
222 
223  using OutputValueT = typename OutputTupleT::Value;
224  using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
225 
226  IteratorT d_indexed_in(d_in);
227  const OutputTupleT init(1, std::numeric_limits<T>::lowest());
228 
229  return Reduce(
230  d_temp_storage, temp_storage_bytes,
231  d_indexed_in, d_out,
232  num_segments, d_begin_offsets, d_end_offsets,
233  ::hipcub::ArgMax(), init,
234  stream, debug_synchronous
235  );
236  }
237 };
238 
239 END_HIPCUB_NAMESPACE
240 
241 #endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_
Definition: thread_operators.hpp:126
Definition: thread_operators.hpp:141
Definition: thread_operators.hpp:106
Definition: thread_operators.hpp:116
Definition: thread_operators.hpp:76