/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.0/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.0/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.0/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp Source File
device_reduce.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_
31 #define HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_
32 
33 #include <limits>
34 #include <iterator>
35 
36 #include <hip/hip_fp16.h> // __half
37 #include <hip/hip_bfloat16.h> // hip_bfloat16
38 
39 #include "../../../config.hpp"
40 #include "../iterator/arg_index_input_iterator.hpp"
41 #include "../thread/thread_operators.hpp"
42 
43 #include <rocprim/device/device_reduce.hpp>
44 #include <rocprim/device/device_reduce_by_key.hpp>
45 
46 BEGIN_HIPCUB_NAMESPACE
47 namespace detail
48 {
49 
50 template<class T>
51 inline
52 T get_lowest_value()
53 {
54  return std::numeric_limits<T>::lowest();
55 }
56 
57 template<>
58 inline
59 __half get_lowest_value<__half>()
60 {
61  unsigned short lowest_half = 0xfbff;
62  __half lowest_value = *reinterpret_cast<__half*>(&lowest_half);
63  return lowest_value;
64 }
65 
66 template<>
67 inline
68 hip_bfloat16 get_lowest_value<hip_bfloat16>()
69 {
70  return hip_bfloat16(-3.38953138925e+38f);
71 }
72 
73 template<class T>
74 inline
75 T get_max_value()
76 {
77  return std::numeric_limits<T>::max();
78 }
79 
80 template<>
81 inline
82 __half get_max_value<__half>()
83 {
84  unsigned short max_half = 0x7bff;
85  __half max_value = *reinterpret_cast<__half*>(&max_half);
86  return max_value;
87 }
88 
89 template<>
90 inline
91 hip_bfloat16 get_max_value<hip_bfloat16>()
92 {
93  return hip_bfloat16(3.38953138925e+38f);
94 }
95 
96 } // end detail namespace
97 
98 class DeviceReduce
99 {
100 public:
101  template <
102  typename InputIteratorT,
103  typename OutputIteratorT,
104  typename ReduceOpT,
105  typename T
106  >
107  HIPCUB_RUNTIME_FUNCTION static
108  hipError_t Reduce(void *d_temp_storage,
109  size_t &temp_storage_bytes,
110  InputIteratorT d_in,
111  OutputIteratorT d_out,
112  int num_items,
113  ReduceOpT reduction_op,
114  T init,
115  hipStream_t stream = 0,
116  bool debug_synchronous = false)
117  {
118  return ::rocprim::reduce(
119  d_temp_storage, temp_storage_bytes,
120  d_in, d_out, init, num_items,
121  ::hipcub::detail::convert_result_type<InputIteratorT, OutputIteratorT>(reduction_op),
122  stream, debug_synchronous
123  );
124  }
125 
126  template <
127  typename InputIteratorT,
128  typename OutputIteratorT
129  >
130  HIPCUB_RUNTIME_FUNCTION static
131  hipError_t Sum(void *d_temp_storage,
132  size_t &temp_storage_bytes,
133  InputIteratorT d_in,
134  OutputIteratorT d_out,
135  int num_items,
136  hipStream_t stream = 0,
137  bool debug_synchronous = false)
138  {
139  using T = typename std::iterator_traits<InputIteratorT>::value_type;
140  return Reduce(
141  d_temp_storage, temp_storage_bytes,
142  d_in, d_out, num_items, ::hipcub::Sum(), T(0),
143  stream, debug_synchronous
144  );
145  }
146 
147  template <
148  typename InputIteratorT,
149  typename OutputIteratorT
150  >
151  HIPCUB_RUNTIME_FUNCTION static
152  hipError_t Min(void *d_temp_storage,
153  size_t &temp_storage_bytes,
154  InputIteratorT d_in,
155  OutputIteratorT d_out,
156  int num_items,
157  hipStream_t stream = 0,
158  bool debug_synchronous = false)
159  {
160  using T = typename std::iterator_traits<InputIteratorT>::value_type;
161  return Reduce(
162  d_temp_storage, temp_storage_bytes,
163  d_in, d_out, num_items, ::hipcub::Min(), detail::get_max_value<T>(),
164  stream, debug_synchronous
165  );
166  }
167 
168  template <
169  typename InputIteratorT,
170  typename OutputIteratorT
171  >
172  HIPCUB_RUNTIME_FUNCTION static
173  hipError_t ArgMin(void *d_temp_storage,
174  size_t &temp_storage_bytes,
175  InputIteratorT d_in,
176  OutputIteratorT d_out,
177  int num_items,
178  hipStream_t stream = 0,
179  bool debug_synchronous = false)
180  {
181  using OffsetT = int;
182  using T = typename std::iterator_traits<InputIteratorT>::value_type;
183  using O = typename std::iterator_traits<OutputIteratorT>::value_type;
184  using OutputTupleT =
185  typename std::conditional<
186  std::is_same<O, void>::value,
187  KeyValuePair<OffsetT, T>,
188  O
189  >::type;
190 
191  using OutputValueT = typename OutputTupleT::Value;
192  using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
193 
194  IteratorT d_indexed_in(d_in);
195  OutputTupleT init(1, detail::get_max_value<T>());
196 
197  return Reduce(
198  d_temp_storage, temp_storage_bytes,
199  d_indexed_in, d_out, num_items, ::hipcub::ArgMin(), init,
200  stream, debug_synchronous
201  );
202  }
203 
204  template <
205  typename InputIteratorT,
206  typename OutputIteratorT
207  >
208  HIPCUB_RUNTIME_FUNCTION static
209  hipError_t Max(void *d_temp_storage,
210  size_t &temp_storage_bytes,
211  InputIteratorT d_in,
212  OutputIteratorT d_out,
213  int num_items,
214  hipStream_t stream = 0,
215  bool debug_synchronous = false)
216  {
217  using T = typename std::iterator_traits<InputIteratorT>::value_type;
218  return Reduce(
219  d_temp_storage, temp_storage_bytes,
220  d_in, d_out, num_items, ::hipcub::Max(), detail::get_lowest_value<T>(),
221  stream, debug_synchronous
222  );
223  }
224 
225  template <
226  typename InputIteratorT,
227  typename OutputIteratorT
228  >
229  HIPCUB_RUNTIME_FUNCTION static
230  hipError_t ArgMax(void *d_temp_storage,
231  size_t &temp_storage_bytes,
232  InputIteratorT d_in,
233  OutputIteratorT d_out,
234  int num_items,
235  hipStream_t stream = 0,
236  bool debug_synchronous = false)
237  {
238  using OffsetT = int;
239  using T = typename std::iterator_traits<InputIteratorT>::value_type;
240  using O = typename std::iterator_traits<OutputIteratorT>::value_type;
241  using OutputTupleT =
242  typename std::conditional<
243  std::is_same<O, void>::value,
244  KeyValuePair<OffsetT, T>,
245  O
246  >::type;
247 
248  using OutputValueT = typename OutputTupleT::Value;
249  using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
250 
251  IteratorT d_indexed_in(d_in);
252  OutputTupleT init(1, detail::get_lowest_value<T>());
253 
254  return Reduce(
255  d_temp_storage, temp_storage_bytes,
256  d_indexed_in, d_out, num_items, ::hipcub::ArgMax(), init,
257  stream, debug_synchronous
258  );
259  }
260 
261  template<
262  typename KeysInputIteratorT,
263  typename UniqueOutputIteratorT,
264  typename ValuesInputIteratorT,
265  typename AggregatesOutputIteratorT,
266  typename NumRunsOutputIteratorT,
267  typename ReductionOpT
268  >
269  HIPCUB_RUNTIME_FUNCTION static
270  hipError_t ReduceByKey(void * d_temp_storage,
271  size_t& temp_storage_bytes,
272  KeysInputIteratorT d_keys_in,
273  UniqueOutputIteratorT d_unique_out,
274  ValuesInputIteratorT d_values_in,
275  AggregatesOutputIteratorT d_aggregates_out,
276  NumRunsOutputIteratorT d_num_runs_out,
277  ReductionOpT reduction_op,
278  int num_items,
279  hipStream_t stream = 0,
280  bool debug_synchronous = false)
281  {
282  using key_compare_op =
283  ::rocprim::equal_to<typename std::iterator_traits<KeysInputIteratorT>::value_type>;
284  return ::rocprim::reduce_by_key(
285  d_temp_storage, temp_storage_bytes,
286  d_keys_in, d_values_in, num_items,
287  d_unique_out, d_aggregates_out, d_num_runs_out,
288  ::hipcub::detail::convert_result_type<ValuesInputIteratorT, AggregatesOutputIteratorT>(reduction_op),
289  key_compare_op(),
290  stream, debug_synchronous
291  );
292  }
293 };
294 
295 END_HIPCUB_NAMESPACE
296 
297 #endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_
Definition: thread_operators.hpp:126
Definition: thread_operators.hpp:141
Definition: thread_operators.hpp:106
Definition: thread_operators.hpp:116
Definition: thread_operators.hpp:76