/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.2/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.2/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.2/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp Source File
device_reduce.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_
31 #define HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_
32 
33 #include <limits>
34 #include <iterator>
35 
36 #include <hip/hip_fp16.h> // __half
37 
38 #include "../../../config.hpp"
39 #include "../iterator/arg_index_input_iterator.hpp"
40 #include "../thread/thread_operators.hpp"
41 
42 #include <rocprim/device/device_reduce.hpp>
43 #include <rocprim/device/device_reduce_by_key.hpp>
44 
45 BEGIN_HIPCUB_NAMESPACE
46 namespace detail
47 {
48 
49 template<class T>
50 inline
51 T get_lowest_value()
52 {
53  return std::numeric_limits<T>::lowest();
54 }
55 
56 template<>
57 inline
58 __half get_lowest_value<__half>()
59 {
60  unsigned short lowest_half = 0xfbff;
61  __half lowest_value = *reinterpret_cast<__half*>(&lowest_half);
62  return lowest_value;
63 }
64 
65 template<class T>
66 inline
67 T get_max_value()
68 {
69  return std::numeric_limits<T>::max();
70 }
71 
72 template<>
73 inline
74 __half get_max_value<__half>()
75 {
76  unsigned short max_half = 0x7bff;
77  __half max_value = *reinterpret_cast<__half*>(&max_half);
78  return max_value;
79 }
80 
81 } // end detail namespace
82 
83 class DeviceReduce
84 {
85 public:
86  template <
87  typename InputIteratorT,
88  typename OutputIteratorT,
89  typename ReduceOpT,
90  typename T
91  >
92  HIPCUB_RUNTIME_FUNCTION static
93  hipError_t Reduce(void *d_temp_storage,
94  size_t &temp_storage_bytes,
95  InputIteratorT d_in,
96  OutputIteratorT d_out,
97  int num_items,
98  ReduceOpT reduction_op,
99  T init,
100  hipStream_t stream = 0,
101  bool debug_synchronous = false)
102  {
103  return ::rocprim::reduce(
104  d_temp_storage, temp_storage_bytes,
105  d_in, d_out, init, num_items,
106  ::hipcub::detail::convert_result_type<InputIteratorT, OutputIteratorT>(reduction_op),
107  stream, debug_synchronous
108  );
109  }
110 
111  template <
112  typename InputIteratorT,
113  typename OutputIteratorT
114  >
115  HIPCUB_RUNTIME_FUNCTION static
116  hipError_t Sum(void *d_temp_storage,
117  size_t &temp_storage_bytes,
118  InputIteratorT d_in,
119  OutputIteratorT d_out,
120  int num_items,
121  hipStream_t stream = 0,
122  bool debug_synchronous = false)
123  {
124  using T = typename std::iterator_traits<InputIteratorT>::value_type;
125  return Reduce(
126  d_temp_storage, temp_storage_bytes,
127  d_in, d_out, num_items, ::hipcub::Sum(), T(0),
128  stream, debug_synchronous
129  );
130  }
131 
132  template <
133  typename InputIteratorT,
134  typename OutputIteratorT
135  >
136  HIPCUB_RUNTIME_FUNCTION static
137  hipError_t Min(void *d_temp_storage,
138  size_t &temp_storage_bytes,
139  InputIteratorT d_in,
140  OutputIteratorT d_out,
141  int num_items,
142  hipStream_t stream = 0,
143  bool debug_synchronous = false)
144  {
145  using T = typename std::iterator_traits<InputIteratorT>::value_type;
146  return Reduce(
147  d_temp_storage, temp_storage_bytes,
148  d_in, d_out, num_items, ::hipcub::Min(), detail::get_max_value<T>(),
149  stream, debug_synchronous
150  );
151  }
152 
153  template <
154  typename InputIteratorT,
155  typename OutputIteratorT
156  >
157  HIPCUB_RUNTIME_FUNCTION static
158  hipError_t ArgMin(void *d_temp_storage,
159  size_t &temp_storage_bytes,
160  InputIteratorT d_in,
161  OutputIteratorT d_out,
162  int num_items,
163  hipStream_t stream = 0,
164  bool debug_synchronous = false)
165  {
166  using OffsetT = int;
167  using T = typename std::iterator_traits<InputIteratorT>::value_type;
168  using O = typename std::iterator_traits<OutputIteratorT>::value_type;
169  using OutputTupleT =
170  typename std::conditional<
171  std::is_same<O, void>::value,
172  KeyValuePair<OffsetT, T>,
173  O
174  >::type;
175 
176  using OutputValueT = typename OutputTupleT::Value;
177  using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
178 
179  IteratorT d_indexed_in(d_in);
180  OutputTupleT init(1, detail::get_max_value<T>());
181 
182  return Reduce(
183  d_temp_storage, temp_storage_bytes,
184  d_indexed_in, d_out, num_items, ::hipcub::ArgMin(), init,
185  stream, debug_synchronous
186  );
187  }
188 
189  template <
190  typename InputIteratorT,
191  typename OutputIteratorT
192  >
193  HIPCUB_RUNTIME_FUNCTION static
194  hipError_t Max(void *d_temp_storage,
195  size_t &temp_storage_bytes,
196  InputIteratorT d_in,
197  OutputIteratorT d_out,
198  int num_items,
199  hipStream_t stream = 0,
200  bool debug_synchronous = false)
201  {
202  using T = typename std::iterator_traits<InputIteratorT>::value_type;
203  return Reduce(
204  d_temp_storage, temp_storage_bytes,
205  d_in, d_out, num_items, ::hipcub::Max(), detail::get_lowest_value<T>(),
206  stream, debug_synchronous
207  );
208  }
209 
210  template <
211  typename InputIteratorT,
212  typename OutputIteratorT
213  >
214  HIPCUB_RUNTIME_FUNCTION static
215  hipError_t ArgMax(void *d_temp_storage,
216  size_t &temp_storage_bytes,
217  InputIteratorT d_in,
218  OutputIteratorT d_out,
219  int num_items,
220  hipStream_t stream = 0,
221  bool debug_synchronous = false)
222  {
223  using OffsetT = int;
224  using T = typename std::iterator_traits<InputIteratorT>::value_type;
225  using O = typename std::iterator_traits<OutputIteratorT>::value_type;
226  using OutputTupleT =
227  typename std::conditional<
228  std::is_same<O, void>::value,
229  KeyValuePair<OffsetT, T>,
230  O
231  >::type;
232 
233  using OutputValueT = typename OutputTupleT::Value;
234  using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
235 
236  IteratorT d_indexed_in(d_in);
237  OutputTupleT init(1, detail::get_lowest_value<T>());
238 
239  return Reduce(
240  d_temp_storage, temp_storage_bytes,
241  d_indexed_in, d_out, num_items, ::hipcub::ArgMax(), init,
242  stream, debug_synchronous
243  );
244  }
245 
246  template<
247  typename KeysInputIteratorT,
248  typename UniqueOutputIteratorT,
249  typename ValuesInputIteratorT,
250  typename AggregatesOutputIteratorT,
251  typename NumRunsOutputIteratorT,
252  typename ReductionOpT
253  >
254  HIPCUB_RUNTIME_FUNCTION static
255  hipError_t ReduceByKey(void * d_temp_storage,
256  size_t& temp_storage_bytes,
257  KeysInputIteratorT d_keys_in,
258  UniqueOutputIteratorT d_unique_out,
259  ValuesInputIteratorT d_values_in,
260  AggregatesOutputIteratorT d_aggregates_out,
261  NumRunsOutputIteratorT d_num_runs_out,
262  ReductionOpT reduction_op,
263  int num_items,
264  hipStream_t stream = 0,
265  bool debug_synchronous = false)
266  {
267  using key_compare_op =
268  ::rocprim::equal_to<typename std::iterator_traits<KeysInputIteratorT>::value_type>;
269  return ::rocprim::reduce_by_key(
270  d_temp_storage, temp_storage_bytes,
271  d_keys_in, d_values_in, num_items,
272  d_unique_out, d_aggregates_out, d_num_runs_out,
273  ::hipcub::detail::convert_result_type<ValuesInputIteratorT, AggregatesOutputIteratorT>(reduction_op),
274  key_compare_op(),
275  stream, debug_synchronous
276  );
277  }
278 };
279 
280 END_HIPCUB_NAMESPACE
281 
282 #endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_
Definition: thread_operators.hpp:106
Definition: thread_operators.hpp:121
Definition: thread_operators.hpp:86
Definition: thread_operators.hpp:96
Definition: thread_operators.hpp:76