30 #ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_
31 #define HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_
36 #include "../../../config.hpp"
38 #include "../thread/thread_operators.hpp"
39 #include "../iterator/arg_index_input_iterator.hpp"
41 #include <rocprim/device/device_segmented_reduce.hpp>
43 BEGIN_HIPCUB_NAMESPACE
// DeviceSegmentedReduce groups the static segmented-reduction entry points
// visible in this listing: Reduce, Sum, Min, ArgMin, Max and ArgMax.
// NOTE(review): this listing is lossy - the embedded source line numbers skip
// (e.g. 45 -> 48, 58 -> 60), so braces, `template<` lines and some parameters
// are not visible here; confirm details against the upstream header.
45 struct DeviceSegmentedReduce
// Reduce: segmented reduction with a caller-supplied operator.
// Forwards its arguments to ::rocprim::segmented_reduce and returns that
// call's hipError_t unchanged. The operator is not passed through as-is: it is
// first wrapped by ::hipcub::detail::convert_result_type<InputIteratorT,
// OutputIteratorT>.
//   d_temp_storage / temp_storage_bytes - forwarded as the first two arguments;
//       temp_storage_bytes is taken by reference (presumably the usual
//       query-then-run temporary storage protocol - TODO confirm).
//   d_out                               - output iterator.
//   d_begin_offsets / d_end_offsets     - per-segment offset iterators,
//       forwarded right after num_segments.
//   reduction_op                        - the reduction functor.
//   stream                              - defaults to 0.
//   debug_synchronous                   - defaults to false.
// NOTE(review): ReductionOp, num_segments and the input iterator are used below
// but declared on lines omitted from this listing.
48 typename InputIteratorT,
49 typename OutputIteratorT,
50 typename OffsetIteratorT,
54 HIPCUB_RUNTIME_FUNCTION
static
55 hipError_t Reduce(
void * d_temp_storage,
56 size_t& temp_storage_bytes,
58 OutputIteratorT d_out,
60 OffsetIteratorT d_begin_offsets,
61 OffsetIteratorT d_end_offsets,
62 ReductionOp reduction_op,
64 hipStream_t stream = 0,
65 bool debug_synchronous =
false)
67 return ::rocprim::segmented_reduce(
68 d_temp_storage, temp_storage_bytes,
70 num_segments, d_begin_offsets, d_end_offsets,
// Wrap the user functor before handing it to rocPRIM.
71 ::hipcub::detail::convert_result_type<InputIteratorT, OutputIteratorT>(reduction_op),
73 stream, debug_synchronous
// Sum: segmented reduction entry point with no user-supplied operator.
// Takes the same temporary-storage, output, offset, stream (default 0) and
// debug_synchronous (default false) parameters as the other entry points and
// returns a hipError_t.
// NOTE(review): the callee, the operator and the initial value sit on lines
// omitted from this listing (95, 97, 99); presumably this forwards to
// ::rocprim::segmented_reduce with an addition functor - confirm against the
// upstream header.
78 typename InputIteratorT,
79 typename OutputIteratorT,
80 typename OffsetIteratorT
82 HIPCUB_RUNTIME_FUNCTION
static
83 hipError_t Sum(
void * d_temp_storage,
84 size_t& temp_storage_bytes,
86 OutputIteratorT d_out,
88 OffsetIteratorT d_begin_offsets,
89 OffsetIteratorT d_end_offsets,
90 hipStream_t stream = 0,
91 bool debug_synchronous =
false)
// Element type read through the input iterator; its use is on an omitted line
// (presumably to build the initial value - TODO confirm).
93 using input_type =
typename std::iterator_traits<InputIteratorT>::value_type;
96 d_temp_storage, temp_storage_bytes,
98 num_segments, d_begin_offsets, d_end_offsets,
100 stream, debug_synchronous
// Min: segmented reduction entry point that passes ::hipcub::Min() as the
// operator together with std::numeric_limits<input_type>::max(), where
// input_type is the value type of InputIteratorT.
// Takes the same temporary-storage, output, offset, stream (default 0) and
// debug_synchronous (default false) parameters as the other entry points and
// returns a hipError_t.
// NOTE(review): the callee name is on a line omitted from this listing (122);
// presumably ::rocprim::segmented_reduce, as in Reduce - confirm.
105 typename InputIteratorT,
106 typename OutputIteratorT,
107 typename OffsetIteratorT
109 HIPCUB_RUNTIME_FUNCTION
static
110 hipError_t Min(
void * d_temp_storage,
111 size_t& temp_storage_bytes,
113 OutputIteratorT d_out,
115 OffsetIteratorT d_begin_offsets,
116 OffsetIteratorT d_end_offsets,
117 hipStream_t stream = 0,
118 bool debug_synchronous =
false)
120 using input_type =
typename std::iterator_traits<InputIteratorT>::value_type;
123 d_temp_storage, temp_storage_bytes,
125 num_segments, d_begin_offsets, d_end_offsets,
// Operator, followed by the largest finite input_type value (presumably the
// initial value of each segment's reduction - TODO confirm argument role).
126 ::
hipcub::Min(), std::numeric_limits<input_type>::max(),
127 stream, debug_synchronous
// ArgMin: segmented reduction entry point that works on an index-carrying view
// of the input (d_indexed_in) and an initial tuple built from
// (1, std::numeric_limits<T>::max()).
// Takes the same temporary-storage, output, offset, stream (default 0) and
// debug_synchronous (default false) parameters as the other entry points and
// returns a hipError_t.
// NOTE(review): OffsetT, d_in, the second branch of the std::conditional, the
// callee and the reduction operator are all on lines omitted from this listing;
// confirm them against the upstream header.
132 typename InputIteratorT,
133 typename OutputIteratorT,
134 typename OffsetIteratorT
136 HIPCUB_RUNTIME_FUNCTION
static
137 hipError_t ArgMin(
void * d_temp_storage,
138 size_t& temp_storage_bytes,
140 OutputIteratorT d_out,
142 OffsetIteratorT d_begin_offsets,
143 OffsetIteratorT d_end_offsets,
144 hipStream_t stream = 0,
145 bool debug_synchronous =
false)
// T: element type of the input iterator; O: element type of the output iterator.
148 using T =
typename std::iterator_traits<InputIteratorT>::value_type;
149 using O =
typename std::iterator_traits<OutputIteratorT>::value_type;
// When O is void, KeyValuePair<OffsetT, T> is selected; the alternative type is
// on an omitted line.
150 using OutputTupleT =
typename std::conditional<
151 std::is_same<O, void>::value,
152 KeyValuePair<OffsetT, T>,
156 using OutputValueT =
typename OutputTupleT::Value;
157 using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
// Wrap the raw input iterator (presumably so each element is paired with its
// offset - TODO confirm against ArgIndexInputIterator).
159 IteratorT d_indexed_in(d_in);
// Initial tuple: first constructor argument 1, second the largest finite T.
160 const OutputTupleT init(1, std::numeric_limits<T>::max());
163 d_temp_storage, temp_storage_bytes,
165 num_segments, d_begin_offsets, d_end_offsets,
167 stream, debug_synchronous
// Max: segmented reduction entry point that passes ::hipcub::Max() as the
// operator together with std::numeric_limits<input_type>::lowest(), where
// input_type is the value type of InputIteratorT.
// Takes the same temporary-storage, output, offset, stream (default 0) and
// debug_synchronous (default false) parameters as the other entry points and
// returns a hipError_t.
// NOTE(review): the callee name is on a line omitted from this listing (189);
// presumably ::rocprim::segmented_reduce, as in Reduce - confirm.
172 typename InputIteratorT,
173 typename OutputIteratorT,
174 typename OffsetIteratorT
176 HIPCUB_RUNTIME_FUNCTION
static
177 hipError_t Max(
void * d_temp_storage,
178 size_t& temp_storage_bytes,
180 OutputIteratorT d_out,
182 OffsetIteratorT d_begin_offsets,
183 OffsetIteratorT d_end_offsets,
184 hipStream_t stream = 0,
185 bool debug_synchronous =
false)
187 using input_type =
typename std::iterator_traits<InputIteratorT>::value_type;
190 d_temp_storage, temp_storage_bytes,
192 num_segments, d_begin_offsets, d_end_offsets,
// Operator, followed by lowest() rather than min(), i.e. the most negative
// finite input_type value (presumably the initial value - TODO confirm role).
193 ::
hipcub::Max(), std::numeric_limits<input_type>::lowest(),
194 stream, debug_synchronous
// ArgMax: segmented reduction entry point that works on an index-carrying view
// of the input (d_indexed_in) and an initial tuple built from
// (1, std::numeric_limits<T>::lowest()).
// Takes the same temporary-storage, output, offset, stream (default 0) and
// debug_synchronous (default false) parameters as the other entry points and
// returns a hipError_t.
// NOTE(review): OffsetT, d_in, the second branch of the std::conditional, the
// callee and the reduction operator are all on lines omitted from this listing;
// confirm them against the upstream header.
199 typename InputIteratorT,
200 typename OutputIteratorT,
201 typename OffsetIteratorT
203 HIPCUB_RUNTIME_FUNCTION
static
204 hipError_t ArgMax(
void * d_temp_storage,
205 size_t& temp_storage_bytes,
207 OutputIteratorT d_out,
209 OffsetIteratorT d_begin_offsets,
210 OffsetIteratorT d_end_offsets,
211 hipStream_t stream = 0,
212 bool debug_synchronous =
false)
// T: element type of the input iterator; O: element type of the output iterator.
215 using T =
typename std::iterator_traits<InputIteratorT>::value_type;
216 using O =
typename std::iterator_traits<OutputIteratorT>::value_type;
// When O is void, KeyValuePair<OffsetT, T> is selected; the alternative type is
// on an omitted line.
217 using OutputTupleT =
typename std::conditional<
218 std::is_same<O, void>::value,
219 KeyValuePair<OffsetT, T>,
223 using OutputValueT =
typename OutputTupleT::Value;
224 using IteratorT = ArgIndexInputIterator<InputIteratorT, OffsetT, OutputValueT>;
// Wrap the raw input iterator (presumably so each element is paired with its
// offset - TODO confirm against ArgIndexInputIterator).
226 IteratorT d_indexed_in(d_in);
// Initial tuple: first constructor argument 1, second the most negative finite T.
227 const OutputTupleT init(1, std::numeric_limits<T>::lowest());
230 d_temp_storage, temp_storage_bytes,
232 num_segments, d_begin_offsets, d_end_offsets,
234 stream, debug_synchronous
Definition: thread_operators.hpp:106
Definition: thread_operators.hpp:121
Definition: thread_operators.hpp:86
Definition: thread_operators.hpp:96
Definition: thread_operators.hpp:76