30 #ifndef HIPCUB_ROCPRIM_WARP_WARP_STORE_HPP_
31 #define HIPCUB_ROCPRIM_WARP_WARP_STORE_HPP_
33 #include "../../../config.hpp"
35 #include "../util_type.hpp"
36 #include "./warp_exchange.hpp"
38 #include <rocprim/block/block_store_func.hpp>
40 BEGIN_HIPCUB_NAMESPACE
42 enum WarpStoreAlgorithm
53 WarpStoreAlgorithm ALGORITHM = WARP_STORE_DIRECT,
54 int LOGICAL_WARP_THREADS = HIPCUB_DEVICE_WARP_THREADS,
55 int ARCH = HIPCUB_ARCH
60 constexpr
static bool IS_ARCH_WARP
61 =
static_cast<unsigned>(LOGICAL_WARP_THREADS) == HIPCUB_DEVICE_WARP_THREADS;
63 template <WarpStoreAlgorithm _POLICY>
67 struct StoreInternal<WARP_STORE_DIRECT>
72 HIPCUB_DEVICE __forceinline__ StoreInternal(
75 : linear_tid(linear_tid)
79 template <
typename OutputIteratorT>
80 HIPCUB_DEVICE __forceinline__
void Store(
81 OutputIteratorT block_itr,
82 T (&items)[ITEMS_PER_THREAD])
84 ::rocprim::block_store_direct_blocked(
85 static_cast<unsigned>(linear_tid),
91 template <
typename OutputIteratorT>
92 HIPCUB_DEVICE __forceinline__
void Store(
93 OutputIteratorT block_itr,
94 T (&items)[ITEMS_PER_THREAD],
97 ::rocprim::block_store_direct_blocked(
98 static_cast<unsigned>(linear_tid),
101 static_cast<unsigned>(valid_items)
107 struct StoreInternal<WARP_STORE_STRIPED>
112 HIPCUB_DEVICE __forceinline__ StoreInternal(
115 : linear_tid(linear_tid)
119 template <
typename OutputIteratorT>
120 HIPCUB_DEVICE __forceinline__
void Store(
121 OutputIteratorT block_itr,
122 T (&items)[ITEMS_PER_THREAD])
124 ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
125 static_cast<unsigned>(linear_tid),
131 template <
typename OutputIteratorT>
132 HIPCUB_DEVICE __forceinline__
void Store(
133 OutputIteratorT block_itr,
134 T (&items)[ITEMS_PER_THREAD],
137 ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
138 static_cast<unsigned>(linear_tid),
141 static_cast<unsigned>(valid_items)
147 struct StoreInternal<WARP_STORE_VECTORIZE>
152 HIPCUB_DEVICE __forceinline__ StoreInternal(
155 : linear_tid(linear_tid)
159 template <
typename OutputIteratorT>
160 HIPCUB_DEVICE __forceinline__
void Store(
162 T (&items)[ITEMS_PER_THREAD])
164 ::rocprim::block_store_direct_blocked_vectorized(
165 static_cast<unsigned>(linear_tid),
171 template <
typename _OutputIteratorT>
172 HIPCUB_DEVICE __forceinline__
void Store(
173 _OutputIteratorT block_itr,
174 T (&items)[ITEMS_PER_THREAD])
176 ::rocprim::block_store_direct_blocked_vectorized(
177 static_cast<unsigned>(linear_tid),
183 template <
typename OutputIteratorT>
184 HIPCUB_DEVICE __forceinline__
void Store(
185 OutputIteratorT block_itr,
186 T (&items)[ITEMS_PER_THREAD],
191 ::rocprim::block_store_direct_blocked(
192 static_cast<unsigned>(linear_tid),
195 static_cast<unsigned>(valid_items)
201 struct StoreInternal<WARP_STORE_TRANSPOSE>
206 LOGICAL_WARP_THREADS,
209 using TempStorage =
typename WarpExchangeT::TempStorage;
213 HIPCUB_DEVICE __forceinline__ StoreInternal(
216 temp_storage(temp_storage),
217 linear_tid(linear_tid)
221 template <
typename OutputIteratorT>
222 HIPCUB_DEVICE __forceinline__
void Store(
223 OutputIteratorT block_itr,
224 T (&items)[ITEMS_PER_THREAD])
226 WarpExchangeT(temp_storage).BlockedToStriped(items, items);
227 ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
228 static_cast<unsigned>(linear_tid),
234 template <
typename OutputIteratorT>
235 HIPCUB_DEVICE __forceinline__
void Store(
236 OutputIteratorT block_itr,
237 T (&items)[ITEMS_PER_THREAD],
240 WarpExchangeT(temp_storage).BlockedToStriped(items, items);
241 ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
242 static_cast<unsigned>(linear_tid),
245 static_cast<unsigned>(valid_items)
251 using InternalStore = StoreInternal<ALGORITHM>;
253 using _TempStorage =
typename InternalStore::TempStorage;
255 HIPCUB_DEVICE __forceinline__ _TempStorage &PrivateStorage()
257 __shared__ _TempStorage private_storage;
258 return private_storage;
261 _TempStorage &temp_storage;
269 HIPCUB_DEVICE __forceinline__
271 temp_storage(PrivateStorage()),
272 linear_tid(IS_ARCH_WARP ? ::rocprim::lane_id() : (::rocprim::lane_id() % LOGICAL_WARP_THREADS))
276 HIPCUB_DEVICE __forceinline__
278 temp_storage(temp_storage.Alias()),
279 linear_tid(IS_ARCH_WARP ? ::rocprim::lane_id() : (::rocprim::lane_id() % LOGICAL_WARP_THREADS))
283 template <
typename OutputIteratorT>
284 HIPCUB_DEVICE __forceinline__
void Store(
285 OutputIteratorT block_itr,
286 T (&items)[ITEMS_PER_THREAD])
288 InternalStore(temp_storage, linear_tid)
289 .Store(block_itr, items);
292 template <
typename OutputIteratorT>
293 HIPCUB_DEVICE __forceinline__
void Store(
294 OutputIteratorT block_itr,
295 T (&items)[ITEMS_PER_THREAD],
298 InternalStore(temp_storage, linear_tid)
299 .Store(block_itr, items, valid_items);
302 template <
typename OutputIteratorT,
304 HIPCUB_DEVICE __forceinline__
void Store(
305 OutputIteratorT block_itr,
306 T (&items)[ITEMS_PER_THREAD],
308 DefaultT oob_default)
310 InternalStore(temp_storage, linear_tid)
311 .Store(block_itr, items, valid_items, oob_default);
Definition: warp_exchange.hpp:47
Definition: warp_store.hpp:58
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.hpp:363
Definition: warp_store.hpp:266