/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.7.0/hipcub/include/hipcub/backend/rocprim/warp/warp_store.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.7.0/hipcub/include/hipcub/backend/rocprim/warp/warp_store.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.7.0/hipcub/include/hipcub/backend/rocprim/warp/warp_store.hpp Source File
warp_store.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_WARP_WARP_STORE_HPP_
31 #define HIPCUB_ROCPRIM_WARP_WARP_STORE_HPP_
32 
33 #include "../../../config.hpp"
34 
35 #include "../util_type.hpp"
36 #include "./warp_exchange.hpp"
37 
38 #include <rocprim/block/block_store_func.hpp>
39 
40 BEGIN_HIPCUB_NAMESPACE
41 
42 enum WarpStoreAlgorithm
43 {
44  WARP_STORE_DIRECT,
45  WARP_STORE_STRIPED,
46  WARP_STORE_VECTORIZE,
47  WARP_STORE_TRANSPOSE
48 };
49 
50 template<
51  class T,
52  int ITEMS_PER_THREAD,
53  WarpStoreAlgorithm ALGORITHM = WARP_STORE_DIRECT,
54  int LOGICAL_WARP_THREADS = HIPCUB_DEVICE_WARP_THREADS,
55  int ARCH = HIPCUB_ARCH
56 >
57 class WarpStore
58 {
59 private:
60  constexpr static bool IS_ARCH_WARP
61  = static_cast<unsigned>(LOGICAL_WARP_THREADS) == HIPCUB_DEVICE_WARP_THREADS;
62 
63  template <WarpStoreAlgorithm _POLICY>
64  struct StoreInternal;
65 
66  template <>
67  struct StoreInternal<WARP_STORE_DIRECT>
68  {
69  using TempStorage = NullType;
70  int linear_tid;
71 
72  HIPCUB_DEVICE __forceinline__ StoreInternal(
73  TempStorage & /*temp_storage*/,
74  int linear_tid)
75  : linear_tid(linear_tid)
76  {
77  }
78 
79  template <typename OutputIteratorT>
80  HIPCUB_DEVICE __forceinline__ void Store(
81  OutputIteratorT block_itr,
82  T (&items)[ITEMS_PER_THREAD])
83  {
84  ::rocprim::block_store_direct_blocked(
85  static_cast<unsigned>(linear_tid),
86  block_itr,
87  items
88  );
89  }
90 
91  template <typename OutputIteratorT>
92  HIPCUB_DEVICE __forceinline__ void Store(
93  OutputIteratorT block_itr,
94  T (&items)[ITEMS_PER_THREAD],
95  int valid_items)
96  {
97  ::rocprim::block_store_direct_blocked(
98  static_cast<unsigned>(linear_tid),
99  block_itr,
100  items,
101  static_cast<unsigned>(valid_items)
102  );
103  }
104  };
105 
106  template <>
107  struct StoreInternal<WARP_STORE_STRIPED>
108  {
109  using TempStorage = NullType;
110  int linear_tid;
111 
112  HIPCUB_DEVICE __forceinline__ StoreInternal(
113  TempStorage & /*temp_storage*/,
114  int linear_tid)
115  : linear_tid(linear_tid)
116  {
117  }
118 
119  template <typename OutputIteratorT>
120  HIPCUB_DEVICE __forceinline__ void Store(
121  OutputIteratorT block_itr,
122  T (&items)[ITEMS_PER_THREAD])
123  {
124  ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
125  static_cast<unsigned>(linear_tid),
126  block_itr,
127  items
128  );
129  }
130 
131  template <typename OutputIteratorT>
132  HIPCUB_DEVICE __forceinline__ void Store(
133  OutputIteratorT block_itr,
134  T (&items)[ITEMS_PER_THREAD],
135  int valid_items)
136  {
137  ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
138  static_cast<unsigned>(linear_tid),
139  block_itr,
140  items,
141  static_cast<unsigned>(valid_items)
142  );
143  }
144  };
145 
146  template <>
147  struct StoreInternal<WARP_STORE_VECTORIZE>
148  {
149  using TempStorage = NullType;
150  int linear_tid;
151 
152  HIPCUB_DEVICE __forceinline__ StoreInternal(
153  TempStorage & /*temp_storage*/,
154  int linear_tid)
155  : linear_tid(linear_tid)
156  {
157  }
158 
159  template <typename OutputIteratorT>
160  HIPCUB_DEVICE __forceinline__ void Store(
161  T *block_ptr,
162  T (&items)[ITEMS_PER_THREAD])
163  {
164  ::rocprim::block_store_direct_blocked_vectorized(
165  static_cast<unsigned>(linear_tid),
166  block_ptr,
167  items
168  );
169  }
170 
171  template <typename _OutputIteratorT>
172  HIPCUB_DEVICE __forceinline__ void Store(
173  _OutputIteratorT block_itr,
174  T (&items)[ITEMS_PER_THREAD])
175  {
176  ::rocprim::block_store_direct_blocked_vectorized(
177  static_cast<unsigned>(linear_tid),
178  block_itr,
179  items
180  );
181  }
182 
183  template <typename OutputIteratorT>
184  HIPCUB_DEVICE __forceinline__ void Store(
185  OutputIteratorT block_itr,
186  T (&items)[ITEMS_PER_THREAD],
187  int valid_items)
188  {
189  // vectorized overload does not exist
190  // fall back to direct blocked
191  ::rocprim::block_store_direct_blocked(
192  static_cast<unsigned>(linear_tid),
193  block_itr,
194  items,
195  static_cast<unsigned>(valid_items)
196  );
197  }
198  };
199 
200  template <>
201  struct StoreInternal<WARP_STORE_TRANSPOSE>
202  {
203  using WarpExchangeT = WarpExchange<
204  T,
205  ITEMS_PER_THREAD,
206  LOGICAL_WARP_THREADS,
207  ARCH
208  >;
209  using TempStorage = typename WarpExchangeT::TempStorage;
210  TempStorage& temp_storage;
211  int linear_tid;
212 
213  HIPCUB_DEVICE __forceinline__ StoreInternal(
214  TempStorage &temp_storage,
215  int linear_tid) :
216  temp_storage(temp_storage),
217  linear_tid(linear_tid)
218  {
219  }
220 
221  template <typename OutputIteratorT>
222  HIPCUB_DEVICE __forceinline__ void Store(
223  OutputIteratorT block_itr,
224  T (&items)[ITEMS_PER_THREAD])
225  {
226  WarpExchangeT(temp_storage).BlockedToStriped(items, items);
227  ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
228  static_cast<unsigned>(linear_tid),
229  block_itr,
230  items
231  );
232  }
233 
234  template <typename OutputIteratorT>
235  HIPCUB_DEVICE __forceinline__ void Store(
236  OutputIteratorT block_itr,
237  T (&items)[ITEMS_PER_THREAD],
238  int valid_items)
239  {
240  WarpExchangeT(temp_storage).BlockedToStriped(items, items);
241  ::rocprim::block_store_direct_warp_striped<LOGICAL_WARP_THREADS>(
242  static_cast<unsigned>(linear_tid),
243  block_itr,
244  items,
245  static_cast<unsigned>(valid_items)
246  );
247 
248  }
249  };
250 
251  using InternalStore = StoreInternal<ALGORITHM>;
252 
253  using _TempStorage = typename InternalStore::TempStorage;
254 
255  HIPCUB_DEVICE __forceinline__ _TempStorage &PrivateStorage()
256  {
257  __shared__ _TempStorage private_storage;
258  return private_storage;
259  }
260 
261  _TempStorage &temp_storage;
262  int linear_tid;
263 
264 public:
265  struct TempStorage : Uninitialized<_TempStorage>
266  {
267  };
268 
269  HIPCUB_DEVICE __forceinline__
270  WarpStore() :
271  temp_storage(PrivateStorage()),
272  linear_tid(IS_ARCH_WARP ? ::rocprim::lane_id() : (::rocprim::lane_id() % LOGICAL_WARP_THREADS))
273  {
274  }
275 
276  HIPCUB_DEVICE __forceinline__
277  WarpStore(TempStorage &temp_storage) :
278  temp_storage(temp_storage.Alias()),
279  linear_tid(IS_ARCH_WARP ? ::rocprim::lane_id() : (::rocprim::lane_id() % LOGICAL_WARP_THREADS))
280  {
281  }
282 
283  template <typename OutputIteratorT>
284  HIPCUB_DEVICE __forceinline__ void Store(
285  OutputIteratorT block_itr,
286  T (&items)[ITEMS_PER_THREAD])
287  {
288  InternalStore(temp_storage, linear_tid)
289  .Store(block_itr, items);
290  }
291 
292  template <typename OutputIteratorT>
293  HIPCUB_DEVICE __forceinline__ void Store(
294  OutputIteratorT block_itr,
295  T (&items)[ITEMS_PER_THREAD],
296  int valid_items)
297  {
298  InternalStore(temp_storage, linear_tid)
299  .Store(block_itr, items, valid_items);
300  }
301 
302  template <typename OutputIteratorT,
303  typename DefaultT>
304  HIPCUB_DEVICE __forceinline__ void Store(
305  OutputIteratorT block_itr,
306  T (&items)[ITEMS_PER_THREAD],
307  int valid_items,
308  DefaultT oob_default)
309  {
310  InternalStore(temp_storage, linear_tid)
311  .Store(block_itr, items, valid_items, oob_default);
312  }
313 };
314 
315 END_HIPCUB_NAMESPACE
316 
317 #endif // HIPCUB_ROCPRIM_WARP_WARP_STORE_HPP_
Definition: warp_exchange.hpp:47
Definition: warp_store.hpp:58
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.hpp:363
Definition: warp_store.hpp:266