/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.4.3/hipcub/include/hipcub/backend/rocprim/block/block_exchange.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.4.3/hipcub/include/hipcub/backend/rocprim/block/block_exchange.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.4.3/hipcub/include/hipcub/backend/rocprim/block/block_exchange.hpp Source File
block_exchange.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_EXCHANGE_HPP_
31 #define HIPCUB_ROCPRIM_BLOCK_BLOCK_EXCHANGE_HPP_
32 
33 #include "../../../config.hpp"
34 
35 #include <rocprim/block/block_exchange.hpp>
36 
37 BEGIN_HIPCUB_NAMESPACE
38 
39 template<
40  typename InputT,
41  int BLOCK_DIM_X,
42  int ITEMS_PER_THREAD,
43  bool WARP_TIME_SLICING = false, /* ignored */
44  int BLOCK_DIM_Y = 1,
45  int BLOCK_DIM_Z = 1,
46  int ARCH = HIPCUB_ARCH /* ignored */
47 >
49  : private ::rocprim::block_exchange<
50  InputT,
51  BLOCK_DIM_X,
52  ITEMS_PER_THREAD,
53  BLOCK_DIM_Y,
54  BLOCK_DIM_Z
55  >
56 {
57  static_assert(
58  BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0,
59  "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0"
60  );
61 
62  using base_type =
63  typename ::rocprim::block_exchange<
64  InputT,
65  BLOCK_DIM_X,
66  ITEMS_PER_THREAD,
67  BLOCK_DIM_Y,
68  BLOCK_DIM_Z
69  >;
70 
71  // Reference to temporary storage (usually shared memory)
72  typename base_type::storage_type& temp_storage_;
73 
74 public:
75  using TempStorage = typename base_type::storage_type;
76 
77  HIPCUB_DEVICE inline
78  BlockExchange() : temp_storage_(private_storage())
79  {
80  }
81 
82  HIPCUB_DEVICE inline
83  BlockExchange(TempStorage& temp_storage) : temp_storage_(temp_storage)
84  {
85  }
86 
87  template<typename OutputT>
88  HIPCUB_DEVICE inline
89  void StripedToBlocked(InputT (&input_items)[ITEMS_PER_THREAD],
90  OutputT (&output_items)[ITEMS_PER_THREAD])
91  {
92  base_type::striped_to_blocked(input_items, output_items, temp_storage_);
93  }
94 
95  template<typename OutputT>
96  HIPCUB_DEVICE inline
97  void BlockedToStriped(InputT (&input_items)[ITEMS_PER_THREAD],
98  OutputT (&output_items)[ITEMS_PER_THREAD])
99  {
100  base_type::blocked_to_striped(input_items, output_items, temp_storage_);
101  }
102 
103  template<typename OutputT>
104  HIPCUB_DEVICE inline
105  void WarpStripedToBlocked(InputT (&input_items)[ITEMS_PER_THREAD],
106  OutputT (&output_items)[ITEMS_PER_THREAD])
107  {
108  base_type::warp_striped_to_blocked(input_items, output_items, temp_storage_);
109  }
110 
111  template<typename OutputT>
112  HIPCUB_DEVICE inline
113  void BlockedToWarpStriped(InputT (&input_items)[ITEMS_PER_THREAD],
114  OutputT (&output_items)[ITEMS_PER_THREAD])
115  {
116  base_type::blocked_to_warp_striped(input_items, output_items, temp_storage_);
117  }
118 
119  template<typename OutputT, typename OffsetT>
120  HIPCUB_DEVICE inline
121  void ScatterToBlocked(InputT (&input_items)[ITEMS_PER_THREAD],
122  OutputT (&output_items)[ITEMS_PER_THREAD],
123  OffsetT (&ranks)[ITEMS_PER_THREAD])
124  {
125  base_type::scatter_to_blocked(input_items, output_items, ranks, temp_storage_);
126  }
127 
128  template<typename OutputT, typename OffsetT>
129  HIPCUB_DEVICE inline
130  void ScatterToStriped(InputT (&input_items)[ITEMS_PER_THREAD],
131  OutputT (&output_items)[ITEMS_PER_THREAD],
132  OffsetT (&ranks)[ITEMS_PER_THREAD])
133  {
134  base_type::scatter_to_striped(input_items, output_items, ranks, temp_storage_);
135  }
136 
137  template<typename OutputT, typename OffsetT>
138  HIPCUB_DEVICE inline
139  void ScatterToStripedGuarded(InputT (&input_items)[ITEMS_PER_THREAD],
140  OutputT (&output_items)[ITEMS_PER_THREAD],
141  OffsetT (&ranks)[ITEMS_PER_THREAD])
142  {
143  base_type::scatter_to_striped_guarded(input_items, output_items, ranks, temp_storage_);
144  }
145 
146  template<typename OutputT, typename OffsetT, typename ValidFlag>
147  HIPCUB_DEVICE inline
148  void ScatterToStripedFlagged(InputT (&input_items)[ITEMS_PER_THREAD],
149  OutputT (&output_items)[ITEMS_PER_THREAD],
150  OffsetT (&ranks)[ITEMS_PER_THREAD],
151  ValidFlag (&is_valid)[ITEMS_PER_THREAD])
152  {
153  base_type::scatter_to_striped_flagged(input_items, output_items, ranks, is_valid, temp_storage_);
154  }
155 
156 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
157 
158 
159  HIPCUB_DEVICE inline void StripedToBlocked(
160  InputT (&items)[ITEMS_PER_THREAD])
161  {
162  StripedToBlocked(items, items);
163  }
164 
165  HIPCUB_DEVICE inline void BlockedToStriped(
166  InputT (&items)[ITEMS_PER_THREAD])
167  {
168  BlockedToStriped(items, items);
169  }
170 
171  HIPCUB_DEVICE inline void WarpStripedToBlocked(
172  InputT (&items)[ITEMS_PER_THREAD])
173  {
174  WarpStripedToBlocked(items, items);
175  }
176 
177  HIPCUB_DEVICE inline void BlockedToWarpStriped(
178  InputT (&items)[ITEMS_PER_THREAD])
179  {
180  BlockedToWarpStriped(items, items);
181  }
182 
183  template <typename OffsetT>
184  HIPCUB_DEVICE inline void ScatterToBlocked(
185  InputT (&items)[ITEMS_PER_THREAD],
186  OffsetT (&ranks)[ITEMS_PER_THREAD])
187  {
188  ScatterToBlocked(items, items, ranks);
189  }
190 
191  template <typename OffsetT>
192  HIPCUB_DEVICE inline void ScatterToStriped(
193  InputT (&items)[ITEMS_PER_THREAD],
194  OffsetT (&ranks)[ITEMS_PER_THREAD])
195  {
196  ScatterToStriped(items, items, ranks);
197  }
198 
199  template <typename OffsetT>
200  HIPCUB_DEVICE inline void ScatterToStripedGuarded(
201  InputT (&items)[ITEMS_PER_THREAD],
202  OffsetT (&ranks)[ITEMS_PER_THREAD])
203  {
204  ScatterToStripedGuarded(items, items, ranks);
205  }
206 
207  template <typename OffsetT, typename ValidFlag>
208  HIPCUB_DEVICE inline void ScatterToStripedFlagged(
209  InputT (&items)[ITEMS_PER_THREAD],
210  OffsetT (&ranks)[ITEMS_PER_THREAD],
211  ValidFlag (&is_valid)[ITEMS_PER_THREAD])
212  {
213  ScatterToStriped(items, items, ranks, is_valid);
214  }
215 
216 #endif // DOXYGEN_SHOULD_SKIP_THIS
217 
218 private:
219  HIPCUB_DEVICE inline
220  TempStorage& private_storage()
221  {
222  HIPCUB_SHARED_MEMORY TempStorage private_storage;
223  return private_storage;
224  }
225 };
226 
227 END_HIPCUB_NAMESPACE
228 
229 #endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_EXCHANGE_HPP_
Definition: block_exchange.hpp:56