/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/warp/warp_exchange.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/warp/warp_exchange.hpp Source File

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/warp/warp_exchange.hpp Source File
warp_exchange.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_WARP_WARP_EXCHANGE_HPP_
31 #define HIPCUB_ROCPRIM_WARP_WARP_EXCHANGE_HPP_
32 
33 #include "../../../config.hpp"
34 #include "../util_type.hpp"
35 
36 BEGIN_HIPCUB_NAMESPACE
37 
38 template <
39  typename InputT,
40  int ITEMS_PER_THREAD,
41  int LOGICAL_WARP_THREADS = HIPCUB_DEVICE_WARP_THREADS,
42  int ARCH = HIPCUB_ARCH
43 >
45 {
47  "LOGICAL_WARP_THREADS must be a power of two");
48 
49  constexpr static int SMEM_BANKS = ::rocprim::detail::get_lds_banks_no();
50 
51  constexpr static bool HAS_BANK_CONFLICTS =
52  ITEMS_PER_THREAD > 4 && PowerOfTwo<ITEMS_PER_THREAD>::VALUE;
53 
54  constexpr static int BANK_CONFLICTS_PADDING =
55  HAS_BANK_CONFLICTS ? (ITEMS_PER_THREAD / SMEM_BANKS) : 0;
56 
57  constexpr static int ITEMS_PER_TILE =
58  ITEMS_PER_THREAD * LOGICAL_WARP_THREADS + BANK_CONFLICTS_PADDING;
59 
60  constexpr static bool IS_ARCH_WARP = LOGICAL_WARP_THREADS ==
61  HIPCUB_DEVICE_WARP_THREADS;
62 
63  union _TempStorage
64  {
65  InputT items_shared[ITEMS_PER_TILE];
66  };
67 
68  _TempStorage &temp_storage;
69  unsigned lane_id;
70 
71 public:
72  struct TempStorage : Uninitialized<_TempStorage> {};
73 
74  WarpExchange() = delete;
75 
76  explicit HIPCUB_DEVICE __forceinline__
77  WarpExchange(TempStorage &temp_storage) :
78  temp_storage(temp_storage.Alias()),
79  lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS)
80  {
81  }
82 
83  template <typename OutputT>
84  HIPCUB_DEVICE __forceinline__
85  void BlockedToStriped(
86  const InputT (&input_items)[ITEMS_PER_THREAD],
87  OutputT (&output_items)[ITEMS_PER_THREAD])
88  {
89  for (int item = 0; item < ITEMS_PER_THREAD; ++item)
90  {
91  const int idx = ITEMS_PER_THREAD * lane_id + item;
92  temp_storage.items_shared[idx] = input_items[item];
93  }
94 
95  // member mask is unused in rocPRIM
96  WARP_SYNC(0);
97 
98  for (int item = 0; item < ITEMS_PER_THREAD; ++item)
99  {
100  const int idx = LOGICAL_WARP_THREADS * item + lane_id;
101  output_items[item] = temp_storage.items_shared[idx];
102  }
103  }
104 
105  template <typename OutputT>
106  HIPCUB_DEVICE __forceinline__
107  void StripedToBlocked(
108  const InputT (&input_items)[ITEMS_PER_THREAD],
109  OutputT (&output_items)[ITEMS_PER_THREAD])
110  {
111  for (int item = 0; item < ITEMS_PER_THREAD; ++item)
112  {
113  const int idx = LOGICAL_WARP_THREADS * item + lane_id;
114  temp_storage.items_shared[idx] = input_items[item];
115  }
116 
117  // member mask is unused in rocPRIM
118  WARP_SYNC(0);
119 
120  for (int item = 0; item < ITEMS_PER_THREAD; ++item)
121  {
122  const int idx = ITEMS_PER_THREAD * lane_id + item;
123  output_items[item] = temp_storage.items_shared[idx];
124  }
125  }
126 
127  template <typename OffsetT>
128  HIPCUB_DEVICE __forceinline__
129  void ScatterToStriped(
130  InputT (&items)[ITEMS_PER_THREAD],
131  OffsetT (&ranks)[ITEMS_PER_THREAD])
132  {
133  ScatterToStriped(items, items, ranks);
134  }
135 
136  template <typename OutputT,
137  typename OffsetT>
138  HIPCUB_DEVICE __forceinline__
139  void ScatterToStriped(
140  const InputT (&input_items)[ITEMS_PER_THREAD],
141  OutputT (&output_items)[ITEMS_PER_THREAD],
142  OffsetT (&ranks)[ITEMS_PER_THREAD])
143  {
144  ROCPRIM_UNROLL
145  for (int item = 0; item < ITEMS_PER_THREAD; ++item)
146  {
147  temp_storage.items_shared[ranks[item]] = input_items[item];
148  }
149 
150  // member mask is unused in rocPRIM
151  WARP_SYNC(0);
152 
153  ROCPRIM_UNROLL
154  for (int item = 0; item < ITEMS_PER_THREAD; item++)
155  {
156  int item_offset = (item * LOGICAL_WARP_THREADS) + lane_id;
157  output_items[item] = temp_storage.items_shared[item_offset];
158  }
159  }
160 };
161 
162 END_HIPCUB_NAMESPACE
163 
164 #endif // HIPCUB_ROCPRIM_WARP_WARP_EXCHANGE_HPP_
Definition: warp_exchange.hpp:45
Definition: util_type.hpp:78
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.hpp:363
Definition: warp_exchange.hpp:72