/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.0/hipcub/include/hipcub/backend/rocprim/block/block_shuffle.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.0/hipcub/include/hipcub/backend/rocprim/block/block_shuffle.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.0/hipcub/include/hipcub/backend/rocprim/block/block_shuffle.hpp Source File
block_shuffle.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_SHUFFLE_HPP_
31 #define HIPCUB_ROCPRIM_BLOCK_BLOCK_SHUFFLE_HPP_
32 
33 #include <type_traits>
34 
35 #include "../../../config.hpp"
36 
37 #include "../thread/thread_operators.hpp"
38 
39 #include <rocprim/block/block_shuffle.hpp>
40 
41 BEGIN_HIPCUB_NAMESPACE
42 
43 
44 
45 template <
46  typename T,
47  int BLOCK_DIM_X,
48  int BLOCK_DIM_Y = 1,
49  int BLOCK_DIM_Z = 1,
50  int ARCH = HIPCUB_ARCH>
51 class BlockShuffle : public ::rocprim::block_shuffle<
52  T,
53  BLOCK_DIM_X,
54  BLOCK_DIM_Y,
55  BLOCK_DIM_Z>
56 {
57  static_assert(
58  BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0,
59  "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0"
60  );
61 
62  using base_type =
63  typename ::rocprim::block_shuffle<
64  T,
65  BLOCK_DIM_X,
66  BLOCK_DIM_Y,
67  BLOCK_DIM_Z
68  >;
69 
70  // Reference to temporary storage (usually shared memory)
71  typename base_type::storage_type& temp_storage_;
72 
73 public:
74  using TempStorage = typename base_type::storage_type;
75 
76  HIPCUB_DEVICE inline
77  BlockShuffle() : temp_storage_(private_storage())
78  {}
79 
80 
81  HIPCUB_DEVICE inline
82  BlockShuffle(TempStorage &temp_storage)
83  : temp_storage_(temp_storage)
84  {}
85 
92  HIPCUB_DEVICE inline void Offset(
93  T input,
94  T& output,
95  int distance = 1)
96  {
97  base_type::offset(input,output,distance);
98  }
99 
106  HIPCUB_DEVICE inline void Rotate(
107  T input,
108  T& output,
109  unsigned int distance = 1)
110  {
111  base_type::rotate(input,output,distance);
112  }
121  template <int ITEMS_PER_THREAD>
122  HIPCUB_DEVICE inline void Up(
123  T (&input)[ITEMS_PER_THREAD],
124  T (&prev)[ITEMS_PER_THREAD])
125  {
126  base_type::up(input,prev);
127  }
128 
129 
138  template <int ITEMS_PER_THREAD>
139  HIPCUB_DEVICE inline void Up(
140  T (&input)[ITEMS_PER_THREAD],
141  T (&prev)[ITEMS_PER_THREAD],
142  T &block_suffix)
143  {
144  base_type::up(input,prev,block_suffix);
145  }
146 
155  template <int ITEMS_PER_THREAD>
156  HIPCUB_DEVICE inline void Down(
157  T (&input)[ITEMS_PER_THREAD],
158  T (&next)[ITEMS_PER_THREAD])
159  {
160  base_type::down(input,next);
161  }
162 
171  template <int ITEMS_PER_THREAD>
172  HIPCUB_DEVICE inline void Down(
173  T (&input)[ITEMS_PER_THREAD],
174  T (&next)[ITEMS_PER_THREAD],
175  T &block_prefix)
176  {
177  base_type::down(input,next,block_prefix);
178  }
179 
180 private:
181  HIPCUB_DEVICE inline
182  TempStorage& private_storage()
183  {
184  HIPCUB_SHARED_MEMORY TempStorage private_storage;
185  return private_storage;
186  }
187 };
188 
189 END_HIPCUB_NAMESPACE
190 
191 #endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_SHUFFLE_HPP_
Definition: block_shuffle.hpp:56
__device__ void Down(T(&input)[ITEMS_PER_THREAD], T(&next)[ITEMS_PER_THREAD])
The thread block rotates its blocked arrangement of input items, shifting it down by one item.
Definition: block_shuffle.hpp:156
__device__ void Up(T(&input)[ITEMS_PER_THREAD], T(&prev)[ITEMS_PER_THREAD])
The thread block rotates its blocked arrangement of input items, shifting it up by one item.
Definition: block_shuffle.hpp:122
__device__ void Up(T(&input)[ITEMS_PER_THREAD], T(&prev)[ITEMS_PER_THREAD], T &block_suffix)
The thread block rotates its blocked arrangement of input items, shifting it up by one item....
Definition: block_shuffle.hpp:139
__device__ BlockShuffle(TempStorage &temp_storage)
Definition: block_shuffle.hpp:82
__device__ void Offset(T input, T &output, int distance=1)
Each threadi obtains the input provided by threadi+distance. The offset distance may be negative.
Definition: block_shuffle.hpp:92
__device__ void Rotate(T input, T &output, unsigned int distance=1)
Each threadi obtains the input provided by threadi+distance.
Definition: block_shuffle.hpp:106
__device__ void Down(T(&input)[ITEMS_PER_THREAD], T(&next)[ITEMS_PER_THREAD], T &block_prefix)
The thread block rotates its blocked arrangement of input items, shifting it down by one item....
Definition: block_shuffle.hpp:172