/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.7.0/hipcub/include/hipcub/backend/rocprim/grid/grid_even_share.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.7.0/hipcub/include/hipcub/backend/rocprim/grid/grid_even_share.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.7.0/hipcub/include/hipcub/backend/rocprim/grid/grid_even_share.hpp Source File
grid_even_share.hpp
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_GRID_GRID_EVEN_SHARE_HPP_
31 #define HIPCUB_ROCPRIM_GRID_GRID_EVEN_SHARE_HPP_
32 
33 #include <type_traits>
34 
35 #include "../../../config.hpp"
36 #include "../../../grid/grid_mapping.hpp"
37 #include "../util_type.hpp"
38 
39 BEGIN_HIPCUB_NAMESPACE
40 
66 template <typename OffsetT>
68 {
69 private:
70 
71  int total_tiles;
72  int big_shares;
73  OffsetT big_share_items;
74  OffsetT normal_share_items;
75  OffsetT normal_base_offset;
76 
77 public:
78 
80  OffsetT num_items;
81 
83  int grid_size;
84 
86  OffsetT block_offset;
87 
89  OffsetT block_end;
90 
92  OffsetT block_stride;
93 
94 
98  __host__ __device__ __forceinline__ GridEvenShare() :
99  total_tiles(0),
100  big_shares(0),
101  big_share_items(0),
102  normal_share_items(0),
103  normal_base_offset(0),
104  num_items(0),
105  grid_size(0),
106  block_offset(0),
107  block_end(0),
108  block_stride(0)
109  {}
110 
111 
115  __host__ __device__ __forceinline__ void DispatchInit(
116  OffsetT num_items_,
117  int max_grid_size,
118  int tile_items)
119  {
120  this->block_offset = num_items_; // Initialize past-the-end
121  this->block_end = num_items_; // Initialize past-the-end
122  this->num_items = num_items_;
123  this->total_tiles = static_cast<int>(hipcub::DivideAndRoundUp(num_items_, tile_items));
124  this->grid_size = min(total_tiles, max_grid_size);
125  int avg_tiles_per_block = total_tiles / grid_size;
126  // leftover grains go to big blocks:
127  this->big_shares = total_tiles - (avg_tiles_per_block * grid_size);
128  this->normal_share_items = avg_tiles_per_block * tile_items;
129  this->normal_base_offset = big_shares * tile_items;
130  this->big_share_items = normal_share_items + tile_items;
131  }
132 
133 
139  template <int TILE_ITEMS>
140  __device__ __forceinline__ void BlockInit(
141  int block_id,
142  Int2Type<GRID_MAPPING_RAKE> /*strategy_tag*/)
143  {
144  block_stride = TILE_ITEMS;
145  if (block_id < big_shares)
146  {
147  // This thread block gets a big share of grains (avg_tiles_per_block + 1)
148  block_offset = (block_id * big_share_items);
149  block_end = block_offset + big_share_items;
150  }
151  else if (block_id < total_tiles)
152  {
153  // This thread block gets a normal share of grains (avg_tiles_per_block)
154  block_offset = normal_base_offset + (block_id * normal_share_items);
155  block_end = min(num_items, block_offset + normal_share_items);
156  }
157  // Else default past-the-end
158  }
159 
160 
166  template <int TILE_ITEMS>
167  __device__ __forceinline__ void BlockInit(
168  int block_id,
169  Int2Type<GRID_MAPPING_STRIP_MINE> /*strategy_tag*/)
170  {
171  block_stride = grid_size * TILE_ITEMS;
172  block_offset = (block_id * TILE_ITEMS);
173  block_end = num_items;
174  }
175 
176 
182  template <
183  int TILE_ITEMS,
184  GridMappingStrategy STRATEGY>
185  __device__ __forceinline__ void BlockInit()
186  {
187  BlockInit<TILE_ITEMS>(blockIdx.x, Int2Type<STRATEGY>());
188  }
189 
190 
196  template <int TILE_ITEMS>
197  __device__ __forceinline__ void BlockInit(
198  OffsetT block_offset,
199  OffsetT block_end)
200  {
201  this->block_offset = block_offset;
202  this->block_end = block_end;
203  this->block_stride = TILE_ITEMS;
204  }
205 
206 
207 };
208 
209  // end group GridModule
211 
212 END_HIPCUB_NAMESPACE
213 
214 #endif // HIPCUB_ROCPRIM_GRID_GRID_EVEN_SHARE_HPP_
GridMappingStrategy
cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device...
Definition: grid_mapping.hpp:55
GridEvenShare is a descriptor utility for distributing input among CUDA thread blocks in an "even-sha...
Definition: grid_even_share.hpp:68
OffsetT block_stride
Stride between input tiles.
Definition: grid_even_share.hpp:92
__host__ __device__ __forceinline__ GridEvenShare()
Constructor.
Definition: grid_even_share.hpp:98
__device__ __forceinline__ void BlockInit(OffsetT block_offset, OffsetT block_end)
Block-initialization, specialized for a "raking" access pattern in which each thread block is assigne...
Definition: grid_even_share.hpp:197
__host__ __device__ __forceinline__ void DispatchInit(OffsetT num_items_, int max_grid_size, int tile_items)
Dispatch initializer. To be called prior to kernel launch.
Definition: grid_even_share.hpp:115
OffsetT block_end
OffsetT into input of marking the end (one-past) of the owning thread block's segment of input tiles.
Definition: grid_even_share.hpp:89
__device__ __forceinline__ void BlockInit(int block_id, Int2Type< GRID_MAPPING_RAKE >)
Initializes ranges for the specified thread block index. Specialized for a "raking" access pattern in...
Definition: grid_even_share.hpp:140
__device__ __forceinline__ void BlockInit(int block_id, Int2Type< GRID_MAPPING_STRIP_MINE >)
Block-initialization, specialized for a "raking" access pattern in which each thread block is assigne...
Definition: grid_even_share.hpp:167
OffsetT block_offset
OffsetT into input marking the beginning of the owning thread block's segment of input tiles.
Definition: grid_even_share.hpp:86
int grid_size
Grid size in thread blocks.
Definition: grid_even_share.hpp:83
__device__ __forceinline__ void BlockInit()
Block-initialization, specialized for "strip mining" access pattern in which the input tiles assigned...
Definition: grid_even_share.hpp:185
OffsetT num_items
Total number of input items.
Definition: grid_even_share.hpp:80
Definition: util_type.hpp:144