clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h Source File
amd_hip_cooperative_groups.h
  
Go to the documentation of this file.
  377   friend __CG_QUALIFIER__ coalesced_group binary_partition(const coalesced_group& cgrp, bool pred);
  991 __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size) {
 1005 __CG_QUALIFIER__ thread_group tiled_partition(const thread_block& parent, unsigned int tile_size) {
 1009 __CG_QUALIFIER__ tiled_group tiled_partition(const tiled_group& parent, unsigned int tile_size) {
 1205   static_assert(size <= ParentSize, "Sub tile size must be <= parent tile size in tiled_partition");
 1207   __CG_QUALIFIER__ tiled_partition_internal(const thread_block_tile<ParentSize, GrandParentCGTy>& g)
The coalesced_group cooperative group type.
Definition: amd_hip_cooperative_groups.h:370
The grid cooperative group type.
Definition: amd_hip_cooperative_groups.h:193
Definition: amd_hip_cooperative_groups.h:1023
thread_block_tile_internal(const thread_block &g)
Definition: amd_hip_cooperative_groups.h:1029
thread_block_tile_internal(const thread_block_tile_internal< tbtSize, tbtParentT > &g)
Definition: amd_hip_cooperative_groups.h:1025
The multi-grid cooperative group type.
Definition: amd_hip_cooperative_groups.h:138
User exposed API that captures the state of the parent group pre-partition.
Definition: amd_hip_cooperative_groups.h:891
Definition: amd_hip_cooperative_groups.h:1180
thread_block_tile(const thread_block_tile< size, ParentCGTy > &g)
Definition: amd_hip_cooperative_groups.h:1186
Definition: amd_hip_cooperative_groups.h:832
Group type - thread_block_tile.
Definition: amd_hip_cooperative_groups.h:914
Group type - thread_block_tile.
Definition: amd_hip_cooperative_groups.h:1043
T shfl_down(T var, unsigned int lane_delta) const
Shuffle down operation on group level.
unsigned long long ballot(int pred) const
Ballot function on group level.
thread_block_tile(const ParentCGTy &g)
Definition: amd_hip_cooperative_groups.h:1045
unsigned int meta_group_rank() const
unsigned int thread_rank() const
Rank of the calling thread within [0, num_threads() ).
T shfl_xor(T var, unsigned int laneMask) const
Shuffle xor operation on group level.
unsigned long long match_any(T value) const
Match any function on group level.
unsigned long long match_all(T value, int &pred) const
Match all function on group level.
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
T shfl_up(T var, unsigned int lane_delta) const
Shuffle up operation on group level.
T shfl(T var, int srcRank) const
Shuffle operation on group level.
The workgroup (thread-block in CUDA terminology) cooperative group type.
Definition: amd_hip_cooperative_groups.h:234
The base type of all cooperative group types.
Definition: amd_hip_cooperative_groups.h:50
Definition: amd_hip_cooperative_groups.h:809
The tiled_group cooperative group type.
Definition: amd_hip_cooperative_groups.h:313
const struct texture< T, dim, readMode > const void size_t size
Definition: hip_runtime_api.h:9903
bool is_valid(CGTy const &g)
Returns true if the group has not violated any API constraints.
Definition: amd_hip_cooperative_groups.h:788
void sync(CGTy const &g)
Synchronizes the threads in the group.
Definition: amd_hip_cooperative_groups.h:799
__hip_uint32_t group_size(CGTy const &g)
Returns the size of the group.
Definition: amd_hip_cooperative_groups.h:760
__hip_uint32_t thread_rank(CGTy const &g)
Returns the rank of the calling thread within the group.
Definition: amd_hip_cooperative_groups.h:775
thread_block this_thread_block()
User-exposed API interface to construct workgroup cooperative group type object - thread_block.
Definition: amd_hip_cooperative_groups.h:302
coalesced_group binary_partition(const coalesced_group &cgrp, bool pred)
Binary partition.
Definition: amd_hip_cooperative_groups.h:1242
thread_group tiled_partition(const thread_group &parent, unsigned int tile_size)
User-exposed API to partition groups.
Definition: amd_hip_cooperative_groups.h:991
multi_grid_group this_multi_grid()
User-exposed API interface to construct multi-grid cooperative group type object - multi_grid_group.
Definition: amd_hip_cooperative_groups.h:179
coalesced_group coalesced_threads()
User-exposed API to create coalesced groups.
Definition: amd_hip_cooperative_groups.h:644
grid_group this_grid()
User-exposed API interface to construct grid cooperative group type object - grid_group.
Definition: amd_hip_cooperative_groups.h:223
void sync() const
Synchronizes the threads in the group.
Definition: amd_hip_cooperative_groups.h:358
T shfl_xor(T var, unsigned int laneMask) const
Definition: amd_hip_cooperative_groups.h:863
static constexpr unsigned int numThreads
Definition: amd_hip_cooperative_groups.h:811
friend multi_grid_group this_multi_grid()
User-exposed API interface to construct multi-grid cooperative group type object - multi_grid_group.
Definition: amd_hip_cooperative_groups.h:179
void sync() const
Synchronizes the threads in the group.
Definition: amd_hip_cooperative_groups.h:458
static void sync()
Definition: amd_hip_cooperative_groups.h:849
unsigned int num_threads
Definition: amd_hip_cooperative_groups.h:72
__hip_uint32_t thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition: amd_hip_cooperative_groups.h:205
void sync() const
Synchronizes the threads in the group.
Definition: amd_hip_cooperative_groups.h:209
__hip_uint32_t size() const
Total number of threads in the group (alias of num_threads())
Definition: amd_hip_cooperative_groups.h:94
unsigned int size() const
Total number of threads in the group (alias of num_threads())
Definition: amd_hip_cooperative_groups.h:450
__hip_uint32_t num_grids()
Definition: amd_hip_cooperative_groups.h:151
unsigned long long match_all(T value, int &pred) const
Definition: amd_hip_cooperative_groups.h:882
unsigned long long match_any(T value) const
Match any function on group level.
Definition: amd_hip_cooperative_groups.h:610
friend thread_block this_thread_block()
User-exposed API interface to construct workgroup cooperative group type object - thread_block.
Definition: amd_hip_cooperative_groups.h:302
static unsigned int meta_group_size()
Returns the number of groups created when the parent group was partitioned.
Definition: amd_hip_cooperative_groups.h:900
unsigned int meta_group_rank() const
Definition: amd_hip_cooperative_groups.h:462
dim3 group_dim() const
Definition: amd_hip_cooperative_groups.h:210
unsigned int meta_group_rank() const
Definition: amd_hip_cooperative_groups.h:965
__hip_uint32_t _num_threads
Total number of threads in the thread_group.
Definition: amd_hip_cooperative_groups.h:53
static unsigned int num_threads()
Number of threads within this tile.
Definition: amd_hip_cooperative_groups.h:820
thread_group(internal::group_type type, __hip_uint32_t num_threads=static_cast< __hip_uint64_t >(0), __hip_uint64_t mask=static_cast< __hip_uint64_t >(0))
Definition: amd_hip_cooperative_groups.h:62
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
Definition: amd_hip_cooperative_groups.h:467
unsigned int thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition: amd_hip_cooperative_groups.h:353
unsigned int thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition: amd_hip_cooperative_groups.h:453
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
Definition: amd_hip_cooperative_groups.h:970
int all(int pred) const
All function on group level.
Definition: amd_hip_cooperative_groups.h:598
unsigned int cg_type() const
Returns the type of the group.
Definition: amd_hip_cooperative_groups.h:96
T shfl(T var, int srcRank) const
Definition: amd_hip_cooperative_groups.h:851
thread_group new_tiled_group(unsigned int tile_size) const
Definition: amd_hip_cooperative_groups.h:248
static __hip_uint32_t size()
Total number of threads in the group (alias of num_threads())
Definition: amd_hip_cooperative_groups.h:283
unsigned int num_threads() const
Definition: amd_hip_cooperative_groups.h:345
unsigned int num_threads() const
Definition: amd_hip_cooperative_groups.h:447
tiled_group(unsigned int tileSize)
Definition: amd_hip_cooperative_groups.h:337
unsigned int meta_group_rank
Definition: amd_hip_cooperative_groups.h:73
unsigned long long match_all(T value, int &pred) const
Match all function on group level.
Definition: amd_hip_cooperative_groups.h:627
thread_block_tile_type()
Definition: amd_hip_cooperative_groups.h:919
grid_group(__hip_uint32_t size)
Construct grid thread group (through the API this_grid())
Definition: amd_hip_cooperative_groups.h:200
__hip_uint32_t grid_rank()
Definition: amd_hip_cooperative_groups.h:155
static constexpr unsigned int thread_rank()
Rank of the thread within this tile.
Definition: amd_hip_cooperative_groups.h:815
bool is_tiled
Definition: amd_hip_cooperative_groups.h:71
unsigned long long ballot(int pred) const
Ballot function on group level.
Definition: amd_hip_cooperative_groups.h:576
void sync() const
Synchronizes the threads in the group.
Definition: amd_hip_cooperative_groups.h:163
T shfl_up(T var, unsigned int lane_delta) const
Definition: amd_hip_cooperative_groups.h:859
static dim3 group_index()
Returns 3-dimensional block index within the grid.
Definition: amd_hip_cooperative_groups.h:271
unsigned int num_threads
Definition: amd_hip_cooperative_groups.h:79
static void sync()
Synchronizes the threads in the group.
Definition: amd_hip_cooperative_groups.h:287
friend coalesced_group binary_partition(const thread_block_tile< fsize, fparent > &tgrp, bool pred)
__hip_uint32_t thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition: amd_hip_cooperative_groups.h:157
bool is_valid() const
Returns true if the group has not violated any API constraints.
static unsigned int size()
Definition: amd_hip_cooperative_groups.h:824
friend thread_group tiled_partition(const thread_group &parent, unsigned int tile_size)
User-exposed API to partition groups.
Definition: amd_hip_cooperative_groups.h:991
int any(int pred) const
Definition: amd_hip_cooperative_groups.h:873
int all(int pred) const
Definition: amd_hip_cooperative_groups.h:875
__hip_uint64_t _mask
Lane mask identifying the threads that belong to the thread_group.
Definition: amd_hip_cooperative_groups.h:54
friend thread_group this_thread()
Definition: amd_hip_cooperative_groups.h:979
bool is_valid() const
Returns true if the group has not violated any API constraints.
Definition: amd_hip_cooperative_groups.h:207
__hip_uint32_t _type
Type of the thread_group.
Definition: amd_hip_cooperative_groups.h:52
unsigned long long match_any(T value) const
Definition: amd_hip_cooperative_groups.h:877
unsigned int size() const
Total number of threads in the group (alias of num_threads())
Definition: amd_hip_cooperative_groups.h:350
struct cooperative_groups::thread_group::_coalesced_info coalesced_info
lane_mask member_mask
Definition: amd_hip_cooperative_groups.h:78
thread_block(__hip_uint32_t size)
Definition: amd_hip_cooperative_groups.h:245
unsigned int meta_group_size
Definition: amd_hip_cooperative_groups.h:74
multi_grid_group(__hip_uint32_t size)
Construct multi-grid thread group (through the API this_multi_grid())
Definition: amd_hip_cooperative_groups.h:145
int any(int pred) const
Any function on group level.
Definition: amd_hip_cooperative_groups.h:588
thread_group this_thread()
Definition: amd_hip_cooperative_groups.h:979
static unsigned int meta_group_rank()
Definition: amd_hip_cooperative_groups.h:895
__hip_uint32_t num_threads() const
Definition: amd_hip_cooperative_groups.h:92
struct _tiled_info tiled_info
Definition: amd_hip_cooperative_groups.h:80
friend coalesced_group coalesced_threads()
User-exposed API to create coalesced groups.
Definition: amd_hip_cooperative_groups.h:644
friend coalesced_group binary_partition(const coalesced_group &cgrp, bool pred)
Binary partition.
Definition: amd_hip_cooperative_groups.h:1242
static dim3 thread_index()
Returns 3-dimensional thread index within the block.
Definition: amd_hip_cooperative_groups.h:273
friend class thread_block
Definition: amd_hip_cooperative_groups.h:86
coalesced_group(lane_mask member_mask)
Definition: amd_hip_cooperative_groups.h:435
T shfl_down(T var, unsigned int lane_delta) const
Shuffle down operation on group level.
Definition: amd_hip_cooperative_groups.h:508
static bool is_valid()
Returns true if the group has not violated any API constraints.
Definition: amd_hip_cooperative_groups.h:285
dim3 group_dim()
Returns the group dimensions.
Definition: amd_hip_cooperative_groups.h:289
thread_block_tile_type(unsigned int meta_group_rank, unsigned int meta_group_size)
Definition: amd_hip_cooperative_groups.h:948
static __hip_uint32_t num_threads()
Definition: amd_hip_cooperative_groups.h:279
unsigned long long ballot(int pred) const
Definition: amd_hip_cooperative_groups.h:868
T shfl(T var, int srcRank) const
Shuffle operation on group level.
Definition: amd_hip_cooperative_groups.h:483
bool is_valid() const
Returns true if the group has not violated any API constraints.
Definition: amd_hip_cooperative_groups.h:161
static __hip_uint32_t thread_rank()
Rank of the calling thread within [0, num_threads() ).
Definition: amd_hip_cooperative_groups.h:275
friend grid_group this_grid()
User-exposed API interface to construct grid cooperative group type object - grid_group.
Definition: amd_hip_cooperative_groups.h:223
T shfl_down(T var, unsigned int lane_delta) const
Definition: amd_hip_cooperative_groups.h:855
T shfl_up(T var, unsigned int lane_delta) const
Shuffle up operation on group level.
Definition: amd_hip_cooperative_groups.h:545
thread_block_tile_type(unsigned int meta_group_rank, unsigned int meta_group_size)
Definition: amd_hip_cooperative_groups.h:924
__hip_uint32_t thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition: amd_hip_cooperative_groups.h:40
tiled_partition_internal(const thread_block &g)
Definition: amd_hip_cooperative_groups.h:1197
tiled_partition_internal(const thread_block_tile< ParentSize, GrandParentCGTy > &g)
Definition: amd_hip_cooperative_groups.h:1207
Definition: amd_hip_cooperative_groups.h:1193
Definition: amd_hip_cooperative_groups.h:77
Definition: amd_hip_cooperative_groups.h:70
Definition: hip_runtime_api.h:1278