/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hip/checkouts/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h Source File#
amd_hip_cooperative_groups.h
Go to the documentation of this file.
62 __CG_QUALIFIER__ thread_group(internal::group_type type, uint32_t size = static_cast<uint64_t>(0),
358 friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size);
359 friend __CG_QUALIFIER__ coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tile_size);
360 friend __CG_QUALIFIER__ coalesced_group binary_partition(const coalesced_group& cgrp, bool pred);
931 __CG_QUALIFIER__ thread_block_tile_type(unsigned int meta_group_rank, unsigned int meta_group_size)
972__CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size) {
988__CG_QUALIFIER__ thread_group tiled_partition(const thread_block& parent, unsigned int tile_size) {
992__CG_QUALIFIER__ tiled_group tiled_partition(const tiled_group& parent, unsigned int tile_size) {
997__CG_QUALIFIER__ coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tile_size) {
1179struct tiled_partition_internal<size, thread_block> : public thread_block_tile<size, thread_block> {
The coalesced_group cooperative group type.
Definition amd_hip_cooperative_groups.h:355
The grid cooperative group type.
Definition amd_hip_cooperative_groups.h:189
Definition amd_hip_cooperative_groups.h:1005
thread_block_tile_internal(const thread_block &g)
Definition amd_hip_cooperative_groups.h:1012
thread_block_tile_internal(const thread_block_tile_internal< tbtSize, tbtParentT > &g)
Definition amd_hip_cooperative_groups.h:1008
The multi-grid cooperative group type.
Definition amd_hip_cooperative_groups.h:135
User exposed API that captures the state of the parent group pre-partition.
Definition amd_hip_cooperative_groups.h:883
thread_block_tile(const thread_block_tile< size, ParentCGTy > &g)
Definition amd_hip_cooperative_groups.h:1169
Definition amd_hip_cooperative_groups.h:816
Group type - thread_block_tile.
Definition amd_hip_cooperative_groups.h:906
Group type - thread_block_tile.
Definition amd_hip_cooperative_groups.h:1026
T shfl_down(T var, unsigned int lane_delta) const
Shuffle down operation on group level.
unsigned long long ballot(int pred) const
Ballot function on group level.
thread_block_tile(const ParentCGTy &g)
Definition amd_hip_cooperative_groups.h:1028
unsigned int meta_group_rank() const
unsigned int thread_rank() const
Rank of the calling thread within [0, size() ).
T shfl_xor(T var, unsigned int laneMask) const
Shuffle xor operation on group level.
unsigned long long match_any(T value) const
Match any function on group level.
unsigned long long match_all(T value, int &pred) const
Match all function on group level.
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
T shfl_up(T var, unsigned int lane_delta) const
Shuffle up operation on group level.
T shfl(T var, int srcRank) const
Shuffle operation on group level.
The workgroup (thread-block in CUDA terminology) cooperative group type.
Definition amd_hip_cooperative_groups.h:229
The base type of all cooperative group types.
Definition amd_hip_cooperative_groups.h:50
Definition amd_hip_cooperative_groups.h:797
The tiled_group cooperative group type.
Definition amd_hip_cooperative_groups.h:301
bool is_valid(CGTy const &g)
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:776
void sync(CGTy const &g)
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:787
uint32_t thread_rank(CGTy const &g)
Returns the rank of thread of the group.
Definition amd_hip_cooperative_groups.h:763
uint32_t group_size(CGTy const &g)
Returns the size of the group.
Definition amd_hip_cooperative_groups.h:750
thread_block this_thread_block()
User-exposed API interface to construct workgroup cooperative group type object - thread_block.
Definition amd_hip_cooperative_groups.h:290
coalesced_group binary_partition(const coalesced_group &cgrp, bool pred)
Binary partition.
Definition amd_hip_cooperative_groups.h:1215
thread_group tiled_partition(const thread_group &parent, unsigned int tile_size)
User-exposed API to partition groups.
Definition amd_hip_cooperative_groups.h:972
multi_grid_group this_multi_grid()
User-exposed API interface to construct grid cooperative group type object - multi_grid_group.
Definition amd_hip_cooperative_groups.h:175
coalesced_group coalesced_threads()
User-exposed API to create coalesced groups.
Definition amd_hip_cooperative_groups.h:634
grid_group this_grid()
User-exposed API interface to construct grid cooperative group type object - grid_group.
Definition amd_hip_cooperative_groups.h:218
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:341
T shfl_xor(T var, unsigned int laneMask) const
Definition amd_hip_cooperative_groups.h:853
static constexpr unsigned int numThreads
Definition amd_hip_cooperative_groups.h:799
friend multi_grid_group this_multi_grid()
User-exposed API interface to construct grid cooperative group type object - multi_grid_group.
Definition amd_hip_cooperative_groups.h:175
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:436
static void sync()
Definition amd_hip_cooperative_groups.h:834
uint32_t size() const
Definition amd_hip_cooperative_groups.h:91
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:204
unsigned int size() const
Definition amd_hip_cooperative_groups.h:426
unsigned long long match_all(T value, int &pred) const
Definition amd_hip_cooperative_groups.h:873
unsigned long long match_any(T value) const
Match any function on group level.
Definition amd_hip_cooperative_groups.h:600
friend thread_block this_thread_block()
User-exposed API interface to construct workgroup cooperative group type object - thread_block.
Definition amd_hip_cooperative_groups.h:290
uint32_t _size
Total number of threads in the thread_group.
Definition amd_hip_cooperative_groups.h:53
static unsigned int meta_group_size()
Returns the number of groups created when the parent group was partitioned.
Definition amd_hip_cooperative_groups.h:892
unsigned int meta_group_rank() const
Definition amd_hip_cooperative_groups.h:442
dim3 group_dim() const
Definition amd_hip_cooperative_groups.h:205
unsigned int meta_group_rank() const
Definition amd_hip_cooperative_groups.h:946
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
Definition amd_hip_cooperative_groups.h:447
unsigned int thread_rank() const
Rank of the calling thread within [0, size() ).
Definition amd_hip_cooperative_groups.h:336
unsigned int thread_rank() const
Rank of the calling thread within [0, size() ).
Definition amd_hip_cooperative_groups.h:431
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
Definition amd_hip_cooperative_groups.h:951
int all(int pred) const
All function on group level.
Definition amd_hip_cooperative_groups.h:588
unsigned int cg_type() const
Returns the type of the group.
Definition amd_hip_cooperative_groups.h:93
T shfl(T var, int srcRank) const
Definition amd_hip_cooperative_groups.h:838
thread_block(uint32_t size)
Definition amd_hip_cooperative_groups.h:239
thread_group new_tiled_group(unsigned int tile_size) const
Definition amd_hip_cooperative_groups.h:242
thread_group(internal::group_type type, uint32_t size=static_cast< uint64_t >(0), uint64_t mask=static_cast< uint64_t >(0))
Definition amd_hip_cooperative_groups.h:62
uint32_t thread_rank() const
Rank of the calling thread within [0, size() ).
Definition amd_hip_cooperative_groups.h:200
tiled_group(unsigned int tileSize)
Definition amd_hip_cooperative_groups.h:325
unsigned int meta_group_rank
Definition amd_hip_cooperative_groups.h:72
unsigned long long match_all(T value, int &pred) const
Match all function on group level.
Definition amd_hip_cooperative_groups.h:617
thread_block_tile_type()
Definition amd_hip_cooperative_groups.h:910
static uint32_t size()
Definition amd_hip_cooperative_groups.h:271
static constexpr unsigned int thread_rank()
Rank of the thread within this tile.
Definition amd_hip_cooperative_groups.h:803
bool is_tiled
Definition amd_hip_cooperative_groups.h:70
unsigned long long ballot(int pred) const
Ballot function on group level.
Definition amd_hip_cooperative_groups.h:566
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:159
T shfl_up(T var, unsigned int lane_delta) const
Definition amd_hip_cooperative_groups.h:848
uint32_t num_grids()
Definition amd_hip_cooperative_groups.h:149
static dim3 group_index()
Returns 3-dimensional block index within the grid.
Definition amd_hip_cooperative_groups.h:265
static void sync()
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:275
friend coalesced_group binary_partition(const thread_block_tile< fsize, fparent > &tgrp, bool pred)
bool is_valid() const
Returns true if the group has not violated any API constraints.
static unsigned int size()
Number of threads within this tile.
Definition amd_hip_cooperative_groups.h:808
friend thread_group tiled_partition(const thread_group &parent, unsigned int tile_size)
User-exposed API to partition groups.
Definition amd_hip_cooperative_groups.h:972
int any(int pred) const
Definition amd_hip_cooperative_groups.h:864
int all(int pred) const
Definition amd_hip_cooperative_groups.h:866
uint32_t thread_rank() const
Rank of the calling thread within [0, size() ).
Definition amd_hip_cooperative_groups.h:155
friend thread_group this_thread()
Definition amd_hip_cooperative_groups.h:960
bool is_valid() const
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:202
unsigned long long match_any(T value) const
Definition amd_hip_cooperative_groups.h:868
unsigned int size() const
Definition amd_hip_cooperative_groups.h:333
uint64_t _mask
Total number of threads in the thread_group.
Definition amd_hip_cooperative_groups.h:54
unsigned int size
Definition amd_hip_cooperative_groups.h:71
struct cooperative_groups::thread_group::_coalesced_info coalesced_info
lane_mask member_mask
Definition amd_hip_cooperative_groups.h:77
static uint32_t thread_rank()
Rank of the calling thread within [0, size() ).
Definition amd_hip_cooperative_groups.h:269
unsigned int meta_group_size
Definition amd_hip_cooperative_groups.h:73
int any(int pred) const
Any function on group level.
Definition amd_hip_cooperative_groups.h:578
thread_group this_thread()
Definition amd_hip_cooperative_groups.h:960
static unsigned int meta_group_rank()
Definition amd_hip_cooperative_groups.h:887
unsigned int size
Definition amd_hip_cooperative_groups.h:78
struct _tiled_info tiled_info
Definition amd_hip_cooperative_groups.h:79
friend coalesced_group coalesced_threads()
User-exposed API to create coalesced groups.
Definition amd_hip_cooperative_groups.h:634
friend coalesced_group binary_partition(const coalesced_group &cgrp, bool pred)
Binary partition.
Definition amd_hip_cooperative_groups.h:1215
static dim3 thread_index()
Returns 3-dimensional thread index within the block.
Definition amd_hip_cooperative_groups.h:267
friend class thread_block
Definition amd_hip_cooperative_groups.h:85
coalesced_group(lane_mask member_mask)
Definition amd_hip_cooperative_groups.h:415
T shfl_down(T var, unsigned int lane_delta) const
Shuffle down operation on group level.
Definition amd_hip_cooperative_groups.h:491
grid_group(uint32_t size)
Construct grid thread group (through the API this_grid())
Definition amd_hip_cooperative_groups.h:196
static bool is_valid()
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:273
dim3 group_dim()
Returns the group dimensions.
Definition amd_hip_cooperative_groups.h:277
thread_block_tile_type(unsigned int meta_group_rank, unsigned int meta_group_size)
Definition amd_hip_cooperative_groups.h:931
unsigned long long ballot(int pred) const
Definition amd_hip_cooperative_groups.h:859
uint32_t thread_rank() const
Rank of the calling thread within [0, size() ).
T shfl(T var, int srcRank) const
Shuffle operation on group level.
Definition amd_hip_cooperative_groups.h:464
multi_grid_group(uint32_t size)
Construct multi-grid thread group (through the API this_multi_grid())
Definition amd_hip_cooperative_groups.h:142
bool is_valid() const
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:157
uint32_t grid_rank()
Definition amd_hip_cooperative_groups.h:153
friend grid_group this_grid()
User-exposed API interface to construct grid cooperative group type object - grid_group.
Definition amd_hip_cooperative_groups.h:218
T shfl_down(T var, unsigned int lane_delta) const
Definition amd_hip_cooperative_groups.h:843
T shfl_up(T var, unsigned int lane_delta) const
Shuffle up operation on group level.
Definition amd_hip_cooperative_groups.h:532
Definition amd_hip_cooperative_groups.h:40
tiled_partition_internal(const thread_block &g)
Definition amd_hip_cooperative_groups.h:1180
Definition amd_hip_cooperative_groups.h:1176
Definition amd_hip_cooperative_groups.h:76
Definition amd_hip_cooperative_groups.h:69
Definition hip_runtime_api.h:1051