BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH > Class Template Reference

BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH > Class Template Reference

hipCUB: hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH > Class Template Reference
hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH > Class Template Reference

BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block. More...

#include <block_radix_rank.hpp>

Inheritance diagram for hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >:

Public Types

enum  { BINS_TRACKED_PER_THREAD = base_type::digits_per_thread }
 
using TempStorage = typename base_type::storage_type
 

Public Member Functions

Collective constructors
__device__ BlockRadixRank ()
 Collective constructor using a private static allocation of shared memory as temporary storage.
 
__device__ BlockRadixRank (TempStorage &temp_storage)
 Collective constructor using the specified memory allocation as temporary storage. More...
 
Ranking
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
__device__ void RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor)
 Rank keys. More...
 
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
__device__ void RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor, int(&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD])
 Rank keys. For the lower RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. More...
 

Detailed Description

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
class hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >

BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block.

Template Parameters
BLOCK_DIM_X: The thread block length in threads along the X dimension
RADIX_BITS: The number of radix bits per digit place
IS_DESCENDING: Whether or not the sorted order is high-to-low
MEMOIZE_OUTER_SCAN: [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: false). See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details.
INNER_SCAN_ALGORITHM: [optional] The hipcub::BlockScanAlgorithm algorithm to use (default: hipcub::BLOCK_SCAN_WARP_SCANS)
SMEM_CONFIG: [optional] Shared memory bank mode (default: hipSharedMemBankSizeFourByte)
BLOCK_DIM_Y: [optional] The thread block length in threads along the Y dimension (default: 1)
BLOCK_DIM_Z: [optional] The thread block length in threads along the Z dimension (default: 1)
ARCH: [optional] The target architecture for which to specialize this collective (default: 1)
Overview
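BlockRadixRank computes, for each key held by the threads of a block, its local rank within the tile, using the digit selected by the supplied digit extractor.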
  • Keys must be in a form suitable for radix ranking (i.e., unsigned bits).
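  • Assumes a blocked arrangement of (block-threads * items-per-thread) items across the thread block, where thread i owns the i-th range of items-per-thread contiguous items. For multi-dimensional thread blocks, a row-major thread ordering is assumed.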
Performance Considerations
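  • Efficiency is increased with increased granularity (KEYS_PER_THREAD). Performance is also typically increased until the additional register pressure or shared memory allocation size causes occupancy to fall too low.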
Examples
  • Example 1: Simple radix rank of 32-bit integer keys
    template <int BLOCK_THREADS>
    __global__ void ExampleKernel(...)
    {

Member Enumeration Documentation

◆ anonymous enum

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
anonymous enum
Enumerator
BINS_TRACKED_PER_THREAD 

Number of bin-starting offsets tracked per thread.

Constructor & Destructor Documentation

◆ BlockRadixRank()

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
__device__ hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >::BlockRadixRank ( TempStorage &  temp_storage)
inline

Collective constructor using the specified memory allocation as temporary storage.

Parameters
[in] temp_storage: Reference to memory allocation having layout type TempStorage

Member Function Documentation

◆ RankKeys() [1/2]

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
__device__ void hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >::RankKeys ( UnsignedBits(&)  keys[KEYS_PER_THREAD],
int(&)  ranks[KEYS_PER_THREAD],
DigitExtractorT  digit_extractor 
)
inline

Rank keys.

Parameters
[in] keys: Keys for this tile
[out] ranks: For each key, the local rank within the tile
[in] digit_extractor: The digit extractor

◆ RankKeys() [2/2]

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
__device__ void hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >::RankKeys ( UnsignedBits(&)  keys[KEYS_PER_THREAD],
int(&)  ranks[KEYS_PER_THREAD],
DigitExtractorT  digit_extractor,
int(&)  exclusive_digit_prefix[BINS_TRACKED_PER_THREAD] 
)
inline

Rank keys. For the lower RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread.

Parameters
[in] keys: Keys for this tile
[out] ranks: For each key, the local rank within the tile
[in] digit_extractor: The digit extractor
[out] exclusive_digit_prefix: The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]

The documentation for this class was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.7.0/hipcub/include/hipcub/backend/rocprim/block/block_radix_rank.hpp