BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH > Class Template Reference#
hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH > Class Template Reference
BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block. More...
#include <block_radix_rank.hpp>
Classes | |
struct | TempStorage |
The operations exposed by BlockScan require a temporary memory allocation of this nested type for thread communication. More... | |
Public Types | |
enum | { BINS_TRACKED_PER_THREAD = rocprim::maximum<int>()(1, (RADIX_DIGITS + BLOCK_THREADS - 1) / BLOCK_THREADS) } |
Public Member Functions | |
Collective constructors | |
__device__ | BlockRadixRank () |
Collective constructor using a private static allocation of shared memory as temporary storage. | |
__device__ | BlockRadixRank (TempStorage &temp_storage) |
Collective constructor using the specified memory allocation as temporary storage. More... | |
Ranking | |
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT > | |
__device__ void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor) |
Rank keys. More... | |
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT > | |
__device__ void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor, int(&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) |
Rank keys. For the lower RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. More... | |
Detailed Description
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
class hipcub::BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >
BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block.
- Template Parameters
-
  - BLOCK_DIM_X: The thread block length in threads along the X dimension.
  - RADIX_BITS: The number of radix bits per digit place.
  - IS_DESCENDING: Whether or not the sorted order is high-to-low.
  - MEMOIZE_OUTER_SCAN: [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: false, as declared in the template signature; the upstream CUB text describes the default as true for architectures SM35 and newer, false otherwise). See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details.
  - INNER_SCAN_ALGORITHM: [optional] The hipcub::BlockScanAlgorithm algorithm to use (default: hipcub::BLOCK_SCAN_WARP_SCANS).
  - SMEM_CONFIG: [optional] Shared memory bank mode (default: hipSharedMemBankSizeFourByte).
  - BLOCK_DIM_Y: [optional] The thread block length in threads along the Y dimension (default: 1).
  - BLOCK_DIM_Z: [optional] The thread block length in threads along the Z dimension (default: 1).
  - ARCH: [optional] The target architecture version for which to specialize this collective (default: 1).
- Overview
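- BlockRadixRank computes, for each key held by the threads of a block, the key's local rank within the tile, based on the digit selected by a caller-supplied digit extractor.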
- Keys must be in a form suitable for radix ranking (i.e., unsigned bits).
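- Assumes a blocked arrangement of (block-threads * items-per-thread) items across the thread block, where thread i owns the i-th range of items-per-thread contiguous items. For multi-dimensional thread blocks, a row-major thread ordering is assumed.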
- Performance Considerations
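- Efficiency is increased with increased granularity (more keys per thread). Performance is also typically increased until the additional register pressure or shared memory allocation size causes occupancy to fall too low.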
- Examples
- Example 1: Simple radix rank of 32-bit integer keys
Member Enumeration Documentation
◆ anonymous enum
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
anonymous enum |
Constructor & Destructor Documentation
◆ BlockRadixRank()
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
|
inline |
Collective constructor using the specified memory allocation as temporary storage.
- Parameters
-
[in] temp_storage Reference to memory allocation having layout type TempStorage
Member Function Documentation
◆ RankKeys() [1/2]
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
|
inline |
Rank keys.
- Parameters
-
  - [in] keys: Keys for this tile.
  - [out] ranks: For each key, the local rank within the tile.
  - [in] digit_extractor: The digit extractor.
◆ RankKeys() [2/2]
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = 1>
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
|
inline |
Rank keys. For the lower RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread.
- Parameters
-
  - [in] keys: Keys for this tile.
  - [out] ranks: For each key, the local rank within the tile.
  - [in] digit_extractor: The digit extractor.
  - [out] exclusive_digit_prefix: The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1].
The documentation for this class was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.4.2/hipcub/include/hipcub/backend/rocprim/block/block_radix_rank.hpp