Go to the source code of this file.

template<int MaxThreadPerBlock, int MinBlockPerCu, typename Kernel, typename... Args>
__global__ void ck_tile::kentry (Args... args)

template<int MaxThreadPerBlock = CK_TILE_MAX_THREAD_PER_BLOCK, int MinBlockPerCu = CK_TILE_MIN_BLOCK_PER_CU, typename KernelImpl, typename... Args>
CK_TILE_HOST auto ck_tile::make_kernel (KernelImpl, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)

template<typename... Callables>
CK_TILE_HOST void ck_tile::launch_and_check (const stream_config &sc, Callables &&... callables)

template<typename... Callables>
CK_TILE_HOST float ck_tile::launch_kernel (const stream_config &s, Callables &&... callables)

template<typename PreprocessFunc, typename... Callables>
CK_TILE_HOST float ck_tile::launch_kernel_preprocess (const stream_config &s, PreprocessFunc preprocess, Callables &&... callables)

◆ HIGH_CU_PROCESSORS
#define HIGH_CU_PROCESSORS 228
◆ LOW_CU_PROCESSORS
#define LOW_CU_PROCESSORS 80
◆ OPTIMAL_LATENCY_HIGH_CU_PROCESSORS
#define OPTIMAL_LATENCY_HIGH_CU_PROCESSORS 0.0015
◆ OPTIMAL_LATENCY_LOW_CU_PROCESSORS
#define OPTIMAL_LATENCY_LOW_CU_PROCESSORS 0.005
◆ OPTIMAL_LATENCY_SAFE_MARGIN
#define OPTIMAL_LATENCY_SAFE_MARGIN 0.01