Go to the source code of this file.
|
| template<int MaxThreadPerBlock, int MinBlockPerCu, typename Kernel, typename... Args> |
| __global__ void | ck_tile::kentry (Args... args) |
| |
| template<int MaxThreadPerBlock = CK_TILE_MAX_THREAD_PER_BLOCK, int MinBlockPerCu = CK_TILE_MIN_BLOCK_PER_CU, typename KernelImpl, typename... Args> |
| CK_TILE_HOST auto | ck_tile::make_kernel (KernelImpl, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args) |
| |
| template<typename... Callables> |
| CK_TILE_HOST void | ck_tile::launch_and_check (const stream_config &sc, Callables &&... callables) |
| |
| template<typename... Callables> |
| CK_TILE_HOST float | ck_tile::launch_kernel (const stream_config &s, Callables &&... callables) |
| |
| template<typename PreprocessFunc, typename... Callables> |
| CK_TILE_HOST float | ck_tile::launch_kernel_preprocess (const stream_config &s, PreprocessFunc preprocess, Callables &&... callables) |
| |
◆ HIGH_CU_PROCESSORS
| #define HIGH_CU_PROCESSORS 228 |
◆ LOW_CU_PROCESSORS
| #define LOW_CU_PROCESSORS 80 |
◆ OPTIMAL_LATENCY_HIGH_CU_PROCESSORS
| #define OPTIMAL_LATENCY_HIGH_CU_PROCESSORS 0.0015 |
◆ OPTIMAL_LATENCY_LOW_CU_PROCESSORS
| #define OPTIMAL_LATENCY_LOW_CU_PROCESSORS 0.005 |
◆ OPTIMAL_LATENCY_SAFE_MARGIN
| #define OPTIMAL_LATENCY_SAFE_MARGIN 0.01 |