/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/kernel_launch.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/kernel_launch.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/kernel_launch.hpp File Reference
kernel_launch.hpp File Reference
#include "ck_tile/core/config.hpp"
#include "ck_tile/core/utility/ignore.hpp"
#include "ck_tile/host/hip_check_error.hpp"
#include "ck_tile/host/stream_config.hpp"
#include "ck_tile/host/timer.hpp"
#include <cstddef>
#include <hip/hip_runtime.h>

Go to the source code of this file.

Namespaces

 ck_tile
 

Macros

#define LOW_CU_PROCESSORS   80
 
#define HIGH_CU_PROCESSORS   228
 
#define OPTIMAL_LATENCY_LOW_CU_PROCESSORS   0.005
 
#define OPTIMAL_LATENCY_HIGH_CU_PROCESSORS   0.0015
 
#define OPTIMAL_LATENCY_SAFE_MARGIN   0.01
 

Functions

template<int MaxThreadPerBlock, int MinBlockPerCu, typename Kernel, typename... Args>
__global__ void ck_tile::kentry (Args... args)
 
template<int MaxThreadPerBlock = CK_TILE_MAX_THREAD_PER_BLOCK, int MinBlockPerCu = CK_TILE_MIN_BLOCK_PER_CU, typename KernelImpl, typename... Args>
CK_TILE_HOST auto ck_tile::make_kernel (KernelImpl, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
 
template<typename... Callables>
CK_TILE_HOST void ck_tile::launch_and_check (const stream_config &sc, Callables &&... callables)
 
template<typename... Callables>
CK_TILE_HOST float ck_tile::launch_kernel (const stream_config &s, Callables &&... callables)
 
template<typename PreprocessFunc, typename... Callables>
CK_TILE_HOST float ck_tile::launch_kernel_preprocess (const stream_config &s, PreprocessFunc preprocess, Callables &&... callables)
 

Macro Definition Documentation

◆ HIGH_CU_PROCESSORS

#define HIGH_CU_PROCESSORS   228

◆ LOW_CU_PROCESSORS

#define LOW_CU_PROCESSORS   80

◆ OPTIMAL_LATENCY_HIGH_CU_PROCESSORS

#define OPTIMAL_LATENCY_HIGH_CU_PROCESSORS   0.0015

◆ OPTIMAL_LATENCY_LOW_CU_PROCESSORS

#define OPTIMAL_LATENCY_LOW_CU_PROCESSORS   0.005

◆ OPTIMAL_LATENCY_SAFE_MARGIN

#define OPTIMAL_LATENCY_SAFE_MARGIN   0.01