/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/kernel_launch.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/kernel_launch.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-7.0.0/include/ck_tile/host/kernel_launch.hpp File Reference

#include "ck_tile/core/config.hpp"
#include "ck_tile/core/utility/ignore.hpp"
#include "ck_tile/host/hip_check_error.hpp"
#include "ck_tile/host/stream_config.hpp"
#include "ck_tile/host/timer.hpp"
#include <cstddef>
#include <hip/hip_runtime.h>

Go to the source code of this file.

Namespaces
	ck_tile

Macros
#define	LOW_CU_PROCESSORS 80

#define	HIGH_CU_PROCESSORS 228

#define	OPTIMAL_LATENCY_LOW_CU_PROCESSORS 0.005

#define	OPTIMAL_LATENCY_HIGH_CU_PROCESSORS 0.0015

#define	OPTIMAL_LATENCY_SAFE_MARGIN 0.01

Functions
template<int MaxThreadPerBlock, int MinBlockPerCu, typename Kernel , typename... Args>
__global__ void	ck_tile::kentry (Args... args)

template<int MaxThreadPerBlock = CK_TILE_MAX_THREAD_PER_BLOCK, int MinBlockPerCu = CK_TILE_MIN_BLOCK_PER_CU, typename KernelImpl , typename... Args>
CK_TILE_HOST auto	ck_tile::make_kernel (KernelImpl, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)

template<typename... Callables>
CK_TILE_HOST void	ck_tile::launch_and_check (const stream_config &sc, Callables &&... callables)

template<typename... Callables>
CK_TILE_HOST float	ck_tile::launch_kernel (const stream_config &s, Callables &&... callables)

template<typename PreprocessFunc , typename... Callables>
CK_TILE_HOST float	ck_tile::launch_kernel_preprocess (const stream_config &s, PreprocessFunc preprocess, Callables &&... callables)

Macro Definition Documentation

◆ HIGH_CU_PROCESSORS

#define HIGH_CU_PROCESSORS 228

◆ LOW_CU_PROCESSORS

#define LOW_CU_PROCESSORS 80

◆ OPTIMAL_LATENCY_HIGH_CU_PROCESSORS

#define OPTIMAL_LATENCY_HIGH_CU_PROCESSORS 0.0015

◆ OPTIMAL_LATENCY_LOW_CU_PROCESSORS

#define OPTIMAL_LATENCY_LOW_CU_PROCESSORS 0.005

◆ OPTIMAL_LATENCY_SAFE_MARGIN

#define OPTIMAL_LATENCY_SAFE_MARGIN 0.01