/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference

Go to the source code of this file.

Classes
struct	has_warp_tile_members< T, typename >

struct	has_warp_tile_members< T, std::void_t< decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)> >

Macros
#define	START_JSON_DUMP_FILE(file_name)

#define	END_JSON_DUMP_FILE() std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl;

#define	ADD_KEY_VALUE(key, value)

#define	ADD_PERF_TO_JSON(_time, tflops, gbytes)

Functions
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits>
void	dump_gemm_json_results (const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")

void	dump_batched_gemm_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")

template<typename ALayout , typename BLayout , typename CLayout >
void	dump_grouped_gemm_json_results (const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")

void	dump_flatmm_json_results (const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")

void	dump_gemm_multi_d_fp16_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")

void	dump_elementwise_json_results (const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")

void	dump_layernorm2d_fwd_json_results (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")

template<typename DataType , template< typename > typename DTypeTraits>
void	dump_reduce_json_results (const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")

void	dump_permute_json_results (const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")

void	dump_topk_softmax_json (const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")

void	dump_rmsnorm2d_fwd_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")

void	dump_add_rmsnorm2d_rdquant_fwd_json (const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")

void	dump_smoothquant_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")

void	dump_moe_sorting_json (const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")

void	dump_batched_transpose_json (const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")

void	dump_moe_smoothquant_json (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")

void	dump_fused_moe_json (const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")

void	dump_fmha_fwd_json_results (const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, const std::string &qscale, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")

void	dump_fmha_bwd_json_results (const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")

Macro Definition Documentation

◆ ADD_KEY_VALUE

#define ADD_KEY_VALUE	(	key,
		value
	)

◆ ADD_PERF_TO_JSON

#define ADD_PERF_TO_JSON	(	_time,
		tflops,
		gbytes
	)

◆ END_JSON_DUMP_FILE

#define END_JSON_DUMP_FILE ( ) std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl;

◆ START_JSON_DUMP_FILE

#define START_JSON_DUMP_FILE ( file_name )

Function Documentation

◆ dump_add_rmsnorm2d_rdquant_fwd_json()

void dump_add_rmsnorm2d_rdquant_fwd_json	(	const std::string &	json_filename,
		const std::string &	input_data_type,
		const std::string &	quantized_data_type,
		int	m,
		int	n,
		int	stride,
		float	epsilon,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		bool	pass,
		const std::string &	kernel_name = `"add_rmsnorm2d_rdquant_fwd"`
	)

inline

◆ dump_batched_gemm_json_results()

void dump_batched_gemm_json_results	(	const std::string &	json_filename,
		const std::string &	op_name,
		int	M,
		int	N,
		int	K,
		int	stride_A,
		int	stride_B,
		int	stride_C,
		int	batch_stride_A,
		int	batch_stride_B,
		int	batch_stride_C,
		int	batch_count,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"batched_gemm_basic"`
	)

inline

◆ dump_batched_transpose_json()

void dump_batched_transpose_json	(	const std::string &	json_filename,
		int	N,
		int	C,
		int	H,
		int	W,
		const std::string &	layout_in,
		const std::string &	layout_out,
		const std::string &	prec,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		bool	pass,
		const std::string &	kernel_name = `"batched_transpose"`
	)

inline

◆ dump_elementwise_json_results()

void dump_elementwise_json_results	(	const std::string &	json_filename,
		const std::string &	prec,
		int	grid_size,
		int	block_size,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"elementwise"`
	)

inline

◆ dump_flatmm_json_results()

void dump_flatmm_json_results	(	const std::string &	json_filename,
		const std::string &	datatype,
		int	M,
		int	N,
		int	K,
		int	stride_A,
		int	stride_B,
		int	stride_C,
		int	kbatch,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"flatmm_basic"`
	)

inline

◆ dump_fmha_bwd_json_results()

void dump_fmha_bwd_json_results	(	const std::string &	json_filename,
		const std::string &	data_type,
		const std::string &	mode,
		const std::string &	i_perm,
		const std::string &	o_perm,
		int	batch,
		int	nhead,
		int	nhead_k,
		int	seqlen_q,
		int	seqlen_k,
		int	hdim_q,
		int	hdim_v,
		float	scale,
		const std::string &	bias,
		bool	use_dbias,
		float	p_drop,
		bool	s_randval,
		bool	deterministic,
		const std::string &	mask,
		int	mask_left,
		int	mask_right,
		int	workspace_size,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"fmha_bwd"`
	)

inline

◆ dump_fmha_fwd_json_results()

void dump_fmha_fwd_json_results	(	const std::string &	json_filename,
		const std::string &	prec,
		const std::string &	mode,
		const std::string &	io_layout,
		int	batch,
		int	nhead,
		int	nhead_k,
		int	seqlen_qs,
		int	seqlen_ks,
		int	seqlen_kpads,
		int	hdim_q,
		int	hdim_v,
		float	scale_s,
		float	p_drop,
		bool	lse,
		const std::string &	qscale,
		const std::string &	bias,
		const std::string &	vlayout,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"fmha_fwd"`
	)

inline

◆ dump_fused_moe_json()

void dump_fused_moe_json	(	const std::string &	json_filename,
		const std::string &	api_str,
		const std::string &	prec_str,
		int	tokens,
		bool	is_local_token,
		int	local_tokens,
		int	experts,
		int	topk,
		int	hidden_size,
		int	intermediate_size,
		int	stride,
		int	block_m,
		int	activation,
		bool	gate_only,
		bool	fused_quant,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	tb_per_sec,
		const std::string &	kernel_name = `"fused_moe"`
	)

inline

◆ dump_gemm_json_results()

template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits>

void dump_gemm_json_results	(	const std::string &	json_filename,
		int	M,
		int	N,
		int	K,
		int	stride_A,
		int	stride_B,
		int	stride_C,
		bool	persistent,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"gemm_basic"`
	)

◆ dump_gemm_multi_d_fp16_json_results()

void dump_gemm_multi_d_fp16_json_results	(	const std::string &	json_filename,
		const std::string &	op_name,
		int	M,
		int	N,
		int	K,
		int	StrideA,
		int	StrideB,
		int	StrideD0,
		int	StrideD1,
		int	StrideE,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"gemm_multi_d_fp16"`
	)

inline

◆ dump_grouped_gemm_json_results()

template<typename ALayout , typename BLayout , typename CLayout >

void dump_grouped_gemm_json_results	(	const std::string &	json_filename,
		const std::string &	op_name,
		int	group_count,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"grouped_gemm"`
	)

◆ dump_layernorm2d_fwd_json_results()

void dump_layernorm2d_fwd_json_results	(	const std::string &	json_filename,
		const std::string &	prec_i,
		const std::string &	prec_o,
		const std::string &	prec_sm,
		const std::string &	prec_sy,
		int	m,
		int	n,
		int	x_stride,
		int	xr_stride,
		int	y_stride,
		int	yr_stride,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"layernorm2d_fwd"`
	)

inline

◆ dump_moe_smoothquant_json()

void dump_moe_smoothquant_json	(	const std::string &	json_filename,
		const std::string &	prec_i,
		const std::string &	prec_o,
		int	tokens,
		int	hidden_size,
		int	stride,
		int	experts,
		int	topk,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"moe_smoothquant"`
	)

inline

◆ dump_moe_sorting_json()

void dump_moe_sorting_json	(	const std::string &	json_filename,
		const std::string &	index_prec,
		const std::string &	weight_prec,
		const std::string &	workspace_size,
		int	dispatch_policy,
		int	tokens,
		int	num_experts,
		int	topk,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		bool	pass,
		const std::string &	kernel_name = `"moe_sorting"`
	)

inline

◆ dump_permute_json_results()

void dump_permute_json_results	(	const std::string &	json_filename,
		const std::string &	data_type,
		bool	pass,
		float	ave_time,
		float	tflop,
		float	gb_per_sec,
		const std::string &	kernel_name = `"permute"`
	)

inline

◆ dump_reduce_json_results()

template<typename DataType , template< typename > typename DTypeTraits>

void dump_reduce_json_results	(	const std::string &	json_filename,
		int	N,
		int	C,
		int	H,
		int	W,
		bool	pass,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		const std::string &	kernel_name = `"reduce"`
	)

◆ dump_rmsnorm2d_fwd_json()

void dump_rmsnorm2d_fwd_json	(	const std::string &	json_filename,
		const std::string &	prec_str,
		int	m,
		int	n,
		int	x_stride,
		int	xr_stride,
		int	y_stride,
		int	yr_stride,
		int	use_model_sensitive_rmsnorm,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		bool	pass,
		const std::string &	kernel_name = `"rmsnorm2d_fwd"`
	)

inline

◆ dump_smoothquant_json()

void dump_smoothquant_json	(	const std::string &	json_filename,
		const std::string &	prec_str,
		int	m,
		int	n,
		int	x_stride,
		int	y_stride,
		float	ave_time,
		float	tflops,
		float	gb_per_sec,
		bool	pass,
		const std::string &	kernel_name = `"smoothquant"`
	)

inline

◆ dump_topk_softmax_json()

void dump_topk_softmax_json	(	const std::string &	json_filename,
		const std::string &	input_prec,
		const std::string &	weight_prec,
		int	tokens,
		int	experts,
		int	topk,
		int	stride_input,
		int	stride_output,
		float	ave_time,
		float	tflop,
		float	gb_per_sec,
		bool	pass,
		const std::string &	kernel_name = `"topk_softmax"`
	)

inline

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference

Classes

Macros

Functions

Macro Definition Documentation

◆ ADD_KEY_VALUE

◆ ADD_PERF_TO_JSON

◆ END_JSON_DUMP_FILE

◆ START_JSON_DUMP_FILE

Function Documentation

◆ dump_add_rmsnorm2d_rdquant_fwd_json()

◆ dump_batched_gemm_json_results()

◆ dump_batched_transpose_json()

◆ dump_elementwise_json_results()

◆ dump_flatmm_json_results()

◆ dump_fmha_bwd_json_results()

◆ dump_fmha_fwd_json_results()

◆ dump_fused_moe_json()

◆ dump_gemm_json_results()

◆ dump_gemm_multi_d_fp16_json_results()

◆ dump_grouped_gemm_json_results()

◆ dump_layernorm2d_fwd_json_results()

◆ dump_moe_smoothquant_json()

◆ dump_moe_sorting_json()

◆ dump_permute_json_results()

◆ dump_reduce_json_results()

◆ dump_rmsnorm2d_fwd_json()

◆ dump_smoothquant_json()

◆ dump_topk_softmax_json()