/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference
json_dump.hpp File Reference

Go to the source code of this file.

Classes

struct  has_warp_tile_members< T, typename >
 
struct  has_warp_tile_members< T, std::void_t< decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)> >
 

Macros

#define START_JSON_DUMP_FILE(file_name)
 
#define END_JSON_DUMP_FILE()
 
#define ADD_KEY_VALUE(key, value)   add_key_value_pair(writer, key, value);
 
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)   add_perf_to_json(writer, _time, tflops, gbytes);
 

Functions

template<typename T >
void add_key_value_pair (rapidjson::Writer< rapidjson::StringBuffer > &writer, const char *key, T value)
 
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits>
void dump_gemm_json_results (const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")
 
void dump_batched_gemm_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")
 
template<typename ALayout , typename BLayout , typename CLayout >
void dump_grouped_gemm_json_results (const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")
 
void dump_flatmm_json_results (const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")
 
void dump_gemm_multi_d_fp16_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")
 
void dump_elementwise_json_results (const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")
 
void dump_layernorm2d_fwd_json_results (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")
 
template<typename DataType , template< typename > typename DTypeTraits>
void dump_reduce_json_results (const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")
 
void dump_permute_json_results (const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")
 
void dump_topk_softmax_json (const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")
 
void dump_rmsnorm2d_fwd_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")
 
void dump_add_rmsnorm2d_rdquant_fwd_json (const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")
 
void dump_smoothquant_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")
 
void dump_moe_sorting_json (const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")
 
void dump_batched_transpose_json (const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")
 
void dump_moe_smoothquant_json (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")
 
void dump_fused_moe_json (const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")
 
void dump_fmha_fwd_json_results (const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bais, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")
 
void dump_fmha_bwd_json_results (const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")
 

Macro Definition Documentation

◆ ADD_KEY_VALUE

#define ADD_KEY_VALUE(key, value)   add_key_value_pair(writer, key, value);

◆ ADD_PERF_TO_JSON

#define ADD_PERF_TO_JSON(_time, tflops, gbytes)   add_perf_to_json(writer, _time, tflops, gbytes);

◆ END_JSON_DUMP_FILE

#define END_JSON_DUMP_FILE()
Value:
writer.EndObject(); \
file << s.GetString(); \
file.close(); \
std::cout << "Results written to " << file_str << " successfully" << std::endl;

◆ START_JSON_DUMP_FILE

#define START_JSON_DUMP_FILE(file_name)
Value:
std::string file_str(file_name); \
std::ofstream file(file_str); \
if(!file.is_open()) \
{ \
throw std::runtime_error("Could not open file: " + std::string(file_name)); \
} \
rapidjson::StringBuffer s; \
rapidjson::Writer<rapidjson::StringBuffer> writer(s); \
writer.StartObject();

References: rapidjson::StringBuffer, a typedef for GenericStringBuffer< UTF8< char >, CrtAllocator >.
Definition: fwd.h:59

Function Documentation

◆ add_key_value_pair()

template<typename T >
void add_key_value_pair ( rapidjson::Writer< rapidjson::StringBuffer > &  writer,
const char *  key,
T  value 
)

◆ dump_add_rmsnorm2d_rdquant_fwd_json()

void dump_add_rmsnorm2d_rdquant_fwd_json ( const std::string &  json_filename,
const std::string &  input_data_type,
const std::string &  quantized_data_type,
int  m,
int  n,
int  stride,
float  epsilon,
float  ave_time,
float  tflops,
float  gb_per_sec,
bool  pass,
const std::string &  kernel_name = "add_rmsnorm2d_rdquant_fwd" 
)

◆ dump_batched_gemm_json_results()

void dump_batched_gemm_json_results ( const std::string &  json_filename,
const std::string &  op_name,
int  M,
int  N,
int  K,
int  stride_A,
int  stride_B,
int  stride_C,
int  batch_stride_A,
int  batch_stride_B,
int  batch_stride_C,
int  batch_count,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "batched_gemm_basic" 
)

◆ dump_batched_transpose_json()

void dump_batched_transpose_json ( const std::string &  json_filename,
int  N,
int  C,
int  H,
int  W,
const std::string &  layout_in,
const std::string &  layout_out,
const std::string &  prec,
float  ave_time,
float  tflops,
float  gb_per_sec,
bool  pass,
const std::string &  kernel_name = "batched_transpose" 
)

◆ dump_elementwise_json_results()

void dump_elementwise_json_results ( const std::string &  json_filename,
const std::string &  prec,
int  grid_size,
int  block_size,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "elementwise" 
)

◆ dump_flatmm_json_results()

void dump_flatmm_json_results ( const std::string &  json_filename,
const std::string &  datatype,
int  M,
int  N,
int  K,
int  stride_A,
int  stride_B,
int  stride_C,
int  kbatch,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "flatmm_basic" 
)

◆ dump_fmha_bwd_json_results()

void dump_fmha_bwd_json_results ( const std::string &  json_filename,
const std::string &  data_type,
const std::string &  mode,
const std::string &  i_perm,
const std::string &  o_perm,
int  batch,
int  nhead,
int  nhead_k,
int  seqlen_q,
int  seqlen_k,
int  hdim_q,
int  hdim_v,
float  scale,
const std::string &  bias,
bool  use_dbias,
float  p_drop,
bool  s_randval,
bool  deterministic,
const std::string &  mask,
int  mask_left,
int  mask_right,
int  workspace_size,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "fmha_bwd" 
)

◆ dump_fmha_fwd_json_results()

void dump_fmha_fwd_json_results ( const std::string &  json_filename,
const std::string &  prec,
const std::string &  mode,
const std::string &  io_layout,
int  batch,
int  nhead,
int  nhead_k,
int  seqlen_qs,
int  seqlen_ks,
int  seqlen_kpads,
int  hdim_q,
int  hdim_v,
float  scale_s,
float  p_drop,
bool  lse,
bool  squant,
const std::string &  bais,
const std::string &  vlayout,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "fmha_fwd" 
)

◆ dump_fused_moe_json()

void dump_fused_moe_json ( const std::string &  json_filename,
const std::string &  api_str,
const std::string &  prec_str,
int  tokens,
bool  is_local_token,
int  local_tokens,
int  experts,
int  topk,
int  hidden_size,
int  intermediate_size,
int  stride,
int  block_m,
int  activation,
bool  gate_only,
bool  fused_quant,
bool  pass,
float  ave_time,
float  tflops,
float  tb_per_sec,
const std::string &  kernel_name = "fused_moe" 
)

◆ dump_gemm_json_results()

template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits>
void dump_gemm_json_results ( const std::string &  json_filename,
int  M,
int  N,
int  K,
int  stride_A,
int  stride_B,
int  stride_C,
bool  persistent,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "gemm_basic" 
)

◆ dump_gemm_multi_d_fp16_json_results()

void dump_gemm_multi_d_fp16_json_results ( const std::string &  json_filename,
const std::string &  op_name,
int  M,
int  N,
int  K,
int  StrideA,
int  StrideB,
int  StrideD0,
int  StrideD1,
int  StrideE,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "gemm_multi_d_fp16" 
)

◆ dump_grouped_gemm_json_results()

template<typename ALayout , typename BLayout , typename CLayout >
void dump_grouped_gemm_json_results ( const std::string &  json_filename,
const std::string &  op_name,
int  group_count,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "grouped_gemm" 
)

◆ dump_layernorm2d_fwd_json_results()

void dump_layernorm2d_fwd_json_results ( const std::string &  json_filename,
const std::string &  prec_i,
const std::string &  prec_o,
const std::string &  prec_sm,
const std::string &  prec_sy,
int  m,
int  n,
int  x_stride,
int  xr_stride,
int  y_stride,
int  yr_stride,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "layernorm2d_fwd" 
)

◆ dump_moe_smoothquant_json()

void dump_moe_smoothquant_json ( const std::string &  json_filename,
const std::string &  prec_i,
const std::string &  prec_o,
int  tokens,
int  hidden_size,
int  stride,
int  experts,
int  topk,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "moe_smoothquant" 
)

◆ dump_moe_sorting_json()

void dump_moe_sorting_json ( const std::string &  json_filename,
const std::string &  index_prec,
const std::string &  weight_prec,
const std::string &  workspace_size,
int  dispatch_policy,
int  tokens,
int  num_experts,
int  topk,
float  ave_time,
float  tflops,
float  gb_per_sec,
bool  pass,
const std::string &  kernel_name = "moe_sorting" 
)

◆ dump_permute_json_results()

void dump_permute_json_results ( const std::string &  json_filename,
const std::string &  data_type,
bool  pass,
float  ave_time,
float  tflop,
float  gb_per_sec,
const std::string &  kernel_name = "permute" 
)

◆ dump_reduce_json_results()

template<typename DataType , template< typename > typename DTypeTraits>
void dump_reduce_json_results ( const std::string &  json_filename,
int  N,
int  C,
int  H,
int  W,
bool  pass,
float  ave_time,
float  tflops,
float  gb_per_sec,
const std::string &  kernel_name = "reduce" 
)

◆ dump_rmsnorm2d_fwd_json()

void dump_rmsnorm2d_fwd_json ( const std::string &  json_filename,
const std::string &  prec_str,
int  m,
int  n,
int  x_stride,
int  xr_stride,
int  y_stride,
int  yr_stride,
int  use_model_sensitive_rmsnorm,
float  ave_time,
float  tflops,
float  gb_per_sec,
bool  pass,
const std::string &  kernel_name = "rmsnorm2d_fwd" 
)

◆ dump_smoothquant_json()

void dump_smoothquant_json ( const std::string &  json_filename,
const std::string &  prec_str,
int  m,
int  n,
int  x_stride,
int  y_stride,
float  ave_time,
float  tflops,
float  gb_per_sec,
bool  pass,
const std::string &  kernel_name = "smoothquant" 
)

◆ dump_topk_softmax_json()

void dump_topk_softmax_json ( const std::string &  json_filename,
const std::string &  input_prec,
const std::string &  weight_prec,
int  tokens,
int  experts,
int  topk,
int  stride_input,
int  stride_output,
float  ave_time,
float  tflop,
float  gb_per_sec,
bool  pass,
const std::string &  kernel_name = "topk_softmax" 
)