/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference
#include "rapidjson/writer.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/document.h"
#include "rapidjson/rapidjson.h"
Go to the source code of this file.
Classes | |
struct | has_warp_tile_members< T, typename > |
struct | has_warp_tile_members< T, std::void_t< decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)> > |
Macros | |
#define | START_JSON_DUMP_FILE(file_name) |
#define | END_JSON_DUMP_FILE() |
#define | ADD_KEY_VALUE(key, value) add_key_value_pair(writer, key, value); |
#define | ADD_PERF_TO_JSON(_time, tflops, gbytes) add_perf_to_json(writer, _time, tflops, gbytes); |
Functions | |
template<typename T > | |
void | add_key_value_pair (rapidjson::Writer< rapidjson::StringBuffer > &writer, const char *key, T value) |
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits> | |
void | dump_gemm_json_results (const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic") |
void | dump_batched_gemm_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic") |
template<typename ALayout , typename BLayout , typename CLayout > | |
void | dump_grouped_gemm_json_results (const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm") |
void | dump_flatmm_json_results (const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic") |
void | dump_gemm_multi_d_fp16_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16") |
void | dump_elementwise_json_results (const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise") |
void | dump_layernorm2d_fwd_json_results (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd") |
template<typename DataType , template< typename > typename DTypeTraits> | |
void | dump_reduce_json_results (const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce") |
void | dump_permute_json_results (const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute") |
void | dump_topk_softmax_json (const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax") |
void | dump_rmsnorm2d_fwd_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd") |
void | dump_add_rmsnorm2d_rdquant_fwd_json (const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd") |
void | dump_smoothquant_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant") |
void | dump_moe_sorting_json (const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting") |
void | dump_batched_transpose_json (const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose") |
void | dump_moe_smoothquant_json (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant") |
void | dump_fused_moe_json (const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe") |
void | dump_fmha_fwd_json_results (const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bais, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd") |
void | dump_fmha_bwd_json_results (const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd") |
Macro Definition Documentation
◆ ADD_KEY_VALUE
#define ADD_KEY_VALUE(key, value) add_key_value_pair(writer, key, value);
◆ ADD_PERF_TO_JSON
#define ADD_PERF_TO_JSON(_time, tflops, gbytes) add_perf_to_json(writer, _time, tflops, gbytes);
◆ END_JSON_DUMP_FILE
#define END_JSON_DUMP_FILE()
Value:
writer.EndObject(); \
file << s.GetString(); \
file.close(); \
std::cout << "Results written to " << file_str << " successfully" << std::endl;
◆ START_JSON_DUMP_FILE
#define START_JSON_DUMP_FILE(file_name)
Value:
std::string file_str(file_name); \
std::ofstream file(file_str); \
if(!file.is_open()) \
{ \
throw std::runtime_error("Could not open file: " + std::string(file_name)); \
} \
rapidjson::Writer<rapidjson::StringBuffer> writer(s); \
writer.StartObject();
Function Documentation
◆ add_key_value_pair()
template<typename T >
void add_key_value_pair(rapidjson::Writer< rapidjson::StringBuffer > &writer,
                        const char *key,
                        T value)
◆ dump_add_rmsnorm2d_rdquant_fwd_json()
void dump_add_rmsnorm2d_rdquant_fwd_json(
    const std::string &json_filename,
    const std::string &input_data_type,
    const std::string &quantized_data_type,
    int m, int n, int stride,
    float epsilon,
    float ave_time, float tflops, float gb_per_sec,
    bool pass,
    const std::string &kernel_name = "add_rmsnorm2d_rdquant_fwd")
◆ dump_batched_gemm_json_results()
void dump_batched_gemm_json_results(
    const std::string &json_filename,
    const std::string &op_name,
    int M, int N, int K,
    int stride_A, int stride_B, int stride_C,
    int batch_stride_A, int batch_stride_B, int batch_stride_C,
    int batch_count,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "batched_gemm_basic")
◆ dump_batched_transpose_json()
void dump_batched_transpose_json(
    const std::string &json_filename,
    int N, int C, int H, int W,
    const std::string &layout_in,
    const std::string &layout_out,
    const std::string &prec,
    float ave_time, float tflops, float gb_per_sec,
    bool pass,
    const std::string &kernel_name = "batched_transpose")
◆ dump_elementwise_json_results()
void dump_elementwise_json_results(
    const std::string &json_filename,
    const std::string &prec,
    int grid_size, int block_size,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "elementwise")
◆ dump_flatmm_json_results()
void dump_flatmm_json_results(
    const std::string &json_filename,
    const std::string &datatype,
    int M, int N, int K,
    int stride_A, int stride_B, int stride_C,
    int kbatch,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "flatmm_basic")
◆ dump_fmha_bwd_json_results()
void dump_fmha_bwd_json_results(
    const std::string &json_filename,
    const std::string &data_type,
    const std::string &mode,
    const std::string &i_perm,
    const std::string &o_perm,
    int batch, int nhead, int nhead_k,
    int seqlen_q, int seqlen_k,
    int hdim_q, int hdim_v,
    float scale,
    const std::string &bias,
    bool use_dbias,
    float p_drop,
    bool s_randval,
    bool deterministic,
    const std::string &mask,
    int mask_left, int mask_right,
    int workspace_size,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "fmha_bwd")
◆ dump_fmha_fwd_json_results()
void dump_fmha_fwd_json_results(
    const std::string &json_filename,
    const std::string &prec,
    const std::string &mode,
    const std::string &io_layout,
    int batch, int nhead, int nhead_k,
    int seqlen_qs, int seqlen_ks, int seqlen_kpads,
    int hdim_q, int hdim_v,
    float scale_s,
    float p_drop,
    bool lse,
    bool squant,
    const std::string &bais,
    const std::string &vlayout,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "fmha_fwd")
◆ dump_fused_moe_json()
void dump_fused_moe_json(
    const std::string &json_filename,
    const std::string &api_str,
    const std::string &prec_str,
    int tokens,
    bool is_local_token,
    int local_tokens,
    int experts, int topk,
    int hidden_size, int intermediate_size,
    int stride,
    int block_m,
    int activation,
    bool gate_only,
    bool fused_quant,
    bool pass,
    float ave_time, float tflops, float tb_per_sec,
    const std::string &kernel_name = "fused_moe")
◆ dump_gemm_json_results()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits>
void dump_gemm_json_results(
    const std::string &json_filename,
    int M, int N, int K,
    int stride_A, int stride_B, int stride_C,
    bool persistent,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "gemm_basic")
◆ dump_gemm_multi_d_fp16_json_results()
void dump_gemm_multi_d_fp16_json_results(
    const std::string &json_filename,
    const std::string &op_name,
    int M, int N, int K,
    int StrideA, int StrideB,
    int StrideD0, int StrideD1,
    int StrideE,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "gemm_multi_d_fp16")
◆ dump_grouped_gemm_json_results()
template<typename ALayout , typename BLayout , typename CLayout >
void dump_grouped_gemm_json_results(
    const std::string &json_filename,
    const std::string &op_name,
    int group_count,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "grouped_gemm")
◆ dump_layernorm2d_fwd_json_results()
void dump_layernorm2d_fwd_json_results(
    const std::string &json_filename,
    const std::string &prec_i,
    const std::string &prec_o,
    const std::string &prec_sm,
    const std::string &prec_sy,
    int m, int n,
    int x_stride, int xr_stride,
    int y_stride, int yr_stride,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "layernorm2d_fwd")
◆ dump_moe_smoothquant_json()
void dump_moe_smoothquant_json(
    const std::string &json_filename,
    const std::string &prec_i,
    const std::string &prec_o,
    int tokens, int hidden_size, int stride,
    int experts, int topk,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "moe_smoothquant")
◆ dump_moe_sorting_json()
void dump_moe_sorting_json(
    const std::string &json_filename,
    const std::string &index_prec,
    const std::string &weight_prec,
    const std::string &workspace_size,
    int dispatch_policy,
    int tokens, int num_experts, int topk,
    float ave_time, float tflops, float gb_per_sec,
    bool pass,
    const std::string &kernel_name = "moe_sorting")
◆ dump_permute_json_results()
void dump_permute_json_results(
    const std::string &json_filename,
    const std::string &data_type,
    bool pass,
    float ave_time, float tflop, float gb_per_sec,
    const std::string &kernel_name = "permute")
◆ dump_reduce_json_results()
template<typename DataType , template< typename > typename DTypeTraits>
void dump_reduce_json_results(
    const std::string &json_filename,
    int N, int C, int H, int W,
    bool pass,
    float ave_time, float tflops, float gb_per_sec,
    const std::string &kernel_name = "reduce")
◆ dump_rmsnorm2d_fwd_json()
void dump_rmsnorm2d_fwd_json(
    const std::string &json_filename,
    const std::string &prec_str,
    int m, int n,
    int x_stride, int xr_stride,
    int y_stride, int yr_stride,
    int use_model_sensitive_rmsnorm,
    float ave_time, float tflops, float gb_per_sec,
    bool pass,
    const std::string &kernel_name = "rmsnorm2d_fwd")
◆ dump_smoothquant_json()
void dump_smoothquant_json(
    const std::string &json_filename,
    const std::string &prec_str,
    int m, int n,
    int x_stride, int y_stride,
    float ave_time, float tflops, float gb_per_sec,
    bool pass,
    const std::string &kernel_name = "smoothquant")
◆ dump_topk_softmax_json()
void dump_topk_softmax_json(
    const std::string &json_filename,
    const std::string &input_prec,
    const std::string &weight_prec,
    int tokens, int experts, int topk,
    int stride_input, int stride_output,
    float ave_time, float tflop, float gb_per_sec,
    bool pass,
    const std::string &kernel_name = "topk_softmax")