/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File
json_dump.hpp
Go to the documentation of this file.
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <type_traits>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
#include "rapidjson/document.h"
#include "rapidjson/rapidjson.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
#pragma GCC diagnostic pop
9 
// Opens `file_name` for writing and starts a JSON object.
// Expands to several statements that declare the locals `file_str`, `file`, `s` and
// `writer` in the enclosing scope; ADD_KEY_VALUE, ADD_PERF_TO_JSON and END_JSON_DUMP_FILE
// refer to those locals by name, so this macro can be used at most once per scope.
// Throws std::runtime_error when the file cannot be opened.
10 #define START_JSON_DUMP_FILE(file_name) \
11  std::string file_str(file_name); \
12  std::ofstream file(file_str); \
13  if(!file.is_open()) \
14  { \
15  throw std::runtime_error("Could not open file: " + std::string(file_name)); \
16  } \
17  rapidjson::StringBuffer s; \
18  rapidjson::Writer<rapidjson::StringBuffer> writer(s); \
19  writer.StartObject();
20 
// Closes the JSON object opened by START_JSON_DUMP_FILE, writes the buffered text to
// `file`, closes the file and prints a confirmation line to std::cout.
// Relies on the locals `writer`, `s`, `file` and `file_str` declared by START_JSON_DUMP_FILE.
// NOTE(review): the stream state is not checked after the write, so the confirmation is
// printed unconditionally.
21 #define END_JSON_DUMP_FILE() \
22  writer.EndObject(); \
23  file << s.GetString(); \
24  file.close(); \
25  std::cout << "Results written to " << file_str << " successfully" << std::endl;
26 
// Adds one "key": value member through add_key_value_pair, using the `writer` local
// declared by START_JSON_DUMP_FILE. The expansion already ends in ';'.
27 #define ADD_KEY_VALUE(key, value) add_key_value_pair(writer, key, value);
// Adds the "perf" entry through add_perf_to_json, using the same `writer` local.
// The expansion already ends in ';', which is why call sites may omit their own.
28 #define ADD_PERF_TO_JSON(_time, tflops, gbytes) add_perf_to_json(writer, _time, tflops, gbytes);
29 
30 template <typename T>
31 void add_key_value_pair(rapidjson::Writer<rapidjson::StringBuffer>& writer,
32  const char* key,
33  T value)
34 {
35  writer.Key(key);
37  {
38  writer.String(value, static_cast<rapidjson::SizeType>(std::strlen(value)));
39  }
40  else if constexpr(std::is_same<T, std::string>::value)
41  {
42  writer.String(value.c_str(), static_cast<rapidjson::SizeType>(value.length()));
43  }
44  else if constexpr(std::is_floating_point<T>::value)
45  {
46  writer.Double(static_cast<double>(value));
47  }
48  else if constexpr(std::is_integral<T>::value)
49  {
50  writer.Int64(static_cast<int64_t>(value));
51  }
52  else
53  {
56  "Unsupported type for JSON serialization");
57  }
58 }
59 
60 static void add_perf_to_json(rapidjson::Writer<rapidjson::StringBuffer>& writer,
61  float time,
62  float tflops,
63  float gbytes)
64 {
65  std::string roster("perf");
66  writer.String(roster.c_str(), static_cast<rapidjson::SizeType>(roster.length()));
67 
68  writer.StartArray();
69  writer.StartObject();
70 
71  add_key_value_pair(writer, "time", time);
72  add_key_value_pair(writer, "tflops", tflops);
73  add_key_value_pair(writer, "gbytes", gbytes);
74 
75  writer.EndObject();
76  writer.EndArray();
77 }
78 
79 // Helper traits to check for static member existence
80 template <typename T, typename = void>
82 {
83 };
84 
85 template <typename T>
87  T,
88  std::void_t<decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)>>
90 {
91 };
92 
93 template <typename ALayout,
94  typename BLayout,
95  typename CLayout,
96  typename ADataType,
97  typename BDataType,
98  typename CDataType,
99  typename GemmConfig,
100  template <typename>
101  typename DTypeTraits>
102 void dump_gemm_json_results(const std::string& json_filename,
103  int M,
104  int N,
105  int K,
106  int stride_A,
107  int stride_B,
108  int stride_C,
109  bool persistent,
110  bool pass,
111  float ave_time,
112  float tflops,
113  float gb_per_sec,
114  const std::string& kernel_name = "gemm_basic")
115 {
116  START_JSON_DUMP_FILE(json_filename);
117  ADD_KEY_VALUE("name", kernel_name);
118  ADD_KEY_VALUE("M", M);
119  ADD_KEY_VALUE("N", N);
120  ADD_KEY_VALUE("K", K);
121  ADD_KEY_VALUE("stride_A", stride_A);
122  ADD_KEY_VALUE("stride_B", stride_B);
123  ADD_KEY_VALUE("stride_C", stride_C);
124  ADD_KEY_VALUE("A_layout", ALayout::name);
125  ADD_KEY_VALUE("B_layout", BLayout::name);
126  ADD_KEY_VALUE("C_layout", CLayout::name);
127  using TraitsADataType = DTypeTraits<ADataType>;
128  using TraitsBDataType = DTypeTraits<BDataType>;
129  using TraitsCDataType = DTypeTraits<CDataType>;
130  ADD_KEY_VALUE("A_type", TraitsADataType::name);
131  ADD_KEY_VALUE("B_type", TraitsBDataType::name);
132  ADD_KEY_VALUE("C_type", TraitsCDataType::name);
133  ADD_KEY_VALUE("structured_sparsity", GemmConfig::UseStructuredSparsity ? "on" : "off");
134 
136  {
137  ADD_KEY_VALUE("warp_tile",
138  std::to_string(GemmConfig::M_Warp_Tile) + "x" +
139  std::to_string(GemmConfig::N_Warp_Tile) + "x" +
140  std::to_string(GemmConfig::K_Warp_Tile));
141  }
142  ADD_KEY_VALUE("persistent", persistent ? "on" : "off");
143  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
144  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
146 }
147 
148 void dump_batched_gemm_json_results(const std::string& json_filename,
149  const std::string& op_name,
150  int M,
151  int N,
152  int K,
153  int stride_A,
154  int stride_B,
155  int stride_C,
156  int batch_stride_A,
157  int batch_stride_B,
158  int batch_stride_C,
159  int batch_count,
160  bool pass,
161  float ave_time,
162  float tflops,
163  float gb_per_sec,
164  const std::string& kernel_name = "batched_gemm_basic")
165 {
166  START_JSON_DUMP_FILE(json_filename);
167  ADD_KEY_VALUE("name", kernel_name);
168  ADD_KEY_VALUE("op_name", op_name);
169  ADD_KEY_VALUE("M", M);
170  ADD_KEY_VALUE("N", N);
171  ADD_KEY_VALUE("K", K);
172  ADD_KEY_VALUE("stride_A", stride_A);
173  ADD_KEY_VALUE("stride_B", stride_B);
174  ADD_KEY_VALUE("stride_C", stride_C);
175  ADD_KEY_VALUE("batch_stride_A", batch_stride_A);
176  ADD_KEY_VALUE("batch_stride_B", batch_stride_B);
177  ADD_KEY_VALUE("batch_stride_C", batch_stride_C);
178  ADD_KEY_VALUE("batch_count", batch_count);
179  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
180  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
182 }
183 
184 template <typename ALayout, typename BLayout, typename CLayout>
185 void dump_grouped_gemm_json_results(const std::string& json_filename,
186  const std::string& op_name,
187  int group_count,
188  bool pass,
189  float ave_time,
190  float tflops,
191  float gb_per_sec,
192  const std::string& kernel_name = "grouped_gemm")
193 {
194  START_JSON_DUMP_FILE(json_filename);
195  ADD_KEY_VALUE("name", kernel_name);
196  ADD_KEY_VALUE("op_name", op_name);
197  ADD_KEY_VALUE("group_count", group_count);
198  ADD_KEY_VALUE("A_layout", ALayout::name);
199  ADD_KEY_VALUE("B_layout", BLayout::name);
200  ADD_KEY_VALUE("C_layout", CLayout::name);
201  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
202  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
204 }
205 
206 void dump_flatmm_json_results(const std::string& json_filename,
207  const std::string& datatype,
208  int M,
209  int N,
210  int K,
211  int stride_A,
212  int stride_B,
213  int stride_C,
214  int kbatch,
215  bool pass,
216  float ave_time,
217  float tflops,
218  float gb_per_sec,
219  const std::string& kernel_name = "flatmm_basic")
220 {
221  START_JSON_DUMP_FILE(json_filename);
222  ADD_KEY_VALUE("name", kernel_name);
223  ADD_KEY_VALUE("DataType", datatype);
224  ADD_KEY_VALUE("M", M);
225  ADD_KEY_VALUE("N", N);
226  ADD_KEY_VALUE("K", K);
227  ADD_KEY_VALUE("StrideA", stride_A);
228  ADD_KEY_VALUE("StrideB", stride_B);
229  ADD_KEY_VALUE("StrideC", stride_C);
230  ADD_KEY_VALUE("kbatch", kbatch);
231  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
232  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
234 }
235 
236 void dump_gemm_multi_d_fp16_json_results(const std::string& json_filename,
237  const std::string& op_name,
238  int M,
239  int N,
240  int K,
241  int StrideA,
242  int StrideB,
243  int StrideD0,
244  int StrideD1,
245  int StrideE,
246  bool pass,
247  float ave_time,
248  float tflops,
249  float gb_per_sec,
250  const std::string& kernel_name = "gemm_multi_d_fp16")
251 {
252  START_JSON_DUMP_FILE(json_filename);
253  ADD_KEY_VALUE("name", kernel_name);
254  ADD_KEY_VALUE("op_name", op_name);
255  ADD_KEY_VALUE("M", M);
256  ADD_KEY_VALUE("N", N);
257  ADD_KEY_VALUE("K", K);
258  ADD_KEY_VALUE("StrideA", StrideA);
259  ADD_KEY_VALUE("StrideB", StrideB);
260  ADD_KEY_VALUE("StrideD0", StrideD0);
261  ADD_KEY_VALUE("StrideD1", StrideD1);
262  ADD_KEY_VALUE("StrideE", StrideE);
263  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
264  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
266 }
267 
268 void dump_elementwise_json_results(const std::string& json_filename,
269  const std::string& prec,
270  int grid_size,
271  int block_size,
272  float ave_time,
273  float tflops,
274  float gb_per_sec,
275  const std::string& kernel_name = "elementwise")
276 {
277  START_JSON_DUMP_FILE(json_filename);
278  ADD_KEY_VALUE("name", kernel_name);
279  ADD_KEY_VALUE("prec", prec);
280  ADD_KEY_VALUE("grid_size", grid_size);
281  ADD_KEY_VALUE("block_size", block_size);
282  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
284 }
285 
286 void dump_layernorm2d_fwd_json_results(const std::string& json_filename,
287  const std::string& prec_i,
288  const std::string& prec_o,
289  const std::string& prec_sm,
290  const std::string& prec_sy,
291  int m,
292  int n,
293  int x_stride,
294  int xr_stride,
295  int y_stride,
296  int yr_stride,
297  bool pass,
298  float ave_time,
299  float tflops,
300  float gb_per_sec,
301  const std::string& kernel_name = "layernorm2d_fwd")
302 {
303  START_JSON_DUMP_FILE(json_filename);
304  ADD_KEY_VALUE("name", kernel_name);
305  ADD_KEY_VALUE("prec_i", prec_i);
306  ADD_KEY_VALUE("prec_o", prec_o);
307  ADD_KEY_VALUE("prec_sm", prec_sm);
308  ADD_KEY_VALUE("prec_sy", prec_sy);
309  ADD_KEY_VALUE("m", m);
310  ADD_KEY_VALUE("n", n);
311  ADD_KEY_VALUE("x_stride", x_stride);
312  ADD_KEY_VALUE("xr_stride", xr_stride);
313  ADD_KEY_VALUE("y_stride", y_stride);
314  ADD_KEY_VALUE("yr_stride", yr_stride);
315  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
316  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
318 }
319 
320 template <typename DataType, template <typename> typename DTypeTraits>
321 void dump_reduce_json_results(const std::string& json_filename,
322  int N,
323  int C,
324  int H,
325  int W,
326  bool pass,
327  float ave_time,
328  float tflops,
329  float gb_per_sec,
330  const std::string& kernel_name = "reduce")
331 {
332  START_JSON_DUMP_FILE(json_filename);
333  ADD_KEY_VALUE("name", kernel_name);
334  using Traits = DTypeTraits<DataType>;
335  ADD_KEY_VALUE("data_type", Traits::name);
336  ADD_KEY_VALUE("N", N);
337  ADD_KEY_VALUE("C", C);
338  ADD_KEY_VALUE("H", H);
339  ADD_KEY_VALUE("W", W);
340  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
341  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
343 }
344 
345 void dump_permute_json_results(const std::string& json_filename,
346  const std::string& data_type,
347  bool pass,
348  float ave_time,
349  float tflop,
350  float gb_per_sec,
351  const std::string& kernel_name = "permute")
352 {
353  START_JSON_DUMP_FILE(json_filename);
354  ADD_KEY_VALUE("name", kernel_name);
355  ADD_KEY_VALUE("data_type", data_type);
356  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
357  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec)
359 }
360 
361 void dump_topk_softmax_json(const std::string& json_filename,
362  const std::string& input_prec,
363  const std::string& weight_prec,
364  int tokens,
365  int experts,
366  int topk,
367  int stride_input,
368  int stride_output,
369  float ave_time,
370  float tflop,
371  float gb_per_sec,
372  bool pass,
373  const std::string& kernel_name = "topk_softmax")
374 {
375  START_JSON_DUMP_FILE(json_filename);
376  ADD_KEY_VALUE("name", kernel_name);
377  ADD_KEY_VALUE("input_prec", input_prec);
378  ADD_KEY_VALUE("weight_prec", weight_prec);
379  ADD_KEY_VALUE("tokens", tokens);
380  ADD_KEY_VALUE("experts", experts);
381  ADD_KEY_VALUE("topk", topk);
382  ADD_KEY_VALUE("stride_input", stride_input);
383  ADD_KEY_VALUE("stride_output", stride_output);
384  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
385  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec);
387 }
388 
389 void dump_rmsnorm2d_fwd_json(const std::string& json_filename,
390  const std::string& prec_str,
391  int m,
392  int n,
393  int x_stride,
394  int xr_stride,
395  int y_stride,
396  int yr_stride,
397  int use_model_sensitive_rmsnorm,
398  float ave_time,
399  float tflops,
400  float gb_per_sec,
401  bool pass,
402  const std::string& kernel_name = "rmsnorm2d_fwd")
403 {
404  START_JSON_DUMP_FILE(json_filename);
405  ADD_KEY_VALUE("name", kernel_name);
406  ADD_KEY_VALUE("prec", prec_str);
407  ADD_KEY_VALUE("m", m);
408  ADD_KEY_VALUE("n", n);
409  ADD_KEY_VALUE("x_stride", x_stride);
410  ADD_KEY_VALUE("xr_stride", xr_stride);
411  ADD_KEY_VALUE("y_stride", y_stride);
412  ADD_KEY_VALUE("yr_stride", yr_stride);
413  ADD_KEY_VALUE("use_model_sensitive_rmsnorm", use_model_sensitive_rmsnorm);
414  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
415  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
417 }
418 
420  const std::string& json_filename,
421  const std::string& input_data_type,
422  const std::string& quantized_data_type,
423  int m,
424  int n,
425  int stride,
426  float epsilon,
427  float ave_time,
428  float tflops,
429  float gb_per_sec,
430  bool pass,
431  const std::string& kernel_name = "add_rmsnorm2d_rdquant_fwd")
432 {
433  START_JSON_DUMP_FILE(json_filename);
434  ADD_KEY_VALUE("name", kernel_name);
435  ADD_KEY_VALUE("input_data_type", input_data_type);
436  ADD_KEY_VALUE("quantized_data_type", quantized_data_type);
437  ADD_KEY_VALUE("m", m);
438  ADD_KEY_VALUE("n", n);
439  ADD_KEY_VALUE("stride", stride);
440  ADD_KEY_VALUE("epsilon", epsilon);
441  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
442  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
444 }
445 
446 void dump_smoothquant_json(const std::string& json_filename,
447  const std::string& prec_str,
448  int m,
449  int n,
450  int x_stride,
451  int y_stride,
452  float ave_time,
453  float tflops,
454  float gb_per_sec,
455  bool pass,
456  const std::string& kernel_name = "smoothquant")
457 {
458  START_JSON_DUMP_FILE(json_filename);
459  ADD_KEY_VALUE("name", kernel_name);
460  ADD_KEY_VALUE("prec", prec_str);
461  ADD_KEY_VALUE("m", m);
462  ADD_KEY_VALUE("n", n);
463  ADD_KEY_VALUE("x_stride", x_stride);
464  ADD_KEY_VALUE("y_stride", y_stride);
465  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
466  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
468 }
469 
470 void dump_moe_sorting_json(const std::string& json_filename,
471  const std::string& index_prec,
472  const std::string& weight_prec,
473  const std::string& workspace_size,
474  int dispatch_policy,
475  int tokens,
476  int num_experts,
477  int topk,
478  float ave_time,
479  float tflops,
480  float gb_per_sec,
481  bool pass,
482  const std::string& kernel_name = "moe_sorting")
483 {
484  START_JSON_DUMP_FILE(json_filename);
485  ADD_KEY_VALUE("name", kernel_name);
486  ADD_KEY_VALUE("index_prec", index_prec);
487  ADD_KEY_VALUE("weight_prec", weight_prec);
488  ADD_KEY_VALUE("workspace_size", workspace_size);
489  ADD_KEY_VALUE("dispatch_policy", dispatch_policy);
490  ADD_KEY_VALUE("tokens", tokens);
491  ADD_KEY_VALUE("num_experts", num_experts);
492  ADD_KEY_VALUE("topk", topk);
493  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
494  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
496 }
497 
498 void dump_batched_transpose_json(const std::string& json_filename,
499  int N,
500  int C,
501  int H,
502  int W,
503  const std::string& layout_in,
504  const std::string& layout_out,
505  const std::string& prec,
506  float ave_time,
507  float tflops,
508  float gb_per_sec,
509  bool pass,
510  const std::string& kernel_name = "batched_transpose")
511 {
512  START_JSON_DUMP_FILE(json_filename);
513  ADD_KEY_VALUE("name", kernel_name);
514  ADD_KEY_VALUE("N", N);
515  ADD_KEY_VALUE("C", C);
516  ADD_KEY_VALUE("H", H);
517  ADD_KEY_VALUE("W", W);
518  ADD_KEY_VALUE("LayoutIn", layout_in);
519  ADD_KEY_VALUE("LayoutOut", layout_out);
520  ADD_KEY_VALUE("Precision", prec);
521  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
522  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
524 }
525 
526 void dump_moe_smoothquant_json(const std::string& json_filename,
527  const std::string& prec_i,
528  const std::string& prec_o,
529  int tokens,
530  int hidden_size,
531  int stride,
532  int experts,
533  int topk,
534  bool pass,
535  float ave_time,
536  float tflops,
537  float gb_per_sec,
538  const std::string& kernel_name = "moe_smoothquant")
539 {
540  START_JSON_DUMP_FILE(json_filename);
541  ADD_KEY_VALUE("name", kernel_name);
542  ADD_KEY_VALUE("prec_i", prec_i);
543  ADD_KEY_VALUE("prec_o", prec_o);
544  ADD_KEY_VALUE("tokens", tokens);
545  ADD_KEY_VALUE("hidden_size", hidden_size);
546  ADD_KEY_VALUE("stride", stride);
547  ADD_KEY_VALUE("experts", experts);
548  ADD_KEY_VALUE("topk", topk);
549  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
550  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
552 }
553 
554 void dump_fused_moe_json(const std::string& json_filename,
555  const std::string& api_str,
556  const std::string& prec_str,
557  int tokens,
558  bool is_local_token,
559  int local_tokens,
560  int experts,
561  int topk,
562  int hidden_size,
563  int intermediate_size,
564  int stride,
565  int block_m,
566  int activation,
567  bool gate_only,
568  bool fused_quant,
569  bool pass,
570  float ave_time,
571  float tflops,
572  float tb_per_sec,
573  const std::string& kernel_name = "fused_moe")
574 {
575  START_JSON_DUMP_FILE(json_filename);
576  ADD_KEY_VALUE("name", kernel_name);
577  ADD_KEY_VALUE("api", api_str);
578  ADD_KEY_VALUE("prec", prec_str);
579  ADD_KEY_VALUE("tokens", tokens);
580  if(is_local_token)
581  {
582  ADD_KEY_VALUE("local_tokens", local_tokens);
583  }
584  ADD_KEY_VALUE("experts", experts);
585  ADD_KEY_VALUE("topk", topk);
586  ADD_KEY_VALUE("hidden_size", hidden_size);
587  ADD_KEY_VALUE("intermediate_size", intermediate_size);
588  ADD_KEY_VALUE("stride", stride);
589  ADD_KEY_VALUE("block_m", block_m);
590  ADD_KEY_VALUE("activation", activation);
591  ADD_KEY_VALUE("gate_only", gate_only);
592  ADD_KEY_VALUE("fused_quant", fused_quant);
593  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
594  ADD_PERF_TO_JSON(ave_time, tflops, (tb_per_sec * 1024.0f))
596 }
597 
598 void dump_fmha_fwd_json_results(const std::string& json_filename,
599  const std::string& prec,
600  const std::string& mode,
601  const std::string& io_layout,
602  int batch,
603  int nhead,
604  int nhead_k,
605  int seqlen_qs,
606  int seqlen_ks,
607  int seqlen_kpads,
608  int hdim_q,
609  int hdim_v,
610  float scale_s,
611  float p_drop,
612  bool lse,
613  bool squant,
614  const std::string& bais,
615  const std::string& vlayout,
616  bool pass,
617  float ave_time,
618  float tflops,
619  float gb_per_sec,
620  const std::string& kernel_name = "fmha_fwd")
621 {
622  START_JSON_DUMP_FILE(json_filename);
623  ADD_KEY_VALUE("name", kernel_name);
624  ADD_KEY_VALUE("prec", prec);
625  ADD_KEY_VALUE("mode", mode);
626  ADD_KEY_VALUE("io_layout", io_layout);
627  ADD_KEY_VALUE("batch", batch);
628  ADD_KEY_VALUE("nhead", nhead);
629  ADD_KEY_VALUE("nhead_k", nhead_k);
630  ADD_KEY_VALUE("seqlen_q", seqlen_qs);
631  ADD_KEY_VALUE("seqlen_k", seqlen_ks);
632  ADD_KEY_VALUE("seqlen_kpads", seqlen_kpads);
633  ADD_KEY_VALUE("hdim_q", hdim_q);
634  ADD_KEY_VALUE("hdim_v", hdim_v);
635  ADD_KEY_VALUE("scale_s", scale_s);
636  ADD_KEY_VALUE("p_drop", p_drop);
637  ADD_KEY_VALUE("lse", lse);
638  ADD_KEY_VALUE("squant", squant);
639  ADD_KEY_VALUE("bias", bais);
640  ADD_KEY_VALUE("vlayout", vlayout);
641  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
642  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
644 }
645 
646 void dump_fmha_bwd_json_results(const std::string& json_filename,
647  const std::string& data_type,
648  const std::string& mode,
649  const std::string& i_perm,
650  const std::string& o_perm,
651  int batch,
652  int nhead,
653  int nhead_k,
654  int seqlen_q,
655  int seqlen_k,
656  int hdim_q,
657  int hdim_v,
658  float scale,
659  const std::string& bias,
660  bool use_dbias,
661  float p_drop,
662  bool s_randval,
663  bool deterministic,
664  const std::string& mask,
665  int mask_left,
666  int mask_right,
667  int workspace_size,
668  bool pass,
669  float ave_time,
670  float tflops,
671  float gb_per_sec,
672  const std::string& kernel_name = "fmha_bwd")
673 {
674  START_JSON_DUMP_FILE(json_filename);
675  ADD_KEY_VALUE("name", kernel_name);
676  ADD_KEY_VALUE("prec", data_type);
677  ADD_KEY_VALUE("mode", mode);
678  ADD_KEY_VALUE("i_perm", i_perm);
679  ADD_KEY_VALUE("o_perm", o_perm);
680  ADD_KEY_VALUE("batch", batch);
681  ADD_KEY_VALUE("nhead", nhead);
682  ADD_KEY_VALUE("nhead_k", nhead_k);
683  ADD_KEY_VALUE("seqlen_q", seqlen_q);
684  ADD_KEY_VALUE("seqlen_k", seqlen_k);
685  ADD_KEY_VALUE("hdim_q", hdim_q);
686  ADD_KEY_VALUE("hdim_v", hdim_v);
687  ADD_KEY_VALUE("scale", scale);
688  ADD_KEY_VALUE("bias", bias);
689  ADD_KEY_VALUE("use_dbias", use_dbias);
690  ADD_KEY_VALUE("p_drop", p_drop);
691  ADD_KEY_VALUE("s_randval", s_randval);
692  ADD_KEY_VALUE("deterministic", deterministic ? "true" : "false");
693  ADD_KEY_VALUE("mask", mask);
694  ADD_KEY_VALUE("mask_left", mask_left);
695  ADD_KEY_VALUE("mask_right", mask_right);
696  ADD_KEY_VALUE("workspace_size", workspace_size);
697  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
698  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
700 }
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)
Definition: json_dump.hpp:28
void dump_gemm_json_results(const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")
Definition: json_dump.hpp:102
#define END_JSON_DUMP_FILE()
Definition: json_dump.hpp:21
void dump_rmsnorm2d_fwd_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")
Definition: json_dump.hpp:389
void dump_batched_gemm_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")
Definition: json_dump.hpp:148
void dump_fmha_bwd_json_results(const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")
Definition: json_dump.hpp:646
void dump_layernorm2d_fwd_json_results(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")
Definition: json_dump.hpp:286
void dump_moe_smoothquant_json(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")
Definition: json_dump.hpp:526
void dump_reduce_json_results(const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")
Definition: json_dump.hpp:321
void dump_flatmm_json_results(const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")
Definition: json_dump.hpp:206
void dump_batched_transpose_json(const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")
Definition: json_dump.hpp:498
void add_key_value_pair(rapidjson::Writer< rapidjson::StringBuffer > &writer, const char *key, T value)
Definition: json_dump.hpp:31
#define START_JSON_DUMP_FILE(file_name)
Definition: json_dump.hpp:10
void dump_permute_json_results(const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")
Definition: json_dump.hpp:345
void dump_gemm_multi_d_fp16_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")
Definition: json_dump.hpp:236
void dump_moe_sorting_json(const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")
Definition: json_dump.hpp:470
void dump_grouped_gemm_json_results(const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")
Definition: json_dump.hpp:185
void dump_fused_moe_json(const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")
Definition: json_dump.hpp:554
void dump_fmha_fwd_json_results(const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bais, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")
Definition: json_dump.hpp:598
void dump_smoothquant_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")
Definition: json_dump.hpp:446
#define ADD_KEY_VALUE(key, value)
Definition: json_dump.hpp:27
void dump_elementwise_json_results(const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")
Definition: json_dump.hpp:268
void dump_topk_softmax_json(const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")
Definition: json_dump.hpp:361
void dump_add_rmsnorm2d_rdquant_fwd_json(const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")
Definition: json_dump.hpp:419
bool_constant< false > false_type
Definition: integral_constant.hpp:63
bool_constant< true > true_type
Definition: integral_constant.hpp:62
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1350
common definitions and configuration
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:415
signed __int64 int64_t
Definition: stdint.h:135
Definition: json_dump.hpp:82