[CI] Avoid naming different metrics with the same name in performance benchmark (#5615)

This commit is contained in:
Kuntai Du 2024-06-17 21:37:18 -07:00 committed by GitHub
parent 32c86e494a
commit 114d7270ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -15,10 +15,10 @@ latency_column_mapping = {
"avg_latency": "Mean latency (ms)", "avg_latency": "Mean latency (ms)",
# "P10": "P10 (s)", # "P10": "P10 (s)",
# "P25": "P25 (s)", # "P25": "P25 (s)",
"P50": "Median", "P50": "Median latency (ms)",
# "P75": "P75 (s)", # "P75": "P75 (s)",
# "P90": "P90 (s)", # "P90": "P90 (s)",
"P99": "P99", "P99": "P99 latency (ms)",
} }
# throughput tests and the keys that will be printed into markdown # throughput tests and the keys that will be printed into markdown
@@ -43,15 +43,14 @@ serving_column_mapping = {
# "input_throughput": "Input Tput (tok/s)", # "input_throughput": "Input Tput (tok/s)",
# "output_throughput": "Output Tput (tok/s)", # "output_throughput": "Output Tput (tok/s)",
"mean_ttft_ms": "Mean TTFT (ms)", "mean_ttft_ms": "Mean TTFT (ms)",
# do not say TTFT again to avoid the table getting too wide "median_ttft_ms": "Median TTFT (ms)",
"median_ttft_ms": "Median", "p99_ttft_ms": "P99 TTFT (ms)",
"p99_ttft_ms": "P99",
# "mean_tpot_ms": "Mean TPOT (ms)", # "mean_tpot_ms": "Mean TPOT (ms)",
# "median_tpot_ms": "Median", # "median_tpot_ms": "Median",
# "p99_tpot_ms": "P99", # "p99_tpot_ms": "P99",
"mean_itl_ms": "Mean ITL (ms)", "mean_itl_ms": "Mean ITL (ms)",
"median_itl_ms": "Median", "median_itl_ms": "Median ITL (ms)",
"p99_itl_ms": "P99", "p99_itl_ms": "P99 ITL (ms)",
} }
@@ -183,3 +182,11 @@ if __name__ == "__main__":
serving_tests_markdown_table=serving_md_table, serving_tests_markdown_table=serving_md_table,
benchmarking_results_in_json_string=processed_results_json) benchmarking_results_in_json_string=processed_results_json)
f.write(results) f.write(results)
# document benchmarking results in json
with open(results_folder / "benchmark_results.json", "w") as f:
results = latency_results.to_dict(
orient='records') + throughput_results.to_dict(
orient='records') + serving_results.to_dict(orient='records')
f.write(json.dumps(results))