diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
index e031686c..1030ec24 100644
--- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
@@ -84,8 +84,13 @@ if __name__ == "__main__":
             # this result is generated via `benchmark_serving.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
@@ -99,8 +104,13 @@ if __name__ == "__main__":
             # this result is generated via `benchmark_latency.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
@@ -121,8 +131,13 @@ if __name__ == "__main__":
             # this result is generated via `benchmark_throughput.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
index 9425cb07..a3555f72 100644
--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -309,11 +309,14 @@ run_serving_tests() {
 
       new_test_name=$test_name"_qps_"$qps
 
+      # pass the tensor parallel size to the client so that it can be displayed
+      # on the benchmark dashboard
       client_command="python3 benchmark_serving.py \
         --save-result \
         --result-dir $RESULTS_FOLDER \
         --result-filename ${new_test_name}.json \
         --request-rate $qps \
+        --metadata "tensor_parallel_size=$tp" \
         $client_args"
 
       echo "Running test case $test_name with qps $qps"
diff --git a/.buildkite/nightly-benchmarks/tests/throughput-tests.json b/.buildkite/nightly-benchmarks/tests/throughput-tests.json
index 91ef6d16..9bc87cbc 100644
--- a/.buildkite/nightly-benchmarks/tests/throughput-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/throughput-tests.json
@@ -32,4 +32,4 @@
             "backend": "vllm"
         }
     }
-]
\ No newline at end of file
+]
diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py
index 71ec909c..c82358d1 100644
--- a/benchmarks/benchmark_latency.py
+++ b/benchmarks/benchmark_latency.py
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional
 
 import numpy as np
 import torch
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from tqdm import tqdm
 
 from vllm import LLM, SamplingParams
@@ -30,8 +30,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
                     for k in ["avg_latency", "percentiles"]})
     if pt_records:
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 9416a22b..1bb83b08 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -56,7 +56,7 @@ try:
 except ImportError:
     from argparse import ArgumentParser as FlexibleArgumentParser
 
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 
 MILLISECONDS_TO_SECONDS_CONVERSION = 1000
 
@@ -841,8 +841,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index ca54213c..04de08fa 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple
 
 import torch
 import uvloop
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from PIL import Image
 from tqdm import tqdm
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
@@ -366,8 +366,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py
index 6f01cf20..ac0688ca 100644
--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
+import json
+import math
 import os
 from typing import Any, Dict, List
 
@@ -34,6 +36,34 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
                 "extra_info": extra_info,
             },
         }
+
+        tp = record["benchmark"]["extra_info"]["args"].get(
+            "tensor_parallel_size")
+        # Save tensor_parallel_size parameter if it's part of the metadata
+        if not tp and "tensor_parallel_size" in extra_info:
+            record["benchmark"]["extra_info"]["args"][
+                "tensor_parallel_size"] = extra_info["tensor_parallel_size"]
+
         records.append(record)
 
     return records
+
+
+class InfEncoder(json.JSONEncoder):
+
+    def clear_inf(self, o: Any):
+        if isinstance(o, dict):
+            return {k: self.clear_inf(v) for k, v in o.items()}
+        elif isinstance(o, list):
+            return [self.clear_inf(v) for v in o]
+        elif isinstance(o, float) and math.isinf(o):
+            return "inf"
+        return o
+
+    def iterencode(self, o: Any, *args, **kwargs) -> Any:
+        return super().iterencode(self.clear_inf(o), *args, **kwargs)
+
+
+def write_to_json(filename: str, records: List) -> None:
+    with open(filename, "w") as f:
+        json.dump(records, f, cls=InfEncoder)
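
For context, a minimal usage sketch of the write_to_json helper added to benchmarks/benchmark_utils.py above; it is not part of the diff, and the file name and record contents are made up for illustration. InfEncoder recursively walks dicts and lists and replaces infinite floats with the string "inf", so the saved file is plain JSON instead of containing the non-standard Infinity token that an unmodified json.dump would emit.

# Illustration only: exercise write_to_json from the benchmark_utils change above.
# "example.pytorch.json" and the record below are hypothetical.
from benchmark_utils import write_to_json

records = [{
    "metric": {
        "name": "request_rate",
        # A plain json.dump would serialize float("inf") as the non-standard
        # token Infinity; InfEncoder converts it to the string "inf".
        "benchmark_values": [float("inf")],
    },
}]

write_to_json("example.pytorch.json", records)
# Resulting file content:
# [{"metric": {"name": "request_rate", "benchmark_values": ["inf"]}}]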