From 05ff90b692a6cdac4d8c06e7a4a4606d1b8fe1d6 Mon Sep 17 00:00:00 2001
From: Antoni Baum
Date: Tue, 5 Dec 2023 20:55:55 -0800
Subject: [PATCH] Save pytorch profiler output for latency benchmark (#1871)

* Save profiler output

* Apply feedback from code review
---
 benchmarks/benchmark_latency.py | 34 ++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py
index 8d9e177d..ae5583ff 100644
--- a/benchmarks/benchmark_latency.py
+++ b/benchmarks/benchmark_latency.py
@@ -1,6 +1,8 @@
 """Benchmark the latency of processing a single batch of requests."""
 import argparse
 import time
+from pathlib import Path
+from typing import Optional
 
 import numpy as np
 import torch
@@ -34,12 +36,15 @@ def main(args: argparse.Namespace):
     print(sampling_params)
     dummy_prompt_token_ids = [[0] * args.input_len] * args.batch_size
 
-    def run_to_completion(profile: bool = False):
-        if profile:
-            with torch.profiler.profile(activities=[
-                    torch.profiler.ProfilerActivity.CPU,
-                    torch.profiler.ProfilerActivity.CUDA,
-            ]) as p:
+    def run_to_completion(profile_dir: Optional[str] = None):
+        if profile_dir:
+            with torch.profiler.profile(
+                    activities=[
+                        torch.profiler.ProfilerActivity.CPU,
+                        torch.profiler.ProfilerActivity.CUDA,
+                    ],
+                    on_trace_ready=torch.profiler.tensorboard_trace_handler(
+                        str(profile_dir))) as p:
                 llm.generate(prompt_token_ids=dummy_prompt_token_ids,
                              sampling_params=sampling_params,
                              use_tqdm=False)
@@ -54,11 +59,14 @@ def main(args: argparse.Namespace):
         return latency
 
     print("Warming up...")
-    run_to_completion(profile=False)
+    run_to_completion(profile_dir=None)
 
     if args.profile:
-        print("Profiling...")
-        run_to_completion(profile=True)
+        profile_dir = args.profile_result_dir
+        if not profile_dir:
+            profile_dir = Path(".") / "vllm_benchmark_result" / f"latency_result_{time.time()}"
+        print(f"Profiling (results will be saved to '{profile_dir}')...")
+        run_to_completion(profile_dir=profile_dir)
         return
 
     # Benchmark.
@@ -107,5 +115,13 @@ if __name__ == '__main__':
                         '--profile',
                         action='store_true',
                         help='profile the generation process of a single batch')
+    parser.add_argument(
+        '--profile-result-dir',
+        type=str,
+        default=None,
+        help=(
+            'path to save the pytorch profiler output. Can be visualized '
+            'with ui.perfetto.dev or Tensorboard.'
+        ))
     args = parser.parse_args()
     main(args)
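
Note (not part of the patch): a minimal standalone sketch of the profiling pattern this change wires into run_to_completion(). The matmul loop is a placeholder workload standing in for the benchmark's llm.generate() call, and the output-directory naming mirrors the fallback the patch constructs when --profile-result-dir is not given; it assumes a PyTorch build with the torch.profiler API.

    # Illustrative sketch only: torch.profiler + tensorboard_trace_handler,
    # as used by the patched run_to_completion().
    from pathlib import Path
    import time

    import torch

    # Mirrors the patch's fallback directory naming (hypothetical location).
    profile_dir = Path(".") / "vllm_benchmark_result" / f"latency_result_{time.time()}"

    # Profile CPU always; add CUDA activity only when a GPU is present.
    activities = [torch.profiler.ProfilerActivity.CPU]
    if torch.cuda.is_available():
        activities.append(torch.profiler.ProfilerActivity.CUDA)

    with torch.profiler.profile(
            activities=activities,
            # Invoked when profiling stops; writes a TensorBoard-compatible
            # trace (also loadable in ui.perfetto.dev) into profile_dir.
            on_trace_ready=torch.profiler.tensorboard_trace_handler(
                str(profile_dir))) as p:
        # Placeholder workload; the benchmark runs llm.generate(...) here.
        x = torch.randn(1024, 1024)
        for _ in range(10):
            x = x @ x

    # Summary table on stdout; the full trace files live under profile_dir.
    print(p.key_averages().table(sort_by="cpu_time_total", row_limit=10))

With the patch applied, an equivalent trace is produced by running, for example, python benchmarks/benchmark_latency.py --profile --profile-result-dir ./latency_profile (the directory name is arbitrary), and the resulting trace can be opened with TensorBoard's profiler plugin or ui.perfetto.dev.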