From 6c5a3195db126cedf7c891d1af3cac8080f8b759 Mon Sep 17 00:00:00 2001 From: Aaron Pham Date: Wed, 19 Mar 2025 10:56:50 -0400 Subject: [PATCH] [Misc][Benchmark] Add support for different `tokenizer_mode` (#15040) Signed-off-by: Aaron Pham Signed-off-by: Russell Bryant Co-authored-by: Russell Bryant --- benchmarks/benchmark_serving_structured_output.py | 14 ++++++++++++-- benchmarks/run_structured_output_benchmark.sh | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py index 3a6e962c..444bda2a 100644 --- a/benchmarks/benchmark_serving_structured_output.py +++ b/benchmarks/benchmark_serving_structured_output.py @@ -732,8 +732,11 @@ def main(args: argparse.Namespace): api_url = f"http://{args.host}:{args.port}{args.endpoint}" base_url = f"http://{args.host}:{args.port}" - tokenizer = get_tokenizer(tokenizer_id, - trust_remote_code=args.trust_remote_code) + tokenizer = get_tokenizer( + tokenizer_id, + trust_remote_code=args.trust_remote_code, + tokenizer_mode=args.tokenizer_mode, + ) if args.dataset == 'grammar': args.structure_type = 'guided_grammar' @@ -876,6 +879,13 @@ if __name__ == "__main__": help= "Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501 ) + parser.add_argument( + "--tokenizer-mode", + type=str, + default="auto", + help= + "Tokenizer mode passed to get_tokenizer (default: auto).", # noqa: E501 + ) parser.add_argument( "--num-prompts", type=int, diff --git a/benchmarks/run_structured_output_benchmark.sh b/benchmarks/run_structured_output_benchmark.sh index 8a777320..126dfbc2 100755 --- a/benchmarks/run_structured_output_benchmark.sh +++ b/benchmarks/run_structured_output_benchmark.sh @@ -54,6 +54,7 @@ for qps in "${QPS_VALUES[@]}"; do python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \ --request-rate $qps \ --result-filename "$FILENAME" \ + --tokenizer-mode 
${TOKENIZER_MODE:-"auto"} \ --port ${PORT:-8000} echo "Completed benchmark with QPS: $qps"