diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index 3a6e962c..444bda2a 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -732,8 +732,11 @@ def main(args: argparse.Namespace):
         api_url = f"http://{args.host}:{args.port}{args.endpoint}"
         base_url = f"http://{args.host}:{args.port}"
 
-    tokenizer = get_tokenizer(tokenizer_id,
-                              trust_remote_code=args.trust_remote_code)
+    tokenizer = get_tokenizer(
+        tokenizer_id,
+        trust_remote_code=args.trust_remote_code,
+        tokenizer_mode=args.tokenizer_mode,
+    )
 
     if args.dataset == 'grammar':
         args.structure_type = 'guided_grammar'
@@ -876,6 +879,12 @@ if __name__ == "__main__":
         help=
         "Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
     )
+    parser.add_argument(
+        "--tokenizer-mode",
+        type=str,
+        default="auto",
+        help="Tokenizer mode passed to get_tokenizer (default: 'auto').",
+    )
     parser.add_argument(
         "--num-prompts",
         type=int,
diff --git a/benchmarks/run_structured_output_benchmark.sh b/benchmarks/run_structured_output_benchmark.sh
index 8a777320..126dfbc2 100755
--- a/benchmarks/run_structured_output_benchmark.sh
+++ b/benchmarks/run_structured_output_benchmark.sh
@@ -54,6 +54,7 @@ for qps in "${QPS_VALUES[@]}"; do
   python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
     --request-rate $qps \
     --result-filename "$FILENAME" \
+    --tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
     --port ${PORT:-8000}
 
   echo "Completed benchmark with QPS: $qps"