vllm/tests/benchmarks/test_throughput_cli.py

# SPDX-License-Identifier: Apache-2.0
import subprocess

import pytest

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


@pytest.mark.benchmark
def test_bench_throughput():
    """Smoke-test the ``vllm bench throughput`` CLI command.

    Launches the command in a child process against ``MODEL_NAME`` with
    ``--input-len 32``, ``--output-len 1``, ``--enforce-eager`` and
    ``--load-format dummy``, echoes the captured stdout/stderr so they show
    up in the test log, and fails if the process exits with a non-zero
    status.
    """
    # Assemble argv one option at a time so each flag/value pair is explicit.
    cli_args = ["vllm", "bench", "throughput"]
    cli_args += ["--model", MODEL_NAME]
    cli_args += ["--input-len", "32"]
    cli_args += ["--output-len", "1"]
    cli_args += ["--enforce-eager"]
    cli_args += ["--load-format", "dummy"]

    completed = subprocess.run(cli_args, capture_output=True, text=True)

    # Surface both captured streams (stdout first, then stderr).
    for captured in (completed.stdout, completed.stderr):
        print(captured)

    assert completed.returncode == 0, f"Benchmark failed: {completed.stderr}"
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`# SPDX-License-Identifier: Apache-2.0`
			`import subprocess`

			`import pytest`

			`MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"`


			`@pytest.mark.benchmark`
			`def test_bench_throughput():`
			`command = [`
			`"vllm", "bench", "throughput", "--model", MODEL_NAME, "--input-len",`
			`"32", "--output-len", "1", "--enforce-eager", "--load-format", "dummy"`
			`]`
			`result = subprocess.run(command, capture_output=True, text=True)`
			`print(result.stdout)`
			`print(result.stderr)`

			`assert result.returncode == 0, f"Benchmark failed: {result.stderr}"`