# SPDX-License-Identifier: Apache-2.0

import subprocess

import pytest

from ..utils import RemoteOpenAIServer

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


@pytest.fixture(scope="module")
def server():
    # Start a lightweight OpenAI-compatible server with dummy weights so the
    # benchmark can run without downloading real model checkpoints.
    args = [
        "--max-model-len", "1024", "--enforce-eager", "--load-format", "dummy"
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.mark.benchmark
def test_bench_serve(server):
    # Run `vllm bench serve` against the fixture server with a tiny workload
    # and assert that the CLI exits successfully.
    command = [
        "vllm",
        "bench",
        "serve",
        "--model",
        MODEL_NAME,
        "--host",
        server.host,
        "--port",
        str(server.port),
        "--random-input-len",
        "32",
        "--random-output-len",
        "4",
        "--num-prompts",
        "5",
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    print(result.stdout)
    print(result.stderr)
    assert result.returncode == 0, f"Benchmark failed: {result.stderr}"