vllm/tests/entrypoints/openai/test_shutdown.py
Wallas Henrique c0292211ce
[CI/Build] Replaced some models on tests for smaller ones (#9570)
Signed-off-by: Wallas Santos <wallashss@ibm.com>
2024-10-22 04:52:14 +00:00

49 lines
1.5 KiB
Python

import json
import os
import openai
import pytest
from ...utils import RemoteOpenAIServer
MODEL_NAME = "meta-llama/Llama-3.2-1B"
@pytest.mark.asyncio
async def test_shutdown_on_engine_failure(tmp_path):
# Use a bad adapter to crash the engine
# (This test will fail when that bug is fixed)
adapter_path = tmp_path / "bad_adapter"
os.mkdir(adapter_path)
with open(adapter_path / "adapter_model_config.json", "w") as f:
json.dump({"not": "real"}, f)
with open(adapter_path / "adapter_model.safetensors", "wb") as f:
f.write(b"this is fake")
# dtype, max-len etc set so that this can run in CI
args = [
"--dtype",
"bfloat16",
"--max-model-len",
"8192",
"--enforce-eager",
"--max-num-seqs",
"128",
"--enable-lora",
"--lora-modules",
f"bad-adapter={tmp_path / 'bad_adapter'}",
]
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
async with remote_server.get_async_client() as client:
with pytest.raises(
(openai.APIConnectionError, openai.InternalServerError)):
# This crashes the engine
await client.completions.create(model="bad-adapter",
prompt="Hello, my name is")
# Now the server should shut down
return_code = remote_server.proc.wait(timeout=8)
assert return_code is not None