# SPDX-License-Identifier: Apache-2.0
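"""Verify the API server process exits after a fatal engine failure."""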
import openai
import pytest

from ...utils import RemoteOpenAIServer

MODEL_NAME = "meta-llama/Llama-3.2-1B"


@pytest.mark.asyncio
async def test_shutdown_on_engine_failure():
    # dtype, max-model-len, etc. are set so that this can run in CI
    args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "8192",
        "--enforce-eager",
        "--max-num-seqs",
        "128",
    ]
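
    # RemoteOpenAIServer launches the API server in a separate process; the
    # test checks that this process exits after the engine crashes.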
    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        async with remote_server.get_async_client() as client:
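
            # The engine crash is expected to surface to the client as either
            # a connection error or an internal server error.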
            with pytest.raises(
                (openai.APIConnectionError, openai.InternalServerError)):
                # Asking for lots of prompt logprobs will currently crash the
                # engine. This may change in the future when that bug is fixed
                prompt = "Hello " * 4000
                await client.completions.create(
                    model=MODEL_NAME,
                    prompt=prompt,
                    extra_body={"prompt_logprobs": 10})

            # Now the server should shut down
            return_code = remote_server.proc.wait(timeout=8)
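            # wait() returns the process exit code once the server has exited.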
            assert return_code is not None