vllm/tests/tpu/test_custom_dispatcher.py
youkaichao 7560ae5caf
[8/N] enable cli flag without a space (#10529)
Signed-off-by: youkaichao <youkaichao@gmail.com>
2024-11-21 12:30:42 -08:00

23 lines
577 B
Python

import os
from vllm.config import CompilationLevel
from ..utils import compare_two_settings
# On TPU, passing --enforce-eager still triggers graph compilation, which
# takes longer than the default health-check timeout of the MQLLMEngine.
# Raise the RPC timeout to 30s (the value "30000" corresponds to that 30s)
# so the health check does not fail while compilation is in progress.
os.environ.update({"VLLM_RPC_TIMEOUT": "30000"})
def test_custom_dispatcher():
    """Compare two compilation levels for gemma-2b with eager mode enforced.

    Calls ``compare_two_settings`` on the ``google/gemma-2b`` model with two
    argument lists that differ only in the ``-O`` compilation level:
    ``CompilationLevel.DYNAMO_ONCE`` versus ``CompilationLevel.DYNAMO_AS_IS``.
    Both lists include ``--enforce-eager`` and both runs receive an empty
    environment-override dict.

    NOTE(review): the helper presumably checks that the two settings produce
    matching results -- confirm against ``compare_two_settings`` in
    ``..utils``.
    """
    eager_flag = "--enforce-eager"

    # The level is attached directly to "-O" with no separating space.
    dynamo_once_args = [eager_flag, f"-O{CompilationLevel.DYNAMO_ONCE}"]
    dynamo_as_is_args = [eager_flag, f"-O{CompilationLevel.DYNAMO_AS_IS}"]

    compare_two_settings(
        "google/gemma-2b",
        arg1=dynamo_once_args,
        arg2=dynamo_as_is_args,
        env1={},
        env2={},
    )