[Misc] Deprecation Warning when setting --engine-use-ray (#7424)
Signed-off-by: Wallas Santos <wallashss@ibm.com> Co-authored-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Nick Hill <nickhill@us.ibm.com> Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
parent
67d115db08
commit
70b746efcf
@ -1,3 +1,4 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
@ -35,11 +36,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
|
||||
"127.0.0.1", "--tokenizer-pool-size",
|
||||
str(tokenizer_pool_size)
|
||||
]
|
||||
|
||||
# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
|
||||
# to prevent `--engine-use-ray` from raising an exception due to its deprecation
|
||||
env_vars = os.environ.copy()
|
||||
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
|
||||
|
||||
if engine_use_ray:
|
||||
commands.append("--engine-use-ray")
|
||||
if worker_use_ray:
|
||||
commands.append("--worker-use-ray")
|
||||
uvicorn_process = subprocess.Popen(commands)
|
||||
uvicorn_process = subprocess.Popen(commands, env=env_vars)
|
||||
yield
|
||||
uvicorn_process.terminate()
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pytest
|
||||
@ -106,11 +107,16 @@ async def test_new_requests_event():
|
||||
assert engine.engine.add_request_calls == 3
|
||||
assert engine.engine.step_calls == old_step_calls + 1
|
||||
|
||||
# Allow deprecated engine_use_ray to not raise exception
|
||||
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
|
||||
|
||||
engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
|
||||
assert engine.get_model_config() is not None
|
||||
assert engine.get_tokenizer() is not None
|
||||
assert engine.get_decoding_config() is not None
|
||||
|
||||
os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")
|
||||
|
||||
|
||||
def test_asyncio_run():
|
||||
wait_for_gpu_memory_to_clear(
|
||||
|
@ -23,7 +23,11 @@ def server():
|
||||
str(chatml_jinja_path),
|
||||
]
|
||||
|
||||
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
|
||||
# Allow `--engine-use-ray`, otherwise the launch of the server throws
|
||||
# an error due to trying to use a deprecated feature
|
||||
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
|
||||
with RemoteOpenAIServer(MODEL_NAME, args,
|
||||
env_dict=env_dict) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import os
|
||||
from itertools import cycle
|
||||
from typing import Dict, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
@ -56,6 +57,11 @@ class AsyncLLM:
|
||||
) -> None:
|
||||
if "disable_log_stats" not in kwargs:
|
||||
kwargs["disable_log_stats"] = True
|
||||
|
||||
# Needed so that engine_use_ray works as a deprecated feature,
|
||||
# otherwise the following constructor will raise an exception
|
||||
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
|
||||
|
||||
engine_args = AsyncEngineArgs(
|
||||
model=model,
|
||||
tokenizer=tokenizer,
|
||||
|
@ -923,7 +923,13 @@ class AsyncEngineArgs(EngineArgs):
|
||||
parser.add_argument('--engine-use-ray',
|
||||
action='store_true',
|
||||
help='Use Ray to start the LLM engine in a '
|
||||
'separate process as the server process.')
|
||||
'separate process as the server process.'
|
||||
'(DEPRECATED. This argument is deprecated '
|
||||
'and will be removed in a future update. '
|
||||
'Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to force '
|
||||
'use it. See '
|
||||
'https://github.com/vllm-project/vllm/issues/7045.'
|
||||
')')
|
||||
parser.add_argument('--disable-log-requests',
|
||||
action='store_true',
|
||||
help='Disable logging requests.')
|
||||
|
@ -29,6 +29,7 @@ from vllm.prompt_adapter.request import PromptAdapterRequest
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.sequence import ExecuteModelRequest, SamplerOutput
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import print_warning_once
|
||||
|
||||
logger = init_logger(__name__)
|
||||
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
|
||||
@ -510,6 +511,20 @@ class AsyncLLMEngine:
|
||||
self.log_requests = log_requests
|
||||
self.engine = self._init_engine(*args, **kwargs)
|
||||
|
||||
if self.engine_use_ray:
|
||||
print_warning_once(
|
||||
"DEPRECATED. `--engine-use-ray` is deprecated and will "
|
||||
"be removed in a future update. "
|
||||
"See https://github.com/vllm-project/vllm/issues/7045.")
|
||||
|
||||
if envs.VLLM_ALLOW_ENGINE_USE_RAY:
|
||||
print_warning_once(
|
||||
"VLLM_ALLOW_ENGINE_USE_RAY is set, force engine use Ray")
|
||||
else:
|
||||
raise ValueError("`--engine-use-ray` is deprecated. "
|
||||
"Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to "
|
||||
"force use it")
|
||||
|
||||
self.background_loop: Optional[asyncio.Future] = None
|
||||
# We need to keep a reference to unshielded
|
||||
# task as well to prevent it from being garbage
|
||||
|
@ -55,6 +55,7 @@ if TYPE_CHECKING:
|
||||
VERBOSE: bool = False
|
||||
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
|
||||
VLLM_TEST_FORCE_FP8_MARLIN: bool = False
|
||||
VLLM_ALLOW_ENGINE_USE_RAY: bool = False
|
||||
VLLM_PLUGINS: Optional[List[str]] = None
|
||||
|
||||
|
||||
@ -364,6 +365,14 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
(os.environ.get("VLLM_TEST_FORCE_FP8_MARLIN", "0").strip().lower() in
|
||||
("1", "true")),
|
||||
|
||||
# If set, allow running the engine as a separate ray actor,
|
||||
# which is a deprecated feature soon to be removed.
|
||||
# See https://github.com/vllm-project/vllm/issues/7045
|
||||
"VLLM_ALLOW_ENGINE_USE_RAY":
|
||||
lambda:
|
||||
(os.environ.get("VLLM_ALLOW_ENGINE_USE_RAY", "0").strip().lower() in
|
||||
("1", "true")),
|
||||
|
||||
# a list of plugin names to load, separated by commas.
|
||||
# if this is not set, it means all plugins will be loaded
|
||||
# if this is set to an empty string, no plugins will be loaded
|
||||
|
Loading…
x
Reference in New Issue
Block a user