[Misc] Deprecation Warning when setting --engine-use-ray (#7424)

Signed-off-by: Wallas Santos <wallashss@ibm.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
Wallas Henrique 2024-08-14 13:44:27 -03:00 committed by GitHub
parent 67d115db08
commit 70b746efcf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 56 additions and 3 deletions

View File

@ -1,3 +1,4 @@
import os
import subprocess
import sys
import time
@ -35,11 +36,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
"127.0.0.1", "--tokenizer-pool-size",
str(tokenizer_pool_size)
]
# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
# to prevent `--engine-use-ray` from raising an exception due to its deprecation
env_vars = os.environ.copy()
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
if engine_use_ray:
commands.append("--engine-use-ray")
if worker_use_ray:
commands.append("--worker-use-ray")
uvicorn_process = subprocess.Popen(commands)
uvicorn_process = subprocess.Popen(commands, env=env_vars)
yield
uvicorn_process.terminate()

View File

@ -1,4 +1,5 @@
import asyncio
import os
from dataclasses import dataclass
import pytest
@ -106,11 +107,16 @@ async def test_new_requests_event():
assert engine.engine.add_request_calls == 3
assert engine.engine.step_calls == old_step_calls + 1
# Allow the deprecated engine_use_ray flag without raising an exception
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
assert engine.get_model_config() is not None
assert engine.get_tokenizer() is not None
assert engine.get_decoding_config() is not None
os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")
def test_asyncio_run():
wait_for_gpu_memory_to_clear(

View File

@ -23,7 +23,11 @@ def server():
str(chatml_jinja_path),
]
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
# Allow `--engine-use-ray`; otherwise launching the server throws an
# error due to attempting to use a deprecated feature
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
with RemoteOpenAIServer(MODEL_NAME, args,
env_dict=env_dict) as remote_server:
yield remote_server

View File

@ -1,4 +1,5 @@
import asyncio
import os
from itertools import cycle
from typing import Dict, List, Optional, Sequence, Tuple, Union
@ -56,6 +57,11 @@ class AsyncLLM:
) -> None:
if "disable_log_stats" not in kwargs:
kwargs["disable_log_stats"] = True
# Needed so that engine_use_ray works as a deprecated feature;
# otherwise the following constructor will raise an exception
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
engine_args = AsyncEngineArgs(
model=model,
tokenizer=tokenizer,

View File

@ -923,7 +923,13 @@ class AsyncEngineArgs(EngineArgs):
parser.add_argument('--engine-use-ray',
action='store_true',
help='Use Ray to start the LLM engine in a '
'separate process as the server process.')
'separate process as the server process.'
'(DEPRECATED. This argument is deprecated '
'and will be removed in a future update. '
'Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to force '
'use it. See '
'https://github.com/vllm-project/vllm/issues/7045.'
')')
parser.add_argument('--disable-log-requests',
action='store_true',
help='Disable logging requests.')

View File

@ -29,6 +29,7 @@ from vllm.prompt_adapter.request import PromptAdapterRequest
from vllm.sampling_params import SamplingParams
from vllm.sequence import ExecuteModelRequest, SamplerOutput
from vllm.usage.usage_lib import UsageContext
from vllm.utils import print_warning_once
logger = init_logger(__name__)
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@ -510,6 +511,20 @@ class AsyncLLMEngine:
self.log_requests = log_requests
self.engine = self._init_engine(*args, **kwargs)
if self.engine_use_ray:
print_warning_once(
"DEPRECATED. `--engine-use-ray` is deprecated and will "
"be removed in a future update. "
"See https://github.com/vllm-project/vllm/issues/7045.")
if envs.VLLM_ALLOW_ENGINE_USE_RAY:
print_warning_once(
"VLLM_ALLOW_ENGINE_USE_RAY is set, force engine use Ray")
else:
raise ValueError("`--engine-use-ray` is deprecated. "
"Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to "
"force use it")
self.background_loop: Optional[asyncio.Future] = None
# We need to keep a reference to unshielded
# task as well to prevent it from being garbage

View File

@ -55,6 +55,7 @@ if TYPE_CHECKING:
VERBOSE: bool = False
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
VLLM_TEST_FORCE_FP8_MARLIN: bool = False
VLLM_ALLOW_ENGINE_USE_RAY: bool = False
VLLM_PLUGINS: Optional[List[str]] = None
@ -364,6 +365,14 @@ environment_variables: Dict[str, Callable[[], Any]] = {
(os.environ.get("VLLM_TEST_FORCE_FP8_MARLIN", "0").strip().lower() in
("1", "true")),
# If set, allow running the engine as a separate ray actor,
# which is a deprecated feature soon to be removed.
# See https://github.com/vllm-project/vllm/issues/7045
"VLLM_ALLOW_ENGINE_USE_RAY":
lambda:
(os.environ.get("VLLM_ALLOW_ENGINE_USE_RAY", "0").strip().lower() in
("1", "true")),
# a list of plugin names to load, separated by commas.
# if this is not set, it means all plugins will be loaded
# if this is set to an empty string, no plugins will be loaded