[ci] add vllm_test_utils (#10659)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
parent
940635343a
commit
334d64d1e8
@ -191,6 +191,10 @@ ADD . /vllm-workspace/
|
|||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
python3 -m pip install -r requirements-dev.txt
|
python3 -m pip install -r requirements-dev.txt
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
|
python3 -m pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
# enable fast downloads from hf (for testing)
|
# enable fast downloads from hf (for testing)
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
python3 -m pip install hf_transfer
|
python3 -m pip install hf_transfer
|
||||||
|
@ -62,4 +62,8 @@ WORKDIR /workspace/
|
|||||||
|
|
||||||
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
|
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
|
pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||||
|
@ -11,6 +11,9 @@ ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
|
|||||||
|
|
||||||
RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
|
RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN python3 -m pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
WORKDIR /workspace/
|
WORKDIR /workspace/
|
||||||
|
|
||||||
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
|
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
|
||||||
|
@ -38,4 +38,7 @@ ENV VLLM_TARGET_DEVICE neuron
|
|||||||
RUN --mount=type=bind,source=.git,target=.git \
|
RUN --mount=type=bind,source=.git,target=.git \
|
||||||
pip install --no-build-isolation -v -e .
|
pip install --no-build-isolation -v -e .
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN python3 -m pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
@ -22,4 +22,7 @@ RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVIC
|
|||||||
COPY examples/ /workspace/examples
|
COPY examples/ /workspace/examples
|
||||||
COPY benchmarks/ /workspace/benchmarks
|
COPY benchmarks/ /workspace/benchmarks
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN python3 -m pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
@ -29,6 +29,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \
|
|||||||
RUN --mount=type=bind,source=.git,target=.git \
|
RUN --mount=type=bind,source=.git,target=.git \
|
||||||
VLLM_TARGET_DEVICE=cpu python3 setup.py install
|
VLLM_TARGET_DEVICE=cpu python3 setup.py install
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN python3 -m pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
WORKDIR /workspace/
|
WORKDIR /workspace/
|
||||||
|
|
||||||
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
|
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
|
||||||
|
@ -168,4 +168,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
|
|||||||
if ls libs/*.whl; then \
|
if ls libs/*.whl; then \
|
||||||
python3 -m pip install libs/*.whl; fi
|
python3 -m pip install libs/*.whl; fi
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN python3 -m pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
@ -22,4 +22,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
|
|||||||
-r requirements-tpu.txt
|
-r requirements-tpu.txt
|
||||||
RUN python3 setup.py develop
|
RUN python3 setup.py develop
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN python3 -m pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
@ -64,5 +64,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
|
|||||||
|
|
||||||
ENV VLLM_USAGE_SOURCE production-docker-image \
|
ENV VLLM_USAGE_SOURCE production-docker-image \
|
||||||
TRITON_XPU_PROFILE 1
|
TRITON_XPU_PROFILE 1
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN python3 -m pip install -e tests/vllm_test_utils
|
||||||
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from vllm_test_utils import blame
|
||||||
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.distributed import cleanup_dist_env_and_memory
|
from vllm.distributed import cleanup_dist_env_and_memory
|
||||||
|
|
||||||
|
|
||||||
def test_lazy_outlines(sample_regex):
|
def run_normal():
|
||||||
"""If users don't use guided decoding, outlines should not be imported.
|
|
||||||
"""
|
|
||||||
prompts = [
|
prompts = [
|
||||||
"Hello, my name is",
|
"Hello, my name is",
|
||||||
"The president of the United States is",
|
"The president of the United States is",
|
||||||
@ -25,13 +25,12 @@ def test_lazy_outlines(sample_regex):
|
|||||||
generated_text = output.outputs[0].text
|
generated_text = output.outputs[0].text
|
||||||
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
||||||
|
|
||||||
# make sure outlines is not imported
|
|
||||||
assert 'outlines' not in sys.modules
|
|
||||||
|
|
||||||
# Destroy the LLM object and free up the GPU memory.
|
# Destroy the LLM object and free up the GPU memory.
|
||||||
del llm
|
del llm
|
||||||
cleanup_dist_env_and_memory()
|
cleanup_dist_env_and_memory()
|
||||||
|
|
||||||
|
|
||||||
|
def run_lmfe(sample_regex):
|
||||||
# Create an LLM with guided decoding enabled.
|
# Create an LLM with guided decoding enabled.
|
||||||
llm = LLM(model="facebook/opt-125m",
|
llm = LLM(model="facebook/opt-125m",
|
||||||
enforce_eager=True,
|
enforce_eager=True,
|
||||||
@ -51,5 +50,15 @@ def test_lazy_outlines(sample_regex):
|
|||||||
generated_text = output.outputs[0].text
|
generated_text = output.outputs[0].text
|
||||||
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_lazy_outlines(sample_regex):
|
||||||
|
"""If users don't use guided decoding, outlines should not be imported.
|
||||||
|
"""
|
||||||
# make sure outlines is not imported
|
# make sure outlines is not imported
|
||||||
assert 'outlines' not in sys.modules
|
module_name = "outlines"
|
||||||
|
with blame(lambda: module_name in sys.modules) as result:
|
||||||
|
run_normal()
|
||||||
|
run_lmfe(sample_regex)
|
||||||
|
assert not result.found, (
|
||||||
|
f"Module {module_name} is already imported, the"
|
||||||
|
f" first import location is:\n{result.trace_stack}")
|
||||||
|
@ -1,61 +1,9 @@
|
|||||||
# Description: Test the lazy import module
|
# Description: Test the lazy import module
|
||||||
# The utility function cannot be placed in `vllm.utils`
|
# The utility function cannot be placed in `vllm.utils`
|
||||||
# this needs to be a standalone script
|
# this needs to be a standalone script
|
||||||
|
|
||||||
import contextlib
|
|
||||||
import dataclasses
|
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
|
||||||
from typing import Callable, Generator
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
|
||||||
class BlameResult:
|
|
||||||
found: bool = False
|
|
||||||
trace_stack: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
|
||||||
def blame(func: Callable) -> Generator[BlameResult, None, None]:
|
|
||||||
"""
|
|
||||||
Trace the function calls to find the first function that satisfies the
|
|
||||||
condition. The trace stack will be stored in the result.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
|
|
||||||
```python
|
|
||||||
with blame(lambda: some_condition()) as result:
|
|
||||||
# do something
|
|
||||||
|
|
||||||
if result.found:
|
|
||||||
print(result.trace_stack)
|
|
||||||
"""
|
|
||||||
result = BlameResult()
|
|
||||||
|
|
||||||
def _trace_calls(frame, event, arg=None):
|
|
||||||
nonlocal result
|
|
||||||
if event in ['call', 'return']:
|
|
||||||
# for every function call or return
|
|
||||||
try:
|
|
||||||
# Temporarily disable the trace function
|
|
||||||
sys.settrace(None)
|
|
||||||
# check condition here
|
|
||||||
if not result.found and func():
|
|
||||||
result.found = True
|
|
||||||
result.trace_stack = "".join(traceback.format_stack())
|
|
||||||
# Re-enable the trace function
|
|
||||||
sys.settrace(_trace_calls)
|
|
||||||
except NameError:
|
|
||||||
# modules are deleted during shutdown
|
|
||||||
pass
|
|
||||||
return _trace_calls
|
|
||||||
|
|
||||||
sys.settrace(_trace_calls)
|
|
||||||
|
|
||||||
yield result
|
|
||||||
|
|
||||||
sys.settrace(None)
|
|
||||||
|
|
||||||
|
from vllm_test_utils import blame
|
||||||
|
|
||||||
module_name = "torch._inductor.async_compile"
|
module_name = "torch._inductor.async_compile"
|
||||||
|
|
||||||
|
7
tests/vllm_test_utils/setup.py
Normal file
7
tests/vllm_test_utils/setup.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
from setuptools import setup
|
||||||
|
|
||||||
|
# Minimal packaging metadata so the standalone test helpers can be installed
# with `pip install -e tests/vllm_test_utils`.
_PACKAGE = 'vllm_test_utils'

setup(name=_PACKAGE, version='0.1', packages=[_PACKAGE])
|
8
tests/vllm_test_utils/vllm_test_utils/__init__.py
Normal file
8
tests/vllm_test_utils/vllm_test_utils/__init__.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
"""
|
||||||
|
vllm_test_utils is a package for vLLM testing utilities.
|
||||||
|
It does not import any vLLM modules.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .blame import BlameResult, blame
|
||||||
|
|
||||||
|
__all__ = ["blame", "BlameResult"]
|
53
tests/vllm_test_utils/vllm_test_utils/blame.py
Normal file
53
tests/vllm_test_utils/vllm_test_utils/blame.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import contextlib
|
||||||
|
import dataclasses
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
from typing import Callable, Generator
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
class BlameResult:
    """Outcome of a `blame` tracing session."""
    # True once the condition passed to `blame` has been observed to hold.
    found: bool = False
    # Formatted call stack captured the first time the condition held;
    # stays empty if the condition never became true.
    trace_stack: str = ""


@contextlib.contextmanager
def blame(func: Callable) -> Generator[BlameResult, None, None]:
    """
    Trace function calls/returns to find the first point at which a
    condition becomes true. The call stack at that point is stored in the
    yielded result.

    Args:
        func: zero-argument callable returning a truthy value once the
            condition of interest holds. It is evaluated on every Python
            function call and return while the context is active.

    Yields:
        A `BlameResult`; `found` and `trace_stack` are filled in as soon as
        `func()` first returns a truthy value.

    The trace function that was active before entering the context is
    restored on exit, even if the body raises.

    Usage:

    ```python
    with blame(lambda: some_condition()) as result:
        # do something

    if result.found:
        print(result.trace_stack)
    ```
    """
    result = BlameResult()

    def _trace_calls(frame, event, arg=None):
        if event in ('call', 'return'):
            # for every function call or return
            try:
                # Temporarily disable the trace function so that evaluating
                # the condition and formatting the stack are not traced.
                sys.settrace(None)
                # check condition here
                if not result.found and func():
                    result.found = True
                    result.trace_stack = "".join(traceback.format_stack())
                # Re-enable the trace function
                sys.settrace(_trace_calls)
            except NameError:
                # modules are deleted during shutdown
                pass
        return _trace_calls

    # Remember whatever tracer (debugger, coverage, ...) was installed so it
    # can be put back afterwards instead of being silently dropped.
    previous_trace = sys.gettrace()
    sys.settrace(_trace_calls)
    try:
        yield result
    finally:
        # Always uninstall our tracer, even when the `with` body raises;
        # otherwise it would keep running for the rest of the process.
        sys.settrace(previous_trace)
|
Loading…
x
Reference in New Issue
Block a user