[ci] add vllm_test_utils (#10659)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2024-11-26 00:20:04 -08:00 · 2024-11-26 00:20:04 -08:00 · 334d64d1e8
commit 334d64d1e8
parent 940635343a
14 changed files with 113 additions and 61 deletions
--- a/Dockerfile
+++ b/Dockerfile
@ -191,6 +191,10 @@ ADD . /vllm-workspace/
 RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -e tests/vllm_test_utils
 # enable fast downloads from hf (for testing)
 RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install hf_transfer
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@ -62,4 +62,8 @@ WORKDIR /workspace/
 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -e tests/vllm_test_utils
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
--- a/Dockerfile.hpu
+++ b/Dockerfile.hpu
@ -11,6 +11,9 @@ ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
 RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
 # install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
 WORKDIR /workspace/
 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
--- a/Dockerfile.neuron
+++ b/Dockerfile.neuron
@ -38,4 +38,7 @@ ENV VLLM_TARGET_DEVICE neuron
 RUN --mount=type=bind,source=.git,target=.git \
    pip install --no-build-isolation -v -e .
 # install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
 CMD ["/bin/bash"]
--- a/Dockerfile.openvino
+++ b/Dockerfile.openvino
@ -22,4 +22,7 @@ RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVIC
 COPY examples/ /workspace/examples
 COPY benchmarks/ /workspace/benchmarks
 # install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
 CMD ["/bin/bash"]
--- a/Dockerfile.ppc64le
+++ b/Dockerfile.ppc64le
@ -29,6 +29,9 @@ RUN --mount=type=cache,target=/root/.cache/pip  \
 RUN --mount=type=bind,source=.git,target=.git \
    VLLM_TARGET_DEVICE=cpu python3 setup.py install
 # install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
 WORKDIR /workspace/
 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@ -168,4 +168,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
    if ls libs/*.whl; then \
    python3 -m pip install libs/*.whl; fi
 # install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
 CMD ["/bin/bash"]
--- a/Dockerfile.tpu
+++ b/Dockerfile.tpu
@ -22,4 +22,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
        -r requirements-tpu.txt
 RUN python3 setup.py develop
 # install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
 CMD ["/bin/bash"]
--- a/Dockerfile.xpu
+++ b/Dockerfile.xpu
@ -64,5 +64,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 # Use the key=value form so that both variables are actually defined. With the
 # legacy space-separated `ENV key value` form, everything after the first key
 # (including the continued line) becomes the value of VLLM_USAGE_SOURCE and
 # TRITON_XPU_PROFILE is never set.
 ENV VLLM_USAGE_SOURCE=production-docker-image \
    TRITON_XPU_PROFILE=1
-
+# install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
--- a/tests/entrypoints/llm/test_lazy_outlines.py
+++ b/tests/entrypoints/llm/test_lazy_outlines.py
@ -1,12 +1,12 @@
 import sys
 from vllm_test_utils import blame
 from vllm import LLM, SamplingParams
 from vllm.distributed import cleanup_dist_env_and_memory
-def test_lazy_outlines(sample_regex):
+def run_normal():
    """If users don't use guided decoding, outlines should not be imported.
    """
    prompts = [
        "Hello, my name is",
        "The president of the United States is",
@ -25,13 +25,12 @@ def test_lazy_outlines(sample_regex):
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
    # make sure outlines is not imported
    assert 'outlines' not in sys.modules
    # Destroy the LLM object and free up the GPU memory.
    del llm
    cleanup_dist_env_and_memory()
 def run_lmfe(sample_regex):
    # Create an LLM with guided decoding enabled.
    llm = LLM(model="facebook/opt-125m",
              enforce_eager=True,
@ -51,5 +50,15 @@ def test_lazy_outlines(sample_regex):
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 def test_lazy_outlines(sample_regex):
    """If users don't use guided decoding, outlines should not be imported.
    """
    # make sure outlines is not imported
-    assert 'outlines' not in sys.modules
+    module_name = "outlines"
    with blame(lambda: module_name in sys.modules) as result:
        run_normal()
        run_lmfe(sample_regex)
    assert not result.found, (
        f"Module {module_name} is already imported, the"
        f" first import location is:\n{result.trace_stack}")
--- a/tests/test_lazy_torch_compile.py
+++ b/tests/test_lazy_torch_compile.py
@ -1,61 +1,9 @@
 # Description: Test the lazy import module
 # The utility function cannot be placed in `vllm.utils`
 # this needs to be a standalone script
 import contextlib
 import dataclasses
 import sys
 import traceback
 from typing import Callable, Generator
@dataclasses.dataclass
class BlameResult:
    """Outcome of a `blame` tracing session."""

    # True once the watched condition has been observed to hold.
    found: bool = False
    # Formatted stack captured at the moment the condition first held;
    # stays empty while `found` is False.
    trace_stack: str = ""
@contextlib.contextmanager
def blame(func: Callable) -> Generator[BlameResult, None, None]:
    """
    Trace function calls to find the first point at which `func()` is true.

    While the `with` body runs, `func` is evaluated on every Python-level
    function call and return. The first time it returns a truthy value,
    `result.found` is set and the formatted stack at that moment is stored
    in `result.trace_stack`.

    Usage:

    ```python
    with blame(lambda: some_condition()) as result:
        # do something

    if result.found:
        print(result.trace_stack)
    ```

    Args:
        func: zero-argument callable returning a truthy value once the
            condition of interest holds.

    Yields:
        The `BlameResult` that is filled in while the body executes.
    """
    result = BlameResult()
    # Remember whatever tracer was active (debugger, coverage, an outer
    # `blame`) so it can be put back when this context exits.
    previous_trace = sys.gettrace()

    def _trace_calls(frame, event, arg=None):
        if event in ('call', 'return'):
            # for every function call or return
            try:
                # Temporarily disable the trace function so that evaluating
                # `func` and formatting the stack are not traced themselves.
                sys.settrace(None)
                # check condition here
                if not result.found and func():
                    result.found = True
                    result.trace_stack = "".join(traceback.format_stack())
                # Re-enable the trace function
                sys.settrace(_trace_calls)
            except NameError:
                # modules are deleted during shutdown
                pass
        return _trace_calls

    sys.settrace(_trace_calls)
    try:
        yield result
    finally:
        # Always uninstall our tracer, even when the `with` body raises;
        # otherwise it would stay active for the rest of the process.
        sys.settrace(previous_trace)
 from vllm_test_utils import blame
 module_name = "torch._inductor.async_compile"
--- a/tests/vllm_test_utils/setup.py
+++ b/tests/vllm_test_utils/setup.py
@ -0,0 +1,7 @@
 from setuptools import setup
 # Packaging metadata for the standalone test-helper distribution; it ships
 # the single `vllm_test_utils` package.
 setup(name='vllm_test_utils', version='0.1', packages=['vllm_test_utils'])
--- a/tests/vllm_test_utils/vllm_test_utils/__init__.py
+++ b/tests/vllm_test_utils/vllm_test_utils/__init__.py
@ -0,0 +1,8 @@
 """
 vllm_test_utils is a package for vLLM testing utilities.
 It does not import any vLLM modules.
 """
 from .blame import BlameResult, blame
 __all__ = ["blame", "BlameResult"]
--- a/tests/vllm_test_utils/vllm_test_utils/blame.py
+++ b/tests/vllm_test_utils/vllm_test_utils/blame.py
@ -0,0 +1,53 @@
 import contextlib
 import dataclasses
 import sys
 import traceback
 from typing import Callable, Generator
@dataclasses.dataclass
class BlameResult:
    """Outcome of a `blame` tracing session."""

    # True once the watched condition has been observed to hold.
    found: bool = False
    # Formatted stack captured at the moment the condition first held;
    # stays empty while `found` is False.
    trace_stack: str = ""
@contextlib.contextmanager
def blame(func: Callable) -> Generator[BlameResult, None, None]:
    """
    Trace function calls to find the first point at which `func()` is true.

    While the `with` body runs, `func` is evaluated on every Python-level
    function call and return. The first time it returns a truthy value,
    `result.found` is set and the formatted stack at that moment is stored
    in `result.trace_stack`.

    Usage:

    ```python
    with blame(lambda: some_condition()) as result:
        # do something

    if result.found:
        print(result.trace_stack)
    ```

    Args:
        func: zero-argument callable returning a truthy value once the
            condition of interest holds.

    Yields:
        The `BlameResult` that is filled in while the body executes.
    """
    result = BlameResult()
    # Remember whatever tracer was active (debugger, coverage, an outer
    # `blame`) so it can be put back when this context exits.
    previous_trace = sys.gettrace()

    def _trace_calls(frame, event, arg=None):
        if event in ('call', 'return'):
            # for every function call or return
            try:
                # Temporarily disable the trace function so that evaluating
                # `func` and formatting the stack are not traced themselves.
                sys.settrace(None)
                # check condition here
                if not result.found and func():
                    result.found = True
                    result.trace_stack = "".join(traceback.format_stack())
                # Re-enable the trace function
                sys.settrace(_trace_calls)
            except NameError:
                # modules are deleted during shutdown
                pass
        return _trace_calls

    sys.settrace(_trace_calls)
    try:
        yield result
    finally:
        # Always uninstall our tracer, even when the `with` body raises;
        # otherwise it would stay active for the rest of the process.
        sys.settrace(previous_trace)