[misc][ci] fix cpu test with plugins (#7489)
commit ea49e6a3c8
parent 97992802f3
@@ -23,7 +23,7 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 # Run basic model test
 docker exec cpu-test bash -c "
   pip install pytest Pillow protobuf
-  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
+  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported

 # online inference
 docker exec cpu-test bash -c "
@@ -292,6 +292,7 @@ steps:
   - pytest -v -s distributed/test_chunked_prefill_distributed.py
   - pytest -v -s distributed/test_multimodal_broadcast.py
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
+  - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
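The added pip install of ./plugins/vllm_add_dummy_model makes the dummy-model plugin package available before distributed/test_distributed_oot.py runs, since the test selects it at runtime through the VLLM_PLUGINS environment variable (see the test changes below). As a rough illustration only, such a plugin package is typically installable with a setuptools entry point; the group name and register callable here are assumptions, not taken from this commit:

# setup.py for a hypothetical vllm_add_dummy_model-style plugin package.
from setuptools import setup

setup(
    name="vllm_add_dummy_model",
    version="0.1",
    packages=["vllm_add_dummy_model"],
    entry_points={
        # Entry-point group and callable are illustrative; the plugin loader
        # looks up its own group and invokes each selected entry point.
        "vllm.general_plugins": [
            "register_dummy_model = vllm_add_dummy_model:register",
        ],
    },
)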
@@ -4,12 +4,10 @@ import pytest

 from vllm import LLM, SamplingParams

-# NOTE: the order of the tests is important
-# the first test does not load any plugins
-# the second test loads the plugin
-# they share the same process, so the plugin is loaded for the second test
+from ..utils import fork_new_process_for_each_test


+@fork_new_process_for_each_test
 def test_plugin(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = ""
     with pytest.raises(Exception) as excinfo:
@@ -17,6 +15,7 @@ def test_plugin(dummy_opt_path):
     assert "are not supported for now" in str(excinfo.value)


+@fork_new_process_for_each_test
 def test_oot_registration(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = "register_dummy_model"
     prompts = ["Hello, my name is", "The text does not matter"]
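The removed comments explained that the two tests had to run in a fixed order because they shared one process, so the plugin loaded by the second test could leak into the first. Decorating both tests with fork_new_process_for_each_test (imported via "from ..utils import fork_new_process_for_each_test") gives each test its own forked process, so the VLLM_PLUGINS setting of one test cannot affect the other. A minimal sketch of what such a fork-per-test decorator can look like, not the exact helper in tests/utils.py:

import functools
import os


def fork_new_process_for_each_test(f):
    """Run the wrapped test in a forked child process.

    Sketch only: the real helper may handle pytest skips, signals,
    and exit codes differently.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        pid = os.fork()
        if pid == 0:
            # Child: run the test and report success/failure via exit code.
            try:
                f(*args, **kwargs)
                os._exit(0)
            except BaseException:
                import traceback
                traceback.print_exc()
                os._exit(1)
        # Parent: wait for the child and fail the test if the child failed.
        _, status = os.waitpid(pid, 0)
        assert os.WEXITSTATUS(status) == 0, f"{f.__name__} failed in child"

    return wrapper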
@@ -10,7 +10,6 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional

 import openai
-import ray
 import requests
 from transformers import AutoTokenizer
 from typing_extensions import ParamSpec
@@ -18,9 +17,10 @@ from typing_extensions import ParamSpec
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
 from vllm.entrypoints.openai.cli_args import make_arg_parser
+from vllm.platforms import current_platform
 from vllm.utils import FlexibleArgumentParser, get_open_port, is_hip

-if is_hip():
+if current_platform.is_rocm():
     from amdsmi import (amdsmi_get_gpu_vram_usage,
                         amdsmi_get_processor_handles, amdsmi_init,
                         amdsmi_shut_down)
@@ -32,7 +32,7 @@ if is_hip():
             yield
         finally:
             amdsmi_shut_down()
-else:
+elif current_platform.is_cuda():
     from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
                         nvmlInit, nvmlShutdown)

@@ -43,6 +43,11 @@ else:
             yield
         finally:
             nvmlShutdown()
+else:
+
+    @contextmanager
+    def _nvml():
+        yield


 VLLM_PATH = Path(__file__).parent.parent
@@ -293,6 +298,8 @@ def multi_process_parallel(
     pp_size: int,
     test_target: Any,
 ) -> None:
+    import ray
+
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
     # NOTE: We need to set working_dir for distributed tests,
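Moving "import ray" inside multi_process_parallel keeps tests/utils.py importable in environments where ray is not installed, such as the CPU test image, and only pays the import cost when the helper is actually used. The body of the helper is not shown in this hunk; a minimal sketch of the ray fan-out pattern its comments describe, assuming test_target is a ray remote task that takes the parallel sizes and a rank, could look like:

import ray


def run_workers_with_ray(tp_size: int, pp_size: int, test_target) -> None:
    # Sketch only: mirrors the pattern described by the comments above,
    # not the exact body of multi_process_parallel.
    ray.init()
    try:
        # Launch one remote worker per rank; any worker exception surfaces
        # here, which is easier to debug than with multiprocessing.
        refs = [
            test_target.remote(tp_size, pp_size, rank)
            for rank in range(tp_size * pp_size)
        ]
        ray.get(refs)
    finally:
        ray.shutdown()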