[misc][ci] fix cpu test with plugins (#7489)
commit ea49e6a3c8
parent 97992802f3
@@ -23,7 +23,7 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 # Run basic model test
 docker exec cpu-test bash -c "
   pip install pytest Pillow protobuf
-  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
+  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported

 # online inference
 docker exec cpu-test bash -c "
@@ -292,6 +292,7 @@ steps:
   - pytest -v -s distributed/test_chunked_prefill_distributed.py
   - pytest -v -s distributed/test_multimodal_broadcast.py
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
+  - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
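The added pip install of ./plugins/vllm_add_dummy_model makes the dummy-model plugin package available before distributed/test_distributed_oot.py runs, since the test selects it at runtime through the VLLM_PLUGINS environment variable (see the test changes below). As a rough illustration only, such a plugin package is typically installable with a setuptools entry point; the group name and register callable here are assumptions, not taken from this commit:

# setup.py for a hypothetical vllm_add_dummy_model-style plugin package.
from setuptools import setup

setup(
    name="vllm_add_dummy_model",
    version="0.1",
    packages=["vllm_add_dummy_model"],
    entry_points={
        # Entry-point group and callable are illustrative; the plugin loader
        # looks up its own group and invokes each selected entry point.
        "vllm.general_plugins": [
            "register_dummy_model = vllm_add_dummy_model:register",
        ],
    },
)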
@@ -4,12 +4,10 @@ import pytest

 from vllm import LLM, SamplingParams

-# NOTE: the order of the tests is important
-# the first test does not load any plugins
-# the second test loads the plugin
-# they share the same process, so the plugin is loaded for the second test
+from ..utils import fork_new_process_for_each_test


+@fork_new_process_for_each_test
 def test_plugin(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = ""
     with pytest.raises(Exception) as excinfo:
@@ -17,6 +15,7 @@ def test_plugin(dummy_opt_path):
     assert "are not supported for now" in str(excinfo.value)


+@fork_new_process_for_each_test
 def test_oot_registration(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = "register_dummy_model"
     prompts = ["Hello, my name is", "The text does not matter"]
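The removed comments explained that the two tests had to run in a fixed order because they shared one process, so the plugin loaded by the second test could leak into the first. Decorating both tests with fork_new_process_for_each_test (imported via "from ..utils import fork_new_process_for_each_test") gives each test its own forked process, so the VLLM_PLUGINS setting of one test cannot affect the other. A minimal sketch of what such a fork-per-test decorator can look like, not the exact helper in tests/utils.py:

import functools
import os


def fork_new_process_for_each_test(f):
    """Run the wrapped test in a forked child process.

    Sketch only: the real helper may handle pytest skips, signals,
    and exit codes differently.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        pid = os.fork()
        if pid == 0:
            # Child: run the test and report success/failure via exit code.
            try:
                f(*args, **kwargs)
                os._exit(0)
            except BaseException:
                import traceback
                traceback.print_exc()
                os._exit(1)
        # Parent: wait for the child and fail the test if the child failed.
        _, status = os.waitpid(pid, 0)
        assert os.WEXITSTATUS(status) == 0, f"{f.__name__} failed in child"

    return wrapper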
@@ -10,7 +10,6 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional

 import openai
-import ray
 import requests
 from transformers import AutoTokenizer
 from typing_extensions import ParamSpec
@@ -18,9 +17,10 @@ from typing_extensions import ParamSpec
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
 from vllm.entrypoints.openai.cli_args import make_arg_parser
+from vllm.platforms import current_platform
 from vllm.utils import FlexibleArgumentParser, get_open_port, is_hip

-if is_hip():
+if current_platform.is_rocm():
     from amdsmi import (amdsmi_get_gpu_vram_usage,
                         amdsmi_get_processor_handles, amdsmi_init,
                         amdsmi_shut_down)
@@ -32,7 +32,7 @@ if is_hip():
             yield
         finally:
             amdsmi_shut_down()
-else:
+elif current_platform.is_cuda():
     from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
                         nvmlInit, nvmlShutdown)

@@ -43,6 +43,11 @@ else:
             yield
         finally:
             nvmlShutdown()
+else:
+
+    @contextmanager
+    def _nvml():
+        yield


 VLLM_PATH = Path(__file__).parent.parent
@@ -293,6 +298,8 @@ def multi_process_parallel(
     pp_size: int,
     test_target: Any,
 ) -> None:
+    import ray
+
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
     # NOTE: We need to set working_dir for distributed tests,
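Moving "import ray" inside multi_process_parallel keeps tests/utils.py importable in environments where ray is not installed, such as the CPU test image, and only pays the import cost when the helper is actually used. The body of the helper is not shown in this hunk; a minimal sketch of the ray fan-out pattern its comments describe, assuming test_target is a ray remote task that takes the parallel sizes and a rank, could look like:

import ray


def run_workers_with_ray(tp_size: int, pp_size: int, test_target) -> None:
    # Sketch only: mirrors the pattern described by the comments above,
    # not the exact body of multi_process_parallel.
    ray.init()
    try:
        # Launch one remote worker per rank; any worker exception surfaces
        # here, which is easier to debug than with multiprocessing.
        refs = [
            test_target.remote(tp_size, pp_size, rank)
            for rank in range(tp_size * pp_size)
        ]
        ray.get(refs)
    finally:
        ray.shutdown()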