[misc][ci] fix cpu test with plugins (#7489)

Authored by youkaichao on 2024-08-13 19:27:46 -07:00; committed by GitHub.
parent 97992802f3
commit ea49e6a3c8
4 changed files with 15 additions and 8 deletions

.buildkite/run-cpu-test.sh

@@ -23,7 +23,7 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 # Run basic model test
 docker exec cpu-test bash -c "
   pip install pytest Pillow protobuf
-  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
+  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
 # online inference
 docker exec cpu-test bash -c "

.buildkite/test-pipeline.yaml

@@ -292,6 +292,7 @@ steps:
   - pytest -v -s distributed/test_chunked_prefill_distributed.py
   - pytest -v -s distributed/test_multimodal_broadcast.py
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
+  - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
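
For context, vllm_add_dummy_model is an out-of-tree (OOT) model plugin that must be installed before tests/distributed/test_distributed_oot.py can run; the new pip install line does exactly that. Below is a minimal sketch of what such a plugin's registration hook can look like, assuming vLLM's entry-point plugin mechanism; the architecture name and wiring are illustrative, not the plugin's exact contents:

    # Exposed via the plugin's setup.py entry point (assumed group name
    # "vllm.general_plugins") so vLLM can discover and call it at startup.
    from vllm import ModelRegistry


    def register():
        # Serve checkpoints whose config declares "MyOPTForCausalLM" by
        # mapping that architecture name onto the in-tree OPT implementation.
        from vllm.model_executor.models.opt import OPTForCausalLM

        if "MyOPTForCausalLM" not in ModelRegistry.get_supported_archs():
            ModelRegistry.register_model("MyOPTForCausalLM", OPTForCausalLM)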

tests/models/test_oot_registration.py

@@ -4,12 +4,10 @@ import pytest
 
 from vllm import LLM, SamplingParams
 
+from ..utils import fork_new_process_for_each_test
 
-# NOTE: the order of the tests is important
-# the first test does not load any plugins
-# the second test loads the plugin
-# they share the same process, so the plugin is loaded for the second test
+@fork_new_process_for_each_test
 def test_plugin(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = ""
     with pytest.raises(Exception) as excinfo:
@@ -17,6 +15,7 @@ def test_plugin(dummy_opt_path):
     assert "are not supported for now" in str(excinfo.value)
 
 
+@fork_new_process_for_each_test
 def test_oot_registration(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = "register_dummy_model"
     prompts = ["Hello, my name is", "The text does not matter"]
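
The deleted NOTE comments document the old workaround: plugins, once loaded, stay loaded for the life of the process, so the plugin-free test had to run first. Forking a fresh process per test removes that ordering dependency entirely. A minimal sketch of what a decorator like fork_new_process_for_each_test can look like (POSIX-only; vLLM's actual helper in tests/utils.py additionally handles process-group cleanup and pytest skips):

    import functools
    import os


    def fork_new_process_for_each_test(f):
        """Run the wrapped test in its own forked child process."""

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            pid = os.fork()
            if pid == 0:
                # Child: run the test, report success/failure via exit code.
                try:
                    f(*args, **kwargs)
                except BaseException:
                    import traceback
                    traceback.print_exc()
                    os._exit(1)
                os._exit(0)
            # Parent: block until the child exits and surface its result.
            _, status = os.waitpid(pid, 0)
            assert os.waitstatus_to_exitcode(status) == 0, f"{f.__name__} failed"

        return wrapper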

tests/utils.py

@@ -10,7 +10,6 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional
 
 import openai
-import ray
 import requests
 from transformers import AutoTokenizer
 from typing_extensions import ParamSpec
@@ -18,9 +17,10 @@ from typing_extensions import ParamSpec
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
 from vllm.entrypoints.openai.cli_args import make_arg_parser
+from vllm.platforms import current_platform
 from vllm.utils import FlexibleArgumentParser, get_open_port, is_hip
 
-if is_hip():
+if current_platform.is_rocm():
     from amdsmi import (amdsmi_get_gpu_vram_usage,
                         amdsmi_get_processor_handles, amdsmi_init,
                         amdsmi_shut_down)
@@ -32,7 +32,7 @@ if is_hip():
             yield
         finally:
             amdsmi_shut_down()
 
-else:
+elif current_platform.is_cuda():
     from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
                         nvmlInit, nvmlShutdown)
@@ -43,6 +43,11 @@ else:
             yield
         finally:
             nvmlShutdown()
+else:
+
+    @contextmanager
+    def _nvml():
+        yield
 
 
 VLLM_PATH = Path(__file__).parent.parent
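
The added else branch is what makes this module importable on the CPU CI: previously every non-ROCm machine was assumed to be a CUDA machine, so a host with neither would still try to import pynvml. The dispatch pattern in isolation, as a self-contained sketch with a stand-in boolean in place of current_platform:

    from contextlib import contextmanager

    HAS_GPU = False  # stand-in for current_platform.is_rocm()/is_cuda()

    if HAS_GPU:

        @contextmanager
        def gpu_telemetry():
            print("init telemetry")  # e.g. nvmlInit() / amdsmi_init()
            try:
                yield
            finally:
                print("shut down telemetry")  # e.g. nvmlShutdown()

    else:

        @contextmanager
        def gpu_telemetry():
            # CPU-only: same interface, no work.
            yield


    with gpu_telemetry():
        pass  # callers never branch on the platform themselves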
@@ -293,6 +298,8 @@ def multi_process_parallel(
     pp_size: int,
     test_target: Any,
 ) -> None:
+    import ray
+
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
     # NOTE: We need to set working_dir for distributed tests,
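
Deferring import ray into the function body is the other half of the CPU fix: tests/models/test_oot_registration.py now imports from ..utils, and a module-level import ray would make that fail in the CPU container, where ray is not installed. The deferred-import pattern in isolation (function name illustrative, not vLLM's API):

    def run_with_ray(fn, *args):
        # Deferred import: only callers that actually spawn ray workers pay
        # the dependency; importing the enclosing module stays ray-free.
        import ray

        ray.init()
        try:
            return ray.get(ray.remote(fn).remote(*args))
        finally:
            ray.shutdown()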