[misc][ci] fix cpu test with plugins (#7489)

parent 97992802f3
commit ea49e6a3c8
@@ -23,7 +23,7 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 # Run basic model test
 docker exec cpu-test bash -c "
   pip install pytest Pillow protobuf
-  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
+  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported

 # online inference
 docker exec cpu-test bash -c "

@@ -292,6 +292,7 @@ steps:
   - pytest -v -s distributed/test_chunked_prefill_distributed.py
   - pytest -v -s distributed/test_multimodal_broadcast.py
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
+  - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py

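For context on the two new pipeline steps: `vllm_add_dummy_model` is an out-of-tree model plugin that vLLM discovers through Python entry points and that only activates when its name appears in `VLLM_PLUGINS` (see the test changes below). The package contents are not part of this diff; the following is only a minimal sketch of what such a plugin could look like, with the entry-point group name and module layout assumed rather than taken from the repository:

```python
# Illustrative sketch only -- not the actual plugins/vllm_add_dummy_model package.
# setup.py: expose a "register_dummy_model" plugin through an entry-point group
# (group name assumed here) that vLLM scans at startup.
from setuptools import setup

setup(
    name="vllm_add_dummy_model",
    version="0.1",
    packages=["vllm_add_dummy_model"],
    entry_points={
        "vllm.general_plugins":
        ["register_dummy_model = vllm_add_dummy_model:register"],
    },
)
```

The entry point would resolve to a `register()` function that adds the dummy architecture to vLLM's model registry (e.g. via `ModelRegistry.register_model(...)`), which is what makes the otherwise-unknown architecture loadable in `test_oot_registration` below.
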
@@ -4,12 +4,10 @@ import pytest

 from vllm import LLM, SamplingParams

-# NOTE: the order of the tests is important
-# the first test does not load any plugins
-# the second test loads the plugin
-# they share the same process, so the plugin is loaded for the second test
+from ..utils import fork_new_process_for_each_test


+@fork_new_process_for_each_test
 def test_plugin(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = ""
     with pytest.raises(Exception) as excinfo:

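Here `test_plugin` sets `VLLM_PLUGINS` to the empty string, so no plugin is loaded, the dummy architecture stays unregistered, and constructing the `LLM` fails with the "are not supported for now" error asserted below. A rough sketch of the kind of env-gated plugin loading this relies on (the helper name and entry-point group are assumptions, not vLLM's actual implementation):

```python
import os
from importlib.metadata import entry_points


def load_general_plugins() -> None:
    """Sketch of env-gated plugin loading; not vLLM's actual code."""
    allowed = os.environ.get("VLLM_PLUGINS")
    # Unset means "load every installed plugin"; an empty string means "load none".
    allowed_names = None if allowed is None else [n for n in allowed.split(",") if n]
    for ep in entry_points(group="vllm.general_plugins"):  # group name assumed
        if allowed_names is not None and ep.name not in allowed_names:
            continue
        # Import the plugin module and invoke its register() hook.
        ep.load()()
```
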
@@ -17,6 +15,7 @@ def test_plugin(dummy_opt_path):
     assert "are not supported for now" in str(excinfo.value)


+@fork_new_process_for_each_test
 def test_oot_registration(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = "register_dummy_model"
     prompts = ["Hello, my name is", "The text does not matter"]

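The deleted comments above documented an ordering hack: both tests shared one process, so a plugin loaded by the later test could never be unloaded for the earlier one. The fix drops that coupling by running each test body in its own forked child process via `fork_new_process_for_each_test` from `tests/utils`. The real helper lives in that module; a minimal sketch of how such a decorator can be built on `os.fork` (details assumed) is:

```python
import functools
import os
from typing import Callable, TypeVar

_F = TypeVar("_F", bound=Callable[..., None])


def fork_new_process_for_each_test(f: _F) -> _F:
    """Run the wrapped test in a forked child so per-process state (e.g. loaded
    plugins) cannot leak into other tests. Sketch only."""

    @functools.wraps(f)
    def wrapper(*args, **kwargs) -> None:
        pid = os.fork()
        if pid == 0:
            # Child: execute the test body and exit immediately, skipping
            # pytest's normal teardown in this copy of the process.
            try:
                f(*args, **kwargs)
            except BaseException:
                os._exit(1)
            os._exit(0)
        # Parent: wait for the child and turn a non-zero exit into a failure.
        _, status = os.waitpid(pid, 0)
        assert os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0, (
            f"forked test child exited with status {status}")

    return wrapper  # type: ignore[return-value]
```
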
@@ -10,7 +10,6 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional

 import openai
-import ray
 import requests
 from transformers import AutoTokenizer
 from typing_extensions import ParamSpec

@@ -18,9 +17,10 @@ from typing_extensions import ParamSpec
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
 from vllm.entrypoints.openai.cli_args import make_arg_parser
+from vllm.platforms import current_platform
 from vllm.utils import FlexibleArgumentParser, get_open_port, is_hip

-if is_hip():
+if current_platform.is_rocm():
     from amdsmi import (amdsmi_get_gpu_vram_usage,
                         amdsmi_get_processor_handles, amdsmi_init,
                         amdsmi_shut_down)

@@ -32,7 +32,7 @@ if is_hip():
             yield
         finally:
             amdsmi_shut_down()
-else:
+elif current_platform.is_cuda():
     from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
                         nvmlInit, nvmlShutdown)

@@ -43,6 +43,11 @@ else:
             yield
         finally:
             nvmlShutdown()
+else:
+
+    @contextmanager
+    def _nvml():
+        yield


 VLLM_PATH = Path(__file__).parent.parent

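Taken together, the last three hunks turn the old two-way `is_hip()` branch in `tests/utils.py` into a three-way platform dispatch, with a no-op fallback so the module can be imported on machines with neither ROCm nor CUDA (such as the CPU plugin-test runner). The resulting shape is roughly as follows; the `try`/`amdsmi_init()`/`nvmlInit()` bodies are inferred from the surrounding context lines and are not themselves part of this diff:

```python
from contextlib import contextmanager

from vllm.platforms import current_platform

if current_platform.is_rocm():
    from amdsmi import (amdsmi_get_gpu_vram_usage,
                        amdsmi_get_processor_handles, amdsmi_init,
                        amdsmi_shut_down)

    @contextmanager
    def _nvml():
        try:
            amdsmi_init()
            yield
        finally:
            amdsmi_shut_down()
elif current_platform.is_cuda():
    from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
                        nvmlInit, nvmlShutdown)

    @contextmanager
    def _nvml():
        try:
            nvmlInit()
            yield
        finally:
            nvmlShutdown()
else:
    # Neither ROCm nor CUDA: GPU telemetry becomes a no-op so that merely
    # importing tests/utils.py works on CPU-only machines.
    @contextmanager
    def _nvml():
        yield
```
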
@@ -293,6 +298,8 @@ def multi_process_parallel(
     pp_size: int,
     test_target: Any,
 ) -> None:
+    import ray
+
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
     # NOTE: We need to set working_dir for distributed tests,

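Finally, the `import ray` removed from the top of `tests/utils.py` earlier reappears here inside `multi_process_parallel`. That makes ray a deferred dependency: the module can be imported (for example by the CPU-only plugin tests added above) on machines where ray is not installed, and the cost is only paid when a distributed test actually runs. In isolation the pattern is simply:

```python
from typing import Any


def multi_process_parallel(
    tp_size: int,  # leading parameters assumed; only pp_size/test_target appear in the hunk
    pp_size: int,
    test_target: Any,
) -> None:
    # Deferred import: ray is only required when this helper is actually
    # called by a distributed test, not when tests/utils.py is imported.
    import ray

    # ... the ray-based worker spawning continues in the real function
    #     (body elided; see multi_process_parallel in the surrounding file).
```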