56 lines
1.9 KiB
Python
56 lines
1.9 KiB
Python
import warnings
|
|
|
|
import pytest
|
|
import torch.cuda
|
|
|
|
from vllm.model_executor.models import ModelRegistry
|
|
from vllm.platforms import current_platform
|
|
|
|
from ..utils import fork_new_process_for_each_test
|
|
|
|
|
|
@pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
def test_registry_imports(model_arch):
    """Resolve ``model_arch`` through the registry.

    Runs once per architecture reported by
    ``ModelRegistry.get_supported_archs()``. The test passes as long as the
    lookup completes without raising; the resolved value is not inspected.
    """
    # Only the absence of an exception matters, so the result is discarded.
    _ = ModelRegistry.resolve_model_cls(model_arch)
|
|
|
|
|
|
@fork_new_process_for_each_test
@pytest.mark.parametrize("model_arch,is_mm,init_cuda", [
    ("LlamaForCausalLM", False, False),
    ("MllamaForConditionalGeneration", True, False),
    ("LlavaForConditionalGeneration", True, True),
])
def test_registry_is_multimodal(model_arch, is_mm, init_cuda):
    """Check the registry's multimodal flag for ``model_arch``.

    The flag returned by ``ModelRegistry.is_multimodal_model`` must be exactly
    ``is_mm``. For cases with ``init_cuda`` set, and only on CUDA-like
    platforms, the test additionally asserts that CUDA is still uninitialized
    after that query, resolves the model class, and emits a warning if CUDA
    remains uninitialized afterwards (the case is then no longer a useful
    probe and should be replaced).
    """
    reported_mm = ModelRegistry.is_multimodal_model(model_arch)
    assert reported_mm is is_mm

    # The CUDA checks apply only to flagged cases on CUDA-like platforms.
    if not init_cuda or not current_platform.is_cuda_alike():
        return

    # The registry query above must not have initialized CUDA.
    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if torch.cuda.is_initialized():
        return

    # Resolving the class was expected to initialize CUDA; flag it if not.
    warnings.warn(
        "This model no longer initializes CUDA on import. "
        "Please test using a different one.",
        stacklevel=2,
    )
|
|
|
|
|
|
@fork_new_process_for_each_test
@pytest.mark.parametrize("model_arch,is_pp,init_cuda", [
    ("MLPSpeculatorPreTrainedModel", False, False),
    ("DeepseekV2ForCausalLM", True, False),
    ("Qwen2VLForConditionalGeneration", True, True),
])
def test_registry_is_pp(model_arch, is_pp, init_cuda):
    """Check the registry's pipeline-parallel support flag for ``model_arch``.

    The flag returned by ``ModelRegistry.is_pp_supported_model`` must be
    exactly ``is_pp``. For cases with ``init_cuda`` set, and only on CUDA-like
    platforms, the test additionally asserts that CUDA is still uninitialized
    after that query, resolves the model class, and emits a warning if CUDA
    remains uninitialized afterwards (the case is then no longer a useful
    probe and should be replaced).
    """
    reported_pp = ModelRegistry.is_pp_supported_model(model_arch)
    assert reported_pp is is_pp

    # The CUDA checks apply only to flagged cases on CUDA-like platforms.
    if not init_cuda or not current_platform.is_cuda_alike():
        return

    # The registry query above must not have initialized CUDA.
    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if torch.cuda.is_initialized():
        return

    # Resolving the class was expected to initialize CUDA; flag it if not.
    warnings.warn(
        "This model no longer initializes CUDA on import. "
        "Please test using a different one.",
        stacklevel=2,
    )
|