2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
2024-10-03 19:56:58 -07:00
|
|
|
import warnings
|
|
|
|
|
2024-05-19 15:13:33 +08:00
|
|
|
import pytest
|
2024-10-03 19:56:58 -07:00
|
|
|
import torch.cuda
|
2024-05-19 15:13:33 +08:00
|
|
|
|
2024-12-01 14:36:51 +08:00
|
|
|
from vllm.model_executor.models import (is_pooling_model,
|
2024-10-07 14:10:35 +08:00
|
|
|
is_text_generation_model,
|
|
|
|
supports_multimodal)
|
2024-12-25 02:22:22 +08:00
|
|
|
from vllm.model_executor.models.adapters import (as_classification_model,
|
|
|
|
as_embedding_model,
|
|
|
|
as_reward_model)
|
2024-12-01 08:02:54 +08:00
|
|
|
from vllm.model_executor.models.registry import (_MULTIMODAL_MODELS,
|
2024-10-07 14:10:35 +08:00
|
|
|
_SPECULATIVE_DECODING_MODELS,
|
|
|
|
_TEXT_GENERATION_MODELS,
|
|
|
|
ModelRegistry)
|
2024-10-03 19:56:58 -07:00
|
|
|
from vllm.platforms import current_platform
|
|
|
|
|
2025-03-17 19:33:35 +08:00
|
|
|
from ..utils import create_new_process_for_each_test
|
2024-11-15 08:55:54 +08:00
|
|
|
from .registry import HF_EXAMPLE_MODELS
|
2024-05-19 15:13:33 +08:00
|
|
|
|
|
|
|
|
2024-10-04 18:01:37 +08:00
|
|
|
@pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
def test_registry_imports(model_arch):
    """Every registered architecture must be importable and correctly tagged.

    Skips architectures whose required ``transformers`` version is not
    installed, and speculative-decoding models (no unified format).
    """
    model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
    model_info.check_transformers_version(on_fail="skip")

    # Ensure all model classes can be imported successfully
    model_cls, _ = ModelRegistry.resolve_model_cls(model_arch)

    if model_arch in _SPECULATIVE_DECODING_MODELS:
        return  # Ignore these models which do not have a unified format

    is_gen = model_arch in _TEXT_GENERATION_MODELS
    is_mm = model_arch in _MULTIMODAL_MODELS

    if is_gen or is_mm:
        assert is_text_generation_model(model_cls)

    # All vLLM models should be convertible to a pooling model
    for convert in (as_classification_model, as_embedding_model,
                    as_reward_model):
        assert is_pooling_model(convert(model_cls))

    if is_mm:
        assert supports_multimodal(model_cls)
|
2024-10-03 19:56:58 -07:00
|
|
|
|
|
|
|
|
2025-03-17 19:33:35 +08:00
|
|
|
@create_new_process_for_each_test()
@pytest.mark.parametrize("model_arch,is_mm,init_cuda,is_ce", [
    ("LlamaForCausalLM", False, False, False),
    ("MllamaForConditionalGeneration", True, False, False),
    ("LlavaForConditionalGeneration", True, True, False),
    ("BertForSequenceClassification", False, False, True),
    ("RobertaForSequenceClassification", False, False, True),
    ("XLMRobertaForSequenceClassification", False, False, True),
])
def test_registry_model_property(model_arch, is_mm, init_cuda, is_ce):
    """Check the multimodal / cross-encoder flags reported by the registry,
    and that merely resolving the model class does not initialize CUDA."""
    assert ModelRegistry.is_multimodal_model(model_arch) is is_mm
    assert ModelRegistry.is_cross_encoder_model(model_arch) is is_ce

    # CUDA-initialization check only applies on CUDA-like platforms
    # for the architectures flagged with init_cuda.
    if not (init_cuda and current_platform.is_cuda_alike()):
        return

    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if not torch.cuda.is_initialized():
        # The parametrized entry exists to guard against CUDA init on
        # import; if the model stopped doing that, the entry is stale.
        warnings.warn(
            "This model no longer initializes CUDA on import. "
            "Please test using a different one.",
            stacklevel=2)
|
|
|
|
|
|
|
|
|
2025-03-17 19:33:35 +08:00
|
|
|
@create_new_process_for_each_test()
@pytest.mark.parametrize("model_arch,is_pp,init_cuda", [
    ("MLPSpeculatorPreTrainedModel", False, False),
    ("DeepseekV2ForCausalLM", True, False),
    ("Qwen2VLForConditionalGeneration", True, True),
])
def test_registry_is_pp(model_arch, is_pp, init_cuda):
    """Check the pipeline-parallel support flag reported by the registry,
    and that merely resolving the model class does not initialize CUDA."""
    assert ModelRegistry.is_pp_supported_model(model_arch) is is_pp

    # CUDA-initialization check only applies on CUDA-like platforms
    # for the architectures flagged with init_cuda.
    if not (init_cuda and current_platform.is_cuda_alike()):
        return

    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if not torch.cuda.is_initialized():
        # The parametrized entry exists to guard against CUDA init on
        # import; if the model stopped doing that, the entry is stale.
        warnings.warn(
            "This model no longer initializes CUDA on import. "
            "Please test using a different one.",
            stacklevel=2)
|
2024-11-15 08:55:54 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_hf_registry_coverage():
    """Every architecture vLLM registers must have an HF example model
    listed in ``tests/models/registry.py``."""
    supported = ModelRegistry.get_supported_archs()
    covered = HF_EXAMPLE_MODELS.get_supported_archs()
    untested_archs = supported - covered

    assert not untested_archs, (
        "Please add the following architectures to "
        f"`tests/models/registry.py`: {untested_archs}")
|