2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
2024-10-03 19:56:58 -07:00
|
|
|
import warnings
|
|
|
|
|
2024-05-19 15:13:33 +08:00
|
|
|
import pytest
|
2024-10-03 19:56:58 -07:00
|
|
|
import torch.cuda
|
2024-05-19 15:13:33 +08:00
|
|
|
|
2024-12-01 14:36:51 +08:00
|
|
|
from vllm.model_executor.models import (is_pooling_model,
|
2024-10-07 14:10:35 +08:00
|
|
|
is_text_generation_model,
|
|
|
|
supports_multimodal)
|
2024-12-25 02:22:22 +08:00
|
|
|
from vllm.model_executor.models.adapters import (as_classification_model,
|
|
|
|
as_embedding_model,
|
|
|
|
as_reward_model)
|
2024-12-01 08:02:54 +08:00
|
|
|
from vllm.model_executor.models.registry import (_MULTIMODAL_MODELS,
|
2024-10-07 14:10:35 +08:00
|
|
|
_SPECULATIVE_DECODING_MODELS,
|
|
|
|
_TEXT_GENERATION_MODELS,
|
|
|
|
ModelRegistry)
|
2024-10-03 19:56:58 -07:00
|
|
|
from vllm.platforms import current_platform
|
|
|
|
|
2025-03-17 19:33:35 +08:00
|
|
|
from ..utils import create_new_process_for_each_test
|
2024-11-15 08:55:54 +08:00
|
|
|
from .registry import HF_EXAMPLE_MODELS
|
2024-05-19 15:13:33 +08:00
|
|
|
|
|
|
|
|
2024-10-04 18:01:37 +08:00
|
|
|
@pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
def test_registry_imports(model_arch):
    """Every registered architecture must be importable and correctly tagged.

    Skips architectures whose required ``transformers`` version is not
    installed, and speculative-decoding models (no unified format).
    """
    model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
    model_info.check_transformers_version(on_fail="skip")

    # Ensure all model classes can be imported successfully
    model_cls, _ = ModelRegistry.resolve_model_cls(model_arch)

    if model_arch in _SPECULATIVE_DECODING_MODELS:
        return  # Ignore these models which do not have a unified format

    is_gen = model_arch in _TEXT_GENERATION_MODELS
    is_mm = model_arch in _MULTIMODAL_MODELS

    if is_gen or is_mm:
        assert is_text_generation_model(model_cls)

    # All vLLM models should be convertible to a pooling model
    for convert in (as_classification_model, as_embedding_model,
                    as_reward_model):
        assert is_pooling_model(convert(model_cls))

    if is_mm:
        assert supports_multimodal(model_cls)
|
2024-10-03 19:56:58 -07:00
|
|
|
|
|
|
|
|
2025-03-17 19:33:35 +08:00
|
|
|
@create_new_process_for_each_test()
@pytest.mark.parametrize("model_arch,is_mm,init_cuda,is_ce", [
    ("LlamaForCausalLM", False, False, False),
    ("MllamaForConditionalGeneration", True, False, False),
    ("LlavaForConditionalGeneration", True, True, False),
    ("BertForSequenceClassification", False, False, True),
    ("RobertaForSequenceClassification", False, False, True),
    ("XLMRobertaForSequenceClassification", False, False, True),
])
def test_registry_model_property(model_arch, is_mm, init_cuda, is_ce):
    """Check the multimodal / cross-encoder flags reported by the registry,
    and that merely resolving the model class does not initialize CUDA."""
    assert ModelRegistry.is_multimodal_model(model_arch) is is_mm
    assert ModelRegistry.is_cross_encoder_model(model_arch) is is_ce

    # CUDA-initialization check only applies on CUDA-like platforms
    # for the architectures flagged with init_cuda.
    if not (init_cuda and current_platform.is_cuda_alike()):
        return

    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if not torch.cuda.is_initialized():
        # The parametrized entry exists to guard against CUDA init on
        # import; if the model stopped doing that, the entry is stale.
        warnings.warn(
            "This model no longer initializes CUDA on import. "
            "Please test using a different one.",
            stacklevel=2)
|
|
|
|
|
|
|
|
|
2025-03-17 19:33:35 +08:00
|
|
|
@create_new_process_for_each_test()
@pytest.mark.parametrize("model_arch,is_pp,init_cuda", [
    ("MLPSpeculatorPreTrainedModel", False, False),
    ("DeepseekV2ForCausalLM", True, False),
    ("Qwen2VLForConditionalGeneration", True, True),
])
def test_registry_is_pp(model_arch, is_pp, init_cuda):
    """Check the pipeline-parallel support flag reported by the registry,
    and that merely resolving the model class does not initialize CUDA."""
    assert ModelRegistry.is_pp_supported_model(model_arch) is is_pp

    # CUDA-initialization check only applies on CUDA-like platforms
    # for the architectures flagged with init_cuda.
    if not (init_cuda and current_platform.is_cuda_alike()):
        return

    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if not torch.cuda.is_initialized():
        # The parametrized entry exists to guard against CUDA init on
        # import; if the model stopped doing that, the entry is stale.
        warnings.warn(
            "This model no longer initializes CUDA on import. "
            "Please test using a different one.",
            stacklevel=2)
|
2024-11-15 08:55:54 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_hf_registry_coverage():
    """Every architecture vLLM registers must have an HF example model
    listed in ``tests/models/registry.py``."""
    supported = ModelRegistry.get_supported_archs()
    covered = HF_EXAMPLE_MODELS.get_supported_archs()
    untested_archs = supported - covered

    assert not untested_archs, (
        "Please add the following architectures to "
        f"`tests/models/registry.py`: {untested_archs}")
|