# SPDX-License-Identifier: Apache-2.0
"""Common tests for .generate() functionality, covering single / multiple
image, image embedding, and video inputs for different VLMs in vLLM.
"""
import math
import os
from collections import defaultdict
from pathlib import PosixPath

import pytest
from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq

from vllm.platforms import current_platform
from vllm.utils import identity

from ....conftest import (IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets,
                          _VideoAssets)
from ....utils import (create_new_process_for_each_test, large_gpu_mark,
                       multi_gpu_marks)
from ...utils import check_outputs_equal
from .vlm_utils import custom_inputs, model_utils, runners
from .vlm_utils.case_filtering import get_parametrized_options
from .vlm_utils.types import (CustomTestOptions, ExpandableVLMTestArgs,
                              VLMTestInfo, VLMTestType)

# This hack is needed for phi3v & paligemma models
# ROCm Triton FA can run into shared memory issues with these models,
# use other backends in the meantime
# FIXME (mattwong, gshtrasb, hongxiayan)
if current_platform.is_rocm():
    os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
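
# Model types that must fall back to the V0 engine for now; the test wrappers
# below set VLLM_USE_V1=0 for these before running.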
REQUIRES_V0_MODELS = [
    # V1 Test: no way to fall back for head_dim = 80
    # https://github.com/vllm-project/vllm/issues/14524
    "qwen_vl",
    # V1 Test: not enough KV cache space in C1.
    "fuyu",
]

# yapf: disable
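# Shared kwargs for the tensor parallel / multi-gpu broadcast tests defined
# near the end of VLM_TEST_SETTINGS; each "*-broadcast" entry unpacks this.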
COMMON_BROADCAST_SETTINGS = {
    "test_type": VLMTestType.IMAGE,
    "dtype": "half",
    "max_tokens": 5,
    "tensor_parallel_size": 2,
    "hf_model_kwargs": {"device_map": "auto"},
    "image_size_factors": [(.25, 0.5, 1.0)],
    "distributed_executor_backend": (
        "ray",
        "mp",
    )
}

### Test configuration for specific models
# NOTE: The convention of the test settings below is to lead each test key
# with the name of the model arch used in the test, using underscores in place
# of hyphens; this makes it more convenient to filter tests for a specific kind
# of model. For example:
#
# To run all test types for a specific key:
#     use the -k flag to substring match with a leading square bracket; if the
#     model arch happens to be a substring of another one, you can add a
#     trailing hyphen. E.g.,
#     - pytest $TEST_FILE -k "[llava-"
#       prevents matching on "[llava_next-" & will match just the enabled cases
#       for llava, i.e., single image, image embedding, and custom input tests.
#
# To run a test for a Test Info for just one of multiple models:
#     use the -k flag to substring match the model name, e.g.,
#     - pytest $TEST_FILE -k OpenGVLab/InternVL2-1B
#       prevents matching on OpenGVLab/InternVL2-2B.
#
# You can also combine substrings to match more granularly.
#     ex 1:
#         pytest $TEST_FILE -k "test_single_image and OpenGVLab/InternVL2-1B"
#     will run only test_single_image* for OpenGVLab/InternVL2-1B; this would
#     match both wrappers for single image tests, since it also matches
#     test_single_image_heavy (which forks if we have a distributed backend)
#     ex 2:
#         pytest $TEST_FILE -k "[llava- or [intern_vl-"
#     will run all of the tests for only llava & internvl.
#
# NOTE you can add --collect-only to any of the above commands to see
# which cases would be selected and deselected by pytest. In general,
# this is a good idea for checking your command first, since tests are slow.
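# For example, to preview which llava cases a filter would select:
#     pytest $TEST_FILE -k "[llava-" --collect-only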

VLM_TEST_SETTINGS = {
    #### Core tests to always run in the CI
    "llava": VLMTestInfo(
        models=["llava-hf/llava-1.5-7b-hf"],
        test_type=(
            VLMTestType.EMBEDDING,
            VLMTestType.IMAGE,
            VLMTestType.CUSTOM_INPUTS
        ),
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        convert_assets_to_embeddings=model_utils.get_llava_embeddings,
        max_model_len=4096,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        custom_test_opts=[CustomTestOptions(
            inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
                formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:"
            ),
            limit_mm_per_prompt={"image": 4},
        )],
        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
    ),
    "paligemma": VLMTestInfo(
        models=["google/paligemma-3b-mix-224"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=identity,
        img_idx_to_prompt = lambda idx: "",
        # Paligemma uses its own sample prompts because the default one fails
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "caption es",
            "cherry_blossom": "What is in the picture?",
        }),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
        dtype="bfloat16",
        marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")],  # noqa: E501
    ),
    "qwen2_5_vl": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=(
            VLMTestType.IMAGE,
            VLMTestType.MULTI_IMAGE,
            VLMTestType.VIDEO
        ),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForVision2Seq,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
    ),
    #### Extended model tests
    "aria": VLMTestInfo(
        models=["rhymes-ai/Aria"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n ",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<fim_prefix><|img|><fim_suffix>\n",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<vlm_image>Please describe the image shortly.",
            "cherry_blossom": "<vlm_image>Please infer the season with reason.",  # noqa: E501
        }),
        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",  # noqa: E501
        stop_str=["<|im_end|>"],
        image_size_factors=[(0.10, 0.15)],
        max_tokens=64,
        marks=[large_gpu_mark(min_gb=64)],
    ),
    "aya_vision": VLMTestInfo(
        models=["CohereForAI/aya-vision-8b"],
        test_type=(VLMTestType.IMAGE),
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<image>What is the season?",  # noqa: E501
        }),
        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
    ),
    "aya_vision-multi_image": VLMTestInfo(
        models=["CohereForAI/aya-vision-8b"],
        test_type=(VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<image>What is the season?",  # noqa: E501
        }),
        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "blip2": VLMTestInfo(
        # TODO: Change back to 2.7b once head_dim = 80 is supported
        models=["Salesforce/blip2-opt-6.7b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"Question: {img_prompt} Answer:",
        img_idx_to_prompt=lambda idx: "",
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.blip2_vllm_to_hf_output,
    ),
    "chameleon": VLMTestInfo(
        models=["facebook/chameleon-7b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        # For chameleon, we only compare the sequences
        vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
        hf_output_post_proc = lambda hf_output, model: hf_output[:2],
        comparator=check_outputs_equal,
        max_tokens=8,
        dtype="bfloat16",
    ),
    "deepseek_vl_v2": VLMTestInfo(
        models=["Isotr0py/deepseek-vl2-tiny"],  # model repo using dynamic module
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|User|>: {img_prompt}\n\n<|Assistant|>: ",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<image>\nPlease infer the season with reason in details.",  # noqa: E501
        }),
        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?",  # noqa: E501
        patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
        hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
        stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"],  # noqa: E501
        image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
    ),
    "fuyu": VLMTestInfo(
        models=["adept/fuyu-8b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"{img_prompt}\n",
        img_idx_to_prompt=lambda idx: "",
        max_model_len=2048,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        use_tokenizer_eos=True,
        vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
        num_logprobs=10,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
    ),
    "gemma3": VLMTestInfo(
        models=["google/gemma-3-4b-it"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<start_of_image>What's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<start_of_image>What is the season?",  # noqa: E501
        }),
        multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
        patch_hf_runner=model_utils.gemma3_patch_hf_runner,
    ),
    "glm4v": VLMTestInfo(
        models=["THUDM/glm-4v-9b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<|begin_of_image|><|endoftext|><|end_of_image|>What is the season?",  # noqa: E501
        }),
        max_model_len=2048,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: [151329, 151336, 151338],
        patch_hf_runner=model_utils.glm4v_patch_hf_runner,
        # The image embeddings match with HF but the outputs of the language
        # decoder are only consistent up to 2 decimal places.
        # So, we need to reduce the number of tokens for the test to pass.
        max_tokens=8,
        num_logprobs=10,
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "h2ovl": VLMTestInfo(
        models = [
            "h2oai/h2ovl-mississippi-800m",
            # TODO: Re-enable once head_dim = 80 is supported
            # "h2oai/h2ovl-mississippi-2b",
        ],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<image>\nWhat is the season?",
        }),
        multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.",  # noqa: E501
        max_model_len=8192,
        use_tokenizer_eos=True,
        num_logprobs=10,
        patch_hf_runner=model_utils.h2ovl_patch_hf_runner,
    ),
    "idefics3": VLMTestInfo(
        models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<image>",
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        hf_output_post_proc=model_utils.idefics3_trunc_hf_output,
    ),
    "intern_vl": VLMTestInfo(
        models=[
            "OpenGVLab/InternVL2-1B",
            "OpenGVLab/InternVL2-2B",
            "OpenGVLab/Mono-InternVL-2B",
        ],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<image>\nWhat is the season?",
        }),
        multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.",  # noqa: E501
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.internvl_patch_hf_runner,
    ),
    "llama4": VLMTestInfo(
        models=["meta-llama/Llama-4-Scout-17B-16E-Instruct"],
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|header_start|>user<|header_end|>\n\n{img_prompt}<|eot|><|header_start|>assistant<|header_end|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda _: "<|image|>",
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        distributed_executor_backend="mp",
        image_size_factors=[(.25, 0.5, 1.0)],
        hf_model_kwargs={"device_map": "auto"},
        max_model_len=8192,
        max_num_seqs=4,
        dtype="bfloat16",
        auto_cls=AutoModelForImageTextToText,
        tensor_parallel_size=8,
        vllm_runner_kwargs={"gpu_memory_utilization": 0.8},
        marks=multi_gpu_marks(num_gpus=8),
    ),
    "llava_next": VLMTestInfo(
        models=["llava-hf/llava-v1.6-mistral-7b-hf"],
        test_type=(VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
        prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
        max_model_len=10240,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        custom_test_opts=[CustomTestOptions(
            inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
                formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]"
            ),
            limit_mm_per_prompt={"image": 4},
        )],
    ),
    "llava_onevision": VLMTestInfo(
        models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
        test_type=VLMTestType.CUSTOM_INPUTS,
        prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        num_video_frames=16,
        max_model_len=16384,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),  # noqa: E501
        auto_cls=AutoModelForVision2Seq,
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[CustomTestOptions(
            inputs=custom_inputs.multi_video_multi_aspect_ratio_inputs(
                formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
            ),
            limit_mm_per_prompt={"video": 4},
            runner_mm_key="videos",
        )],
    ),
    "llava_next_video": VLMTestInfo(
        models=["llava-hf/LLaVA-NeXT-Video-7B-hf"],
        test_type=VLMTestType.VIDEO,
        prompt_formatter=lambda vid_prompt: f"USER: {vid_prompt} ASSISTANT:",
        num_video_frames=16,
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForVision2Seq,
        vllm_output_post_proc=model_utils.llava_video_vllm_to_hf_output,
    ),
    "mantis": VLMTestInfo(
        models=["TIGER-Lab/Mantis-8B-siglip-llama3"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        max_model_len=4096,
        get_stop_token_ids=lambda tok: [128009],
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
        patch_hf_runner=model_utils.mantis_patch_hf_runner,
    ),
    "minicpmv_25": VLMTestInfo(
        models=["openbmb/MiniCPM-Llama3-V-2_5"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
        max_model_len=4096,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: [tok.eos_id, tok.eot_id],
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmv_25_patch_hf_runner,
    ),
    "minicpmo_26": VLMTestInfo(
        models=["openbmb/MiniCPM-o-2_6"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
        max_model_len=4096,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']),  # noqa: E501
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
    ),
    "minicpmv_26": VLMTestInfo(
        models=["openbmb/MiniCPM-V-2_6"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
        max_model_len=4096,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']),  # noqa: E501
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
    ),
    "molmo": VLMTestInfo(
        models=["allenai/Molmo-7B-D-0924"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=identity,
        max_model_len=4096,
        max_num_seqs=2,
        patch_hf_runner=model_utils.molmo_patch_hf_runner,
    ),
    # Tests for phi3v currently live in another file because of a bug in
    # transformers. Once this issue is fixed, we can enable them here instead.
    # https://github.com/huggingface/transformers/issues/34307
    # "phi3v": VLMTestInfo(
    #     models=["microsoft/Phi-3.5-vision-instruct"],
    #     test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
    #     prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|end|>\n<|assistant|>\n",  # noqa: E501
    #     img_idx_to_prompt=lambda idx: f"<|image_{idx}|>\n",
    #     max_model_len=4096,
    #     max_num_seqs=2,
    #     task="generate",
    #     # use eager mode for hf runner since phi3v didn't work with flash_attn
    #     hf_model_kwargs={"_attn_implementation": "eager"},
    #     use_tokenizer_eos=True,
    #     vllm_output_post_proc=model_utils.phi3v_vllm_to_hf_output,
    #     num_logprobs=10,
    # ),
    "pixtral_hf": VLMTestInfo(
        models=["nm-testing/pixtral-12b-FP8-dynamic"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<s>[INST]{img_prompt}[/INST]",
        img_idx_to_prompt=lambda idx: "[IMG]",
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        marks=[large_gpu_mark(min_gb=48)],
    ),
    "qwen_vl": VLMTestInfo(
        models=["Qwen/Qwen-VL"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=identity,
        img_idx_to_prompt=lambda idx: f"Picture {idx}: <img></img>\n",
        max_model_len=1024,
        max_num_seqs=2,
        vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output,
        prompt_path_encoder=model_utils.qwen_prompt_path_encoder,
    ),
    "qwen2_vl": VLMTestInfo(
        models=["Qwen/Qwen2-VL-2B-Instruct"],
        test_type=(
            VLMTestType.IMAGE,
            VLMTestType.MULTI_IMAGE,
            VLMTestType.VIDEO
        ),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForVision2Seq,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[pytest.mark.cpu_model],
    ),
    "skywork_r1v": VLMTestInfo(
        models=["Skywork/Skywork-R1V-38B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin▁of▁sentence|><|User|>\n{img_prompt}<|Assistant|><think>\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts({
            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
            "cherry_blossom": "<image>\nWhat is the season?",
        }),
        multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",  # noqa: E501
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.skyworkr1v_patch_hf_runner,
        marks=[large_gpu_mark(min_gb=80)],
    ),
    ### Tensor parallel / multi-gpu broadcast tests
    "chameleon-broadcast": VLMTestInfo(
        models=["facebook/chameleon-7b"],
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        max_model_len=4096,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
        hf_output_post_proc = lambda hf_output, model: hf_output[:2],
        comparator=check_outputs_equal,
        marks=multi_gpu_marks(num_gpus=2),
        **COMMON_BROADCAST_SETTINGS  # type: ignore
    ),
    "llava-broadcast": VLMTestInfo(
        models=["llava-hf/llava-1.5-7b-hf"],
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        max_model_len=4096,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        marks=multi_gpu_marks(num_gpus=2),
        **COMMON_BROADCAST_SETTINGS  # type: ignore
    ),
    "llava_next-broadcast": VLMTestInfo(
        models=["llava-hf/llava-v1.6-mistral-7b-hf"],
        prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
        max_model_len=10240,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        marks=multi_gpu_marks(num_gpus=2),
        **COMMON_BROADCAST_SETTINGS  # type: ignore
    ),
    ### Custom input edge-cases for specific models
    "intern_vl-diff-patches": VLMTestInfo(
        models=["OpenGVLab/InternVL2-2B"],
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        test_type=VLMTestType.CUSTOM_INPUTS,
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.internvl_patch_hf_runner,
        custom_test_opts=[
            CustomTestOptions(
                inputs=inp,
                limit_mm_per_prompt={"image": 2},
            ) for inp in custom_inputs.different_patch_input_cases_internvl()
        ],
    ),
    "llava_onevision-multiple-images": VLMTestInfo(
        models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
        test_type=VLMTestType.CUSTOM_INPUTS,
        max_model_len=16384,
        max_num_seqs=2,
        auto_cls=AutoModelForVision2Seq,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),  # noqa: E501
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[CustomTestOptions(
            inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
                formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
            ),
            limit_mm_per_prompt={"image": 4},
        )],
    ),
    # regression test for https://github.com/vllm-project/vllm/issues/15122
    "qwen2_5_vl-windows-attention": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=VLMTestType.CUSTOM_INPUTS,
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForVision2Seq,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        custom_test_opts=[CustomTestOptions(
            inputs=custom_inputs.windows_attention_image_qwen2_5_vl(),
            limit_mm_per_prompt={"image": 1},
        )],
    ),
}
# yapf: enable


def _mark_splits(
    test_settings: dict[str, VLMTestInfo],
    *,
    num_groups: int,
) -> dict[str, VLMTestInfo]:
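    """Assign each model (and the test entries that use it) to one of
    ``num_groups`` groups and attach a ``pytest.mark.split(group=...)``
    marker, so the suite can be sharded across CI jobs."""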
    name_by_test_info_id = {id(v): k for k, v in test_settings.items()}
    test_infos_by_model = defaultdict[str, list[VLMTestInfo]](list)

    for info in test_settings.values():
        for model in info.models:
            test_infos_by_model[model].append(info)

    models = sorted(test_infos_by_model.keys())
    split_size = math.ceil(len(models) / num_groups)

    new_test_settings = dict[str, VLMTestInfo]()

    for i in range(num_groups):
        models_in_group = models[i * split_size:(i + 1) * split_size]

        for model in models_in_group:
            for info in test_infos_by_model[model]:
                new_marks = (info.marks or []) + [pytest.mark.split(group=i)]
                new_info = info._replace(marks=new_marks)
                new_test_settings[name_by_test_info_id[id(info)]] = new_info

    missing_keys = test_settings.keys() - new_test_settings.keys()
    assert not missing_keys, f"Missing keys: {missing_keys}"

    return new_test_settings
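

# Tag every entry with a split group so the suite can run as two CI shards.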
VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2)


### Test wrappers
# Wrappers around the core test running func for:
# - single image
# - multi-image
# - image embeddings
# - video
# - custom inputs
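# Each wrapper forces the V0 engine (VLLM_USE_V1=0) for models listed in
# REQUIRES_V0_MODELS before dispatching to the shared runners.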
@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.IMAGE,
        create_new_process_for_each_test=False,
    ))
def test_single_image_models(tmp_path: PosixPath, model_type: str,
                             test_case: ExpandableVLMTestArgs,
                             hf_runner: type[HfRunner],
                             vllm_runner: type[VllmRunner],
                             image_assets: _ImageAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_single_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.MULTI_IMAGE,
        create_new_process_for_each_test=False,
    ))
def test_multi_image_models(tmp_path: PosixPath, model_type: str,
                            test_case: ExpandableVLMTestArgs,
                            hf_runner: type[HfRunner],
                            vllm_runner: type[VllmRunner],
                            image_assets: _ImageAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_multi_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.EMBEDDING,
        create_new_process_for_each_test=False,
    ))
def test_image_embedding_models(model_type: str,
                                test_case: ExpandableVLMTestArgs,
                                hf_runner: type[HfRunner],
                                vllm_runner: type[VllmRunner],
                                image_assets: _ImageAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_embedding_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.VIDEO,
        create_new_process_for_each_test=False,
    ))
def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
                      hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
                      video_assets: _VideoAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_video_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        video_assets=video_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.CUSTOM_INPUTS,
        create_new_process_for_each_test=False,
    ))
def test_custom_inputs_models(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    monkeypatch,
):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_custom_inputs_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
    )


#### Tests filtering for things running each test as a new process
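# These mirror the wrappers above, but select the cases that request a new
# process per test (create_new_process_for_each_test=True).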
@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.IMAGE,
        create_new_process_for_each_test=True,
    ))
@create_new_process_for_each_test()
def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
                                   test_case: ExpandableVLMTestArgs,
                                   hf_runner: type[HfRunner],
                                   vllm_runner: type[VllmRunner],
                                   image_assets: _ImageAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_single_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.MULTI_IMAGE,
        create_new_process_for_each_test=True,
    ))
@create_new_process_for_each_test()
def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
                                  test_case: ExpandableVLMTestArgs,
                                  hf_runner: type[HfRunner],
                                  vllm_runner: type[VllmRunner],
                                  image_assets: _ImageAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_multi_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.EMBEDDING,
        create_new_process_for_each_test=True,
    ))
@create_new_process_for_each_test()
def test_image_embedding_models_heavy(model_type: str,
                                      test_case: ExpandableVLMTestArgs,
                                      hf_runner: type[HfRunner],
                                      vllm_runner: type[VllmRunner],
                                      image_assets: _ImageAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_embedding_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.VIDEO,
        create_new_process_for_each_test=True,
    ))
def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
                            hf_runner: type[HfRunner],
                            vllm_runner: type[VllmRunner],
                            video_assets: _VideoAssets, monkeypatch):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_video_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        video_assets=video_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.CUSTOM_INPUTS,
        create_new_process_for_each_test=True,
    ))
@create_new_process_for_each_test()
def test_custom_inputs_models_heavy(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    monkeypatch,
):
    if model_type in REQUIRES_V0_MODELS:
        monkeypatch.setenv("VLLM_USE_V1", "0")
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_custom_inputs_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
    )