vllm/tests/models/test_phi3v.py

import os
import re
from typing import List, Optional, Tuple, Type

import pytest
from transformers import AutoTokenizer

from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from vllm.utils import is_cpu, is_hip

from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_logprobs_close

# Tag every test in this module with the `vlm` pytest marker.
pytestmark = pytest.mark.vlm

# Prompts for the runners, keyed by image asset name. Each prompt wraps one
# question in chat-style markup with a single `<|image_1|>` placeholder.
# NOTE(review): run_test() zips this with the image assets, so
# IMAGE_ASSETS.prompts() presumably returns the prompts in asset order --
# confirm against conftest.
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "stop_sign":
    "<|user|>\n<|image_1|>\nWhat's the content of the image?<|end|>\n<|assistant|>\n",  # noqa: E501
    "cherry_blossom":
    "<|user|>\n<|image_1|>\nWhat is the season?<|end|>\n<|assistant|>\n",
})

# Model checkpoints that test_models is parametrized over.
models = ["microsoft/Phi-3-vision-128k-instruct"]


def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
                                         Optional[SampleLogprobs]],
                      model: str):
    """Sanitize vllm output to be comparable with hf output.

    Args:
        vllm_output: ``(token_ids, text, logprobs)`` triple from the vLLM
            runner. The token ids are ignored; they are recomputed from the
            sanitized text.
        model: Model name or path handed to ``AutoTokenizer.from_pretrained``.

    Returns:
        ``(hf_output_ids, hf_output_str, out_logprobs)`` where
        ``hf_output_str`` is the sanitized text followed by
        ``<|end|><|endoftext|>``, ``hf_output_ids`` is the tokenization of
        the sanitized text without its first token, and ``out_logprobs`` is
        passed through unchanged.

    Raises:
        AssertionError: If the text does not start with a space once the
            image placeholders are removed (including when it is empty), or
            if the tokenizer output does not start with token id 1.
    """
    _, output_str, out_logprobs = vllm_output

    # Strip every run of `<|image_N|>` placeholders from the generated text.
    output_str_without_image = re.sub(r"(<\|image_\d+\|>)+", "", output_str)
    # startswith() rather than [0] so that an empty string fails with a
    # readable assertion instead of an IndexError.
    assert output_str_without_image.startswith(" "), (
        "expected vLLM output to start with a space after removing image "
        f"placeholders, got {output_str_without_image!r}")
    output_str_without_image = output_str_without_image[1:]

    hf_output_str = output_str_without_image + "<|end|><|endoftext|>"

    tokenizer = AutoTokenizer.from_pretrained(model)
    hf_output_ids = tokenizer.encode(output_str_without_image)
    # Token id 1 is presumably the BOS token the tokenizer prepends; it is
    # dropped so that only the ids of the generated text remain.
    assert hf_output_ids[:1] == [1], (
        f"expected tokenizer output to start with token id 1, "
        f"got {hf_output_ids[:1]}")
    hf_output_ids = hf_output_ids[1:]

    return hf_output_ids, hf_output_str, out_logprobs


# CPU runs use bfloat16; every other platform uses half precision.
target_dtype = "bfloat16" if is_cpu() else "half"

# FIXME (mattwong, gshtrasb, hongxiayan)
# On ROCm the Triton flash-attention backend can run into shared memory
# issues with these models, so it is switched off through the environment
# and another backend is used in the meantime.
if is_hip():
    os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"


def run_test(
    hf_runner: Type[HfRunner],
    vllm_runner: Type[VllmRunner],
    image_assets: _ImageAssets,
    model: str,
    *,
    size_factors: List[float],
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
    tensor_parallel_size: int,
    distributed_executor_backend: Optional[str] = None,
):
    """Compare greedy-decoding logprobs from vLLM against HuggingFace.

    Every image asset is paired with its prompt and rescaled once per entry
    in ``size_factors``; the same prompts and PIL images are fed to both
    runners. The vLLM results are sanitized with ``vllm_to_hf_output``
    before being handed to ``check_logprobs_close`` together with the HF
    results.

    Args:
        hf_runner: HuggingFace runner class, used as a context manager.
        vllm_runner: vLLM runner class, used as a context manager.
        image_assets: Image assets; each one exposes ``pil_image``.
        model: Model name given to both runners and to the tokenizer.
        size_factors: Scale factors applied to every image. An empty list
            yields empty prompt and image batches.
        dtype: dtype string passed to both runners.
        max_tokens: Generation length limit passed to both runners.
        num_logprobs: Number of logprobs requested from both runners.
        tensor_parallel_size: Forwarded to the vLLM runner.
        distributed_executor_backend: Forwarded to the vLLM runner.
    """
    pil_images = [asset.pil_image for asset in image_assets]

    # One (prompts, images) batch per asset: the prompt is repeated and the
    # image rescaled once for every requested size factor.
    inputs_per_image = []
    for pil_image, prompt in zip(pil_images, HF_IMAGE_PROMPTS):
        repeated_prompts = [prompt] * len(size_factors)
        scaled_images = [
            rescale_image_size(pil_image, scale) for scale in size_factors
        ]
        inputs_per_image.append((repeated_prompts, scaled_images))

    # NOTE: the order matters -- vLLM runs first, HF second.
    # vLLM needs a fresh process without cuda initialization. Running HF
    # first would initialize cuda, which hurts the multiprocessing backend
    # when it uses the fork method (the default method).

    # max_model_len should be greater than image_feature_size
    with vllm_runner(
            model,
            max_model_len=4096,
            max_num_seqs=1,
            dtype=dtype,
            tensor_parallel_size=tensor_parallel_size,
            distributed_executor_backend=distributed_executor_backend,
            enforce_eager=True,
    ) as vllm_model:
        vllm_outputs_per_image = []
        for batch_prompts, batch_images in inputs_per_image:
            vllm_outputs_per_image.append(
                vllm_model.generate_greedy_logprobs(
                    batch_prompts,
                    max_tokens,
                    num_logprobs=num_logprobs,
                    images=batch_images,
                ))

    # The HF runner uses eager attention because phi3_v didn't work with
    # flash_attn.
    with hf_runner(
            model,
            dtype=dtype,
            model_kwargs={"_attn_implementation": "eager"},
    ) as hf_model:
        eos_token_id = hf_model.processor.tokenizer.eos_token_id
        hf_outputs_per_image = []
        for batch_prompts, batch_images in inputs_per_image:
            hf_outputs_per_image.append(
                hf_model.generate_greedy_logprobs_limit(
                    batch_prompts,
                    max_tokens,
                    num_logprobs=num_logprobs,
                    images=batch_images,
                    eos_token_id=eos_token_id,
                ))

    # Compare batch by batch, after bringing the vLLM outputs into HF form.
    paired_outputs = zip(hf_outputs_per_image, vllm_outputs_per_image)
    for hf_outputs, vllm_outputs in paired_outputs:
        sanitized_vllm_outputs = [
            vllm_to_hf_output(raw_output, model)
            for raw_output in vllm_outputs
        ]
        check_logprobs_close(
            outputs_0_lst=hf_outputs,
            outputs_1_lst=sanitized_vllm_outputs,
            name_0="hf",
            name_1="vllm",
        )


# The HF reference runs with _attn_implementation="eager", which makes the
# numerical difference to vLLM more significant. The basic `logprobs=5`
# fails to pass, so this test compares 10 logprobs.
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
    "size_factors",
    [
        [],  # No image
        [1.0],  # Single-scale
        [1.0, 1.0, 1.0],  # Single-scale, batched
        [0.25, 0.5, 1.0],  # Multi-scale
    ],
)
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10])
def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
                dtype: str, max_tokens: int, num_logprobs: int) -> None:
    """Run the HF-vs-vLLM comparison with ``tensor_parallel_size=1``."""
    run_options = {
        "size_factors": size_factors,
        "dtype": dtype,
        "max_tokens": max_tokens,
        "num_logprobs": num_logprobs,
        "tensor_parallel_size": 1,
    }
    run_test(hf_runner, vllm_runner, image_assets, model, **run_options)
[Bugfix][CI/Build][Hardware][AMD] Fix AMD tests, add HF cache, update CK FA, add partially supported model notes (#6543) 2024-07-20 11:39:07 -05:00			`import os`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`import re`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`from typing import List, Optional, Tuple, Type`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
			`import pytest`
			`from transformers import AutoTokenizer`

[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`from vllm.multimodal.utils import rescale_image_size`
			`from vllm.sequence import SampleLogprobs`
[Bugfix][CI/Build][Hardware][AMD] Fix AMD tests, add HF cache, update CK FA, add partially supported model notes (#6543) 2024-07-20 11:39:07 -05:00			`from vllm.utils import is_cpu, is_hip`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`from .utils import check_logprobs_close`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[CI/Build][Misc] Update Pytest Marker for VLMs (#5623) 2024-06-18 06:10:04 -07:00			`pytestmark = pytest.mark.vlm`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[CI/Build] Refactor image test assets (#5821) 2024-06-26 16:02:34 +08:00			`HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({`
			`"stop_sign":`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00			`"<\|user\|>\n<\|image_1\|>\nWhat's the content of the image?<\|end\|>\n<\|assistant\|>\n", # noqa: E501`
[CI/Build] Refactor image test assets (#5821) 2024-06-26 16:02:34 +08:00			`"cherry_blossom":`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`"<\|user\|>\n<\|image_1\|>\nWhat is the season?<\|end\|>\n<\|assistant\|>\n",`
[CI/Build] Refactor image test assets (#5821) 2024-06-26 16:02:34 +08:00			`})`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`models = ["microsoft/Phi-3-vision-128k-instruct"]`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00

[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`def vllm_to_hf_output(vllm_output: Tuple[List[int], str,`
			`Optional[SampleLogprobs]],`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`model: str):`
			`"""Sanitize vllm output to be comparable with hf output."""`
			`_, output_str, out_logprobs = vllm_output`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`output_str_without_image = re.sub(r"(<\\|image_\d+\\|>)+", "", output_str)`
			`assert output_str_without_image[0] == " "`
			`output_str_without_image = output_str_without_image[1:]`

[CI/Build] Cleanup VLM tests (#6107) 2024-07-04 09:58:18 +08:00			`hf_output_str = output_str_without_image + "<\|end\|><\|endoftext\|>"`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`tokenizer = AutoTokenizer.from_pretrained(model)`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`hf_output_ids = tokenizer.encode(output_str_without_image)`
			`assert hf_output_ids[0] == 1`
			`hf_output_ids = hf_output_ids[1:]`

			`return hf_output_ids, hf_output_str, out_logprobs`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00

			`target_dtype = "half"`
			`if is_cpu():`
			`target_dtype = "bfloat16"`

[Bugfix][CI/Build][Hardware][AMD] Fix AMD tests, add HF cache, update CK FA, add partially supported model notes (#6543) 2024-07-20 11:39:07 -05:00			`# ROCm Triton FA can run into shared memory issues with these models,`
			`# use other backends in the meantime`
			`# FIXME (mattwong, gshtrasb, hongxiayan)`
			`if is_hip():`
			`os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"`

[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`def run_test(`
			`hf_runner: Type[HfRunner],`
			`vllm_runner: Type[VllmRunner],`
			`image_assets: _ImageAssets,`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`model: str,`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`*,`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`size_factors: List[float],`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`dtype: str,`
			`max_tokens: int,`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`num_logprobs: int,`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`tensor_parallel_size: int,`
			`distributed_executor_backend: Optional[str] = None,`
			`):`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00			`"""Inference result should be the same between hf and vllm.`

			`All the image fixtures for the test is under tests/images.`
			`For huggingface runner, we provide the PIL images as input.`
[VLM] Remove `image_input_type` from VLM config (#5852) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-02 00:57:09 -07:00			`For vllm runner, we provide MultiModalDataDict objects`
			`and corresponding vision language config as input.`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00			`Note, the text input is also adjusted to abide by vllm contract.`
			`The text output is sanitized to be able to compare with hf.`
			`"""`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`images = [asset.pil_image for asset in image_assets]`

			`inputs_per_image = [(`
			`[prompt for _ in size_factors],`
			`[rescale_image_size(image, factor) for factor in size_factors],`
			`) for image, prompt in zip(images, HF_IMAGE_PROMPTS)]`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[ci][distributed] fix device count call [ci][distributed] fix some cuda init that makes it necessary to use spawn (#5991) 2024-06-30 01:06:13 -07:00			`# NOTE: take care of the order. run vLLM first, and then run HF.`
			`# vLLM needs a fresh new process without cuda initialization.`
			`# if we run HF first, the cuda initialization will be done and it`
			`# will hurt multiprocessing backend with fork method (the default method).`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`# max_model_len should be greater than image_feature_size`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`with vllm_runner(model,`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`max_model_len=4096,`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`max_num_seqs=1,`
[Model] Initialize Phi-3-vision support (#4986) 2024-06-18 10:34:33 +08:00			`dtype=dtype,`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`tensor_parallel_size=tensor_parallel_size,`
			`distributed_executor_backend=distributed_executor_backend,`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`enforce_eager=True) as vllm_model:`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`vllm_outputs_per_image = [`
			`vllm_model.generate_greedy_logprobs(prompts,`
			`max_tokens,`
			`num_logprobs=num_logprobs,`
			`images=vllm_images)`
			`for prompts, vllm_images in inputs_per_image`
[ci][distributed] fix device count call [ci][distributed] fix some cuda init that makes it necessary to use spawn (#5991) 2024-06-30 01:06:13 -07:00			`]`

			`# use eager mode for hf runner, since phi3_v didn't work with flash_attn`
			`hf_model_kwargs = {"_attn_implementation": "eager"}`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`with hf_runner(model, dtype=dtype,`
[ci][distributed] fix device count call [ci][distributed] fix some cuda init that makes it necessary to use spawn (#5991) 2024-06-30 01:06:13 -07:00			`model_kwargs=hf_model_kwargs) as hf_model:`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`eos_token_id = hf_model.processor.tokenizer.eos_token_id`
			`hf_outputs_per_image = [`
			`hf_model.generate_greedy_logprobs_limit(prompts,`
			`max_tokens,`
			`num_logprobs=num_logprobs,`
			`images=hf_images,`
			`eos_token_id=eos_token_id)`
			`for prompts, hf_images in inputs_per_image`
			`]`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`for hf_outputs, vllm_outputs in zip(hf_outputs_per_image,`
			`vllm_outputs_per_image):`
			`check_logprobs_close(`
			`outputs_0_lst=hf_outputs,`
			`outputs_1_lst=[`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`vllm_to_hf_output(vllm_output, model)`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`for vllm_output in vllm_outputs`
			`],`
			`name_0="hf",`
			`name_1="vllm",`
			`)`


			`# Since we use _attn_implementation="eager" for hf_runner, there is more`
			# significant numerical difference. The basic `logprobs=5` fails to pass.
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`@pytest.mark.parametrize("model", models)`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`@pytest.mark.parametrize(`
			`"size_factors",`
			`[`
			`# No image`
			`[],`
			`# Single-scale`
			`[1.0],`
			`# Single-scale, batched`
			`[1.0, 1.0, 1.0],`
			`# Multi-scale`
			`[0.25, 0.5, 1.0],`
			`],`
			`)`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`@pytest.mark.parametrize("dtype", [target_dtype])`
			`@pytest.mark.parametrize("max_tokens", [128])`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`@pytest.mark.parametrize("num_logprobs", [10])`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,`
			`dtype: str, max_tokens: int, num_logprobs: int) -> None:`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`run_test(`
			`hf_runner,`
			`vllm_runner,`
			`image_assets,`
[vlm] Remove vision language config. (#6089) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com> 2024-07-03 15:14:16 -07:00			`model,`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`size_factors=size_factors,`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`dtype=dtype,`
			`max_tokens=max_tokens,`
[Core] Dynamic image size support for VLMs (#5276) Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: ywang96 <ywang@roblox.com> Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-07-03 11:34:00 +08:00			`num_logprobs=num_logprobs,`
[CI/Build] Add TP test for vision models (#5892) 2024-06-29 23:45:54 +08:00			`tensor_parallel_size=1,`
			`)`