130 lines
4.1 KiB
Python
130 lines
4.1 KiB
Python
![]() |
from typing import Optional, Tuple
|
||
|
|
||
|
import pytest
|
||
|
import torch
|
||
|
from PIL.Image import Image
|
||
|
from transformers import AutoConfig
|
||
|
|
||
|
# Import the functions to test
|
||
|
from vllm.model_executor.models.h2ovl import (calculate_num_blocks,
|
||
|
image_to_pixel_values_wrapper)
|
||
|
from vllm.multimodal.utils import rescale_image_size
|
||
|
|
||
|
# Model identifiers that test_image_preprocessing is parametrized over; each
# one is passed to AutoConfig.from_pretrained to load the model's config.
models = [
    "h2oai/h2ovl-mississippi-800m",
    "h2oai/h2ovl-mississippi-2b",
]
|
||
|
|
||
|
|
||
|
def run_preprocessing_test(
    image: Image,
    config,
    max_dynamic_patch: Optional[int] = None,
) -> Tuple[torch.Tensor, int]:
    """Preprocess ``image`` and compute how many blocks should come back.

    Args:
        image: The image to preprocess; its ``size`` supplies width/height.
        config: Model config providing ``max_dynamic_patch``,
            ``min_dynamic_patch``, ``use_msac``, ``use_thumbnail`` and
            ``vision_config.image_size``.
        max_dynamic_patch: Upper patch limit; when ``None`` the value from
            ``config.max_dynamic_patch`` is used instead.

    Returns:
        A ``(pixel_values, expected_blocks)`` pair: the output of the mapper
        built by ``image_to_pixel_values_wrapper`` and the block count
        derived independently via ``calculate_num_blocks``.
    """
    patch_limit = (config.max_dynamic_patch
                   if max_dynamic_patch is None else max_dynamic_patch)

    width, height = image.size
    msac_enabled = config.use_msac

    # Run the real preprocessing path first.
    to_pixel_values = image_to_pixel_values_wrapper(config, patch_limit,
                                                    msac_enabled)
    pixel_values = to_pixel_values(image)

    def _count_blocks(prior_aspect_ratio):
        # Thumbnails are accounted for by the caller, so the helper is always
        # asked for the block count without one.
        return calculate_num_blocks(
            width,
            height,
            config.min_dynamic_patch,
            patch_limit,
            config.vision_config.image_size,
            use_thumbnail=False,
            prior_aspect_ratio=prior_aspect_ratio,
        )

    if not msac_enabled:
        # Single pass: one optional thumbnail on top of a multi-block result.
        expected_blocks, _, _, _ = _count_blocks(None)
        if config.use_thumbnail and expected_blocks > 1:
            expected_blocks += 1
        return pixel_values, expected_blocks

    # MSAC: the second pass is given the aspect ratio found by the first.
    first_pass, _, _, first_ratio = _count_blocks(None)
    second_pass, _, _, _ = _count_blocks(first_ratio)

    # Each pass gains a thumbnail only when it produced more than one block.
    if config.use_thumbnail:
        if first_pass > 1:
            first_pass += 1
        if second_pass > 1:
            second_pass += 1

    # One block is subtracted from the sum of the two passes (the overlap
    # between them).
    expected_blocks = first_pass + second_pass - 1
    return pixel_values, expected_blocks
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("model_name", models)
@pytest.mark.parametrize(
    "size_factors",
    [
        [1.0],  # single-scale
        [1.0, 1.0, 1.0],  # single-scale, batched
        [0.25, 0.5, 1.0],  # multi-scale
    ],
)
@pytest.mark.parametrize("max_dynamic_patch", [None, 2, 4, 8])
def test_image_preprocessing(image_assets, model_name, size_factors,
                             max_dynamic_patch):
    """Check block count and per-block shape of the preprocessed images.

    For every asset in ``image_assets`` and every factor in ``size_factors``
    the rescaled image goes through ``run_preprocessing_test``. The first
    dimension of the returned tensor must equal the expected block count, and
    each block must have shape ``(3, image_size, image_size)``.
    """
    # The model's own config drives both preprocessing and the expectations.
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

    for sample in image_assets:
        source_image = sample.pil_image
        for scale in size_factors:
            resized = rescale_image_size(source_image, scale)

            # Preprocess and obtain the independently computed block count.
            pixel_values, expected_blocks = run_preprocessing_test(
                resized, config, max_dynamic_patch)

            # The leading dimension indexes blocks.
            actual_blocks = pixel_values.shape[0]
            assert actual_blocks == expected_blocks, (
                f"Expected {expected_blocks} blocks, got {actual_blocks}")

            # Every block is a square (C, H, W) image with three channels.
            edge = config.vision_config.image_size
            expected_size = (3, edge, edge)
            for img in pixel_values:
                assert img.shape == expected_size, (
                    f"Expected image size {expected_size}, got {img.shape}")
|