# SPDX-License-Identifier: Apache-2.0

import pytest
import torch

from vllm.model_executor.models.vision import resolve_visual_encoder_outputs


@pytest.mark.parametrize(
    ("feature_sample_layers", "num_layers_loaded", "max_possible_layers",
     "expected_features"),
    [
        # All layers loaded
        ([1, 10], 10, 10, [1, 10]),
        ([-10, -1], 10, 10, [1, 10]),
        # Some layers not loaded
        ([1, 10], 10, 20, [1, 10]),
        ([-20, -11], 10, 20, [1, 10]),
    ])
def test_resolve_visual_encoder_outputs(feature_sample_layers,
                                        num_layers_loaded, max_possible_layers,
                                        expected_features):
    """
    Test that offsets are correctly handled for vision feature layers.
    """
    # Simulated hidden states: one single-element tensor per loaded layer,
    # plus the embedding output at index 0.
    encoder_outputs = [
        torch.tensor([idx]) for idx in range(num_layers_loaded + 1)
    ]
    output_tensor = resolve_visual_encoder_outputs(
        encoder_outputs=encoder_outputs,
        feature_sample_layers=feature_sample_layers,
        post_layer_norm=None,
        max_possible_layers=max_possible_layers)
    assert torch.equal(torch.tensor(expected_features), output_tensor)
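

# A minimal sketch of the offset handling the test above exercises, assuming
# (this is not vLLM's implementation) that resolve_visual_encoder_outputs maps
# a negative layer index `idx` to `max_possible_layers + 1 + idx` and
# concatenates the selected hidden states. The helper name is hypothetical.
def _resolve_layers_sketch(encoder_outputs, feature_sample_layers,
                           max_possible_layers):
    # Assumption: index 0 holds the embedding output, so layer `k` lives at
    # position `k` and layer `-1` at position `max_possible_layers`.
    offset = max_possible_layers + 1
    return torch.cat([
        encoder_outputs[idx if idx >= 0 else offset + idx]
        for idx in feature_sample_layers
    ])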