diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index c23b37db..8e4be08f 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -22,7 +22,7 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 
 # Run basic model test
 docker exec cpu-test bash -c "
-  pip install pytest
+  pip install pytest matplotlib einops transformers_stream_generator
   pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
 
 # online inference
diff --git a/requirements-test.txt b/requirements-test.txt
index 62d6cc49..95909d37 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -11,7 +11,7 @@ pytest-shard
 
 # testing utils
 awscli
-einops # required for MPT
+einops # required for MPT and qwen-vl
 httpx
 peft
 requests
@@ -19,6 +19,8 @@ ray
 sentence-transformers # required for embedding
 compressed-tensors==0.4.0 # required for compressed-tensors
 timm # required for internvl test
+transformers_stream_generator # required for qwen-vl test
+matplotlib # required for qwen-vl test
 
 # TODO: Add this after fully implementing llava(mantis)
 # git+https://github.com/TIGER-AI-Lab/Mantis.git # required for llava(mantis) test
diff --git a/tests/models/test_qwen.py b/tests/models/test_qwen.py
new file mode 100644
index 00000000..03605e3b
--- /dev/null
+++ b/tests/models/test_qwen.py
@@ -0,0 +1,48 @@
+from typing import Type
+
+import pytest
+
+from ..conftest import HfRunner, VllmRunner
+from .utils import check_logprobs_close
+
+models = ["qwen/qwen-vl"]
+
+
+@pytest.mark.parametrize("dtype", ["half"])
+@pytest.mark.parametrize("max_tokens", [32])
+@pytest.mark.parametrize("num_logprobs", [5])
+@pytest.mark.parametrize("model", models)
+def test_text_only_qwen_model(
+    hf_runner: Type[HfRunner],
+    vllm_runner: Type[VllmRunner],
+    example_prompts,
+    model: str,
+    *,
+    dtype: str,
+    max_tokens: int,
+    num_logprobs: int,
+):
+    # This test checks language inputs only, since the visual component
+    # for qwen-vl is still unsupported in VLLM. In the near future, the
+    # implementation and this test will be extended to consider
+    # visual inputs as well.
+    with hf_runner(model, dtype=dtype, is_vision_model=False) as hf_model:
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts,
+            max_tokens,
+            num_logprobs=num_logprobs,
+        )
+
+    with vllm_runner(model, dtype=dtype) as vllm_model:
+        vllm_outputs = vllm_model.generate_greedy_logprobs(
+            example_prompts,
+            max_tokens,
+            num_logprobs=num_logprobs,
+        )
+
+    check_logprobs_close(
+        outputs_0_lst=hf_outputs,
+        outputs_1_lst=vllm_outputs,
+        name_0="hf",
+        name_1="vllm",
+    )