From 40e1360bb6e9b299d604863b3be5ccbe9a3bee48 Mon Sep 17 00:00:00 2001
From: Alex Brooks
Date: Sun, 18 Aug 2024 17:43:46 -0600
Subject: [PATCH] [CI/Build] Add text-only test for Qwen models (#7475)

Signed-off-by: Alex-Brooks
---
 .buildkite/run-cpu-test.sh |  2 +-
 requirements-test.txt      |  4 +++-
 tests/models/test_qwen.py  | 48 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 tests/models/test_qwen.py

diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index c23b37db..8e4be08f 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -22,7 +22,7 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 
 # Run basic model test
 docker exec cpu-test bash -c "
-  pip install pytest
+  pip install pytest matplotlib einops transformers_stream_generator
   pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
 
 # online inference
diff --git a/requirements-test.txt b/requirements-test.txt
index 62d6cc49..95909d37 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -11,7 +11,7 @@ pytest-shard
 
 # testing utils
 awscli
-einops # required for MPT
+einops # required for MPT and qwen-vl
 httpx
 peft
 requests
@@ -19,6 +19,8 @@ ray
 sentence-transformers # required for embedding
 compressed-tensors==0.4.0 # required for compressed-tensors
 timm # required for internvl test
+transformers_stream_generator # required for qwen-vl test
+matplotlib # required for qwen-vl test
 
 # TODO: Add this after fully implementing llava(mantis)
 # git+https://github.com/TIGER-AI-Lab/Mantis.git # required for llava(mantis) test
diff --git a/tests/models/test_qwen.py b/tests/models/test_qwen.py
new file mode 100644
index 00000000..03605e3b
--- /dev/null
+++ b/tests/models/test_qwen.py
@@ -0,0 +1,48 @@
+from typing import Type
+
+import pytest
+
+from ..conftest import HfRunner, VllmRunner
+from .utils import check_logprobs_close
+
+models = ["qwen/qwen-vl"]
+
+
+@pytest.mark.parametrize("dtype", ["half"])
+@pytest.mark.parametrize("max_tokens", [32])
+@pytest.mark.parametrize("num_logprobs", [5])
+@pytest.mark.parametrize("model", models)
+def test_text_only_qwen_model(
+    hf_runner: Type[HfRunner],
+    vllm_runner: Type[VllmRunner],
+    example_prompts,
+    model: str,
+    *,
+    dtype: str,
+    max_tokens: int,
+    num_logprobs: int,
+):
+    # This test checks language inputs only, since the visual component
+    # for qwen-vl is still unsupported in vLLM. In the near future, the
+    # implementation and this test will be extended to consider
+    # visual inputs as well.
+    with hf_runner(model, dtype=dtype, is_vision_model=False) as hf_model:
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts,
+            max_tokens,
+            num_logprobs=num_logprobs,
+        )
+
+    with vllm_runner(model, dtype=dtype) as vllm_model:
+        vllm_outputs = vllm_model.generate_greedy_logprobs(
+            example_prompts,
+            max_tokens,
+            num_logprobs=num_logprobs,
+        )
+
+    check_logprobs_close(
+        outputs_0_lst=hf_outputs,
+        outputs_1_lst=vllm_outputs,
+        name_0="hf",
+        name_1="vllm",
+    )