# vllm/tests/models/test_oot_registration.py

import os

import pytest

from vllm import LLM, SamplingParams
from vllm.assets.image import ImageAsset

from ..utils import fork_new_process_for_each_test


@fork_new_process_for_each_test
def test_plugin(dummy_opt_path):
    """Check that building the LLM fails when ``VLLM_PLUGINS`` is empty.

    The environment variable is set to an empty string before the engine is
    constructed, and the raised exception's message must contain the
    "are not supported for now" fragment.
    """
    expected_fragment = "are not supported for now"
    os.environ["VLLM_PLUGINS"] = ""

    with pytest.raises(Exception) as exc_info:
        LLM(model=dummy_opt_path, load_format="dummy")

    error_text = str(exc_info.value)
    assert expected_fragment in error_text


@fork_new_process_for_each_test
def test_oot_registration_text_generation(dummy_opt_path):
    """Generate text with ``VLLM_PLUGINS`` set to ``register_dummy_model``.

    Each completion must be made up exclusively of the text that the
    tokenizer decodes from token id 0.
    """
    os.environ["VLLM_PLUGINS"] = "register_dummy_model"
    input_texts = ["Hello, my name is", "The text does not matter"]
    zero_temp_params = SamplingParams(temperature=0)
    engine = LLM(model=dummy_opt_path, load_format="dummy")
    token_zero_text = engine.get_tokenizer().decode(0)
    results = engine.generate(input_texts, zero_temp_params)

    for result in results:
        completion = result.outputs[0]
        # Stripping every occurrence of token 0's text must leave nothing,
        # i.e. no other token was ever produced.
        leftover = completion.text.replace(token_zero_text, "")
        assert leftover == ""


@fork_new_process_for_each_test
def test_oot_registration_embedding(dummy_gemma2_embedding_path):
    """Embed prompts with ``VLLM_PLUGINS`` set to ``register_dummy_model``.

    Every component of every returned embedding must compare equal to zero.
    """
    os.environ["VLLM_PLUGINS"] = "register_dummy_model"
    input_texts = ["Hello, my name is", "The text does not matter"]
    engine = LLM(model=dummy_gemma2_embedding_path, load_format="dummy")

    for result in engine.embed(input_texts):
        vector = result.outputs.embedding
        assert all(component == 0 for component in vector)


# Test image, converted to RGB once at import time; both prompts in
# test_oot_registration_multimodal reference this same object.
image = ImageAsset("cherry_blossom").pil_image.convert("RGB")


@fork_new_process_for_each_test
def test_oot_registration_multimodal(dummy_llava_path):
    """Generate from image+text prompts with the dummy-model plugin enabled.

    ``VLLM_PLUGINS`` is set to ``register_dummy_model``; two prompts, each
    carrying the module-level ``image``, are submitted, and every completion
    must consist solely of the text decoded from token id 0.
    """
    os.environ["VLLM_PLUGINS"] = "register_dummy_model"

    questions = (
        "What's in the image?<image>",
        "Describe the image<image>",
    )
    requests = [
        {
            "prompt": question,
            "multi_modal_data": {"image": image},
        }
        for question in questions
    ]

    zero_temp_params = SamplingParams(temperature=0)
    engine_kwargs = dict(
        load_format="dummy",
        max_num_seqs=1,
        trust_remote_code=True,
        gpu_memory_utilization=0.98,
        max_model_len=4096,
        enforce_eager=True,
        limit_mm_per_prompt={"image": 1},
    )
    engine = LLM(model=dummy_llava_path, **engine_kwargs)
    token_zero_text = engine.get_tokenizer().decode(0)
    results = engine.generate(requests, zero_temp_params)

    for result in results:
        completion = result.outputs[0]
        # Stripping every occurrence of token 0's text must leave nothing,
        # i.e. no other token was ever produced.
        leftover = completion.text.replace(token_zero_text, "")
        assert leftover == ""
[misc][plugin] add plugin system implementation (#7426) 2024-08-13 16:24:17 -07:00			`import os`
[Bugfix] Fix weight loading for Chameleon when TP>1 (#7410) 2024-08-13 13:33:41 +08:00
[misc][plugin] add plugin system implementation (#7426) 2024-08-13 16:24:17 -07:00			`import pytest`
[Core] enable out-of-tree model register (#3871) 2024-04-06 17:11:41 -07:00
[Frontend] Separate pooling APIs in offline inference (#11129) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2024-12-13 18:40:07 +08:00			`from vllm import LLM, SamplingParams`
[Core][VLM] Test registration for OOT multimodal models (#8717) Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk> 2024-10-04 10:38:25 -07:00			`from vllm.assets.image import ImageAsset`
[Core] enable out-of-tree model register (#3871) 2024-04-06 17:11:41 -07:00
[misc][ci] fix cpu test with plugins (#7489) 2024-08-13 19:27:46 -07:00			`from ..utils import fork_new_process_for_each_test`
[Core] enable out-of-tree model register (#3871) 2024-04-06 17:11:41 -07:00

[misc][ci] fix cpu test with plugins (#7489) 2024-08-13 19:27:46 -07:00			`@fork_new_process_for_each_test`
[misc][plugin] add plugin system implementation (#7426) 2024-08-13 16:24:17 -07:00			`def test_plugin(dummy_opt_path):`
			`os.environ["VLLM_PLUGINS"] = ""`
			`with pytest.raises(Exception) as excinfo:`
			`LLM(model=dummy_opt_path, load_format="dummy")`
			`assert "are not supported for now" in str(excinfo.value)`
[Core] enable out-of-tree model register (#3871) 2024-04-06 17:11:41 -07:00

[misc][ci] fix cpu test with plugins (#7489) 2024-08-13 19:27:46 -07:00			`@fork_new_process_for_each_test`
[Model] Explicit interface for vLLM models and support OOT embedding models (#9108) 2024-10-07 14:10:35 +08:00			`def test_oot_registration_text_generation(dummy_opt_path):`
[misc][plugin] add plugin system implementation (#7426) 2024-08-13 16:24:17 -07:00			`os.environ["VLLM_PLUGINS"] = "register_dummy_model"`
[Core] enable out-of-tree model register (#3871) 2024-04-06 17:11:41 -07:00			`prompts = ["Hello, my name is", "The text does not matter"]`
			`sampling_params = SamplingParams(temperature=0)`
[misc][plugin] add plugin system implementation (#7426) 2024-08-13 16:24:17 -07:00			`llm = LLM(model=dummy_opt_path, load_format="dummy")`
[Core] enable out-of-tree model register (#3871) 2024-04-06 17:11:41 -07:00			`first_token = llm.get_tokenizer().decode(0)`
			`outputs = llm.generate(prompts, sampling_params)`

			`for output in outputs:`
			`generated_text = output.outputs[0].text`
			`# make sure only the first token is generated`
			`rest = generated_text.replace(first_token, "")`
			`assert rest == ""`
[Core][VLM] Test registration for OOT multimodal models (#8717) Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk> 2024-10-04 10:38:25 -07:00

[Model] Explicit interface for vLLM models and support OOT embedding models (#9108) 2024-10-07 14:10:35 +08:00			`@fork_new_process_for_each_test`
			`def test_oot_registration_embedding(dummy_gemma2_embedding_path):`
			`os.environ["VLLM_PLUGINS"] = "register_dummy_model"`
			`prompts = ["Hello, my name is", "The text does not matter"]`
			`llm = LLM(model=dummy_gemma2_embedding_path, load_format="dummy")`
[Frontend] Separate pooling APIs in offline inference (#11129) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2024-12-13 18:40:07 +08:00			`outputs = llm.embed(prompts)`
[Model] Explicit interface for vLLM models and support OOT embedding models (#9108) 2024-10-07 14:10:35 +08:00
			`for output in outputs:`
			`assert all(v == 0 for v in output.outputs.embedding)`


[Core][VLM] Test registration for OOT multimodal models (#8717) Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk> 2024-10-04 10:38:25 -07:00			`image = ImageAsset("cherry_blossom").pil_image.convert("RGB")`


			`@fork_new_process_for_each_test`
[Model] Explicit interface for vLLM models and support OOT embedding models (#9108) 2024-10-07 14:10:35 +08:00			`def test_oot_registration_multimodal(dummy_llava_path):`
[Core][VLM] Test registration for OOT multimodal models (#8717) Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk> 2024-10-04 10:38:25 -07:00			`os.environ["VLLM_PLUGINS"] = "register_dummy_model"`
			`prompts = [{`
			`"prompt": "What's in the image?<image>",`
			`"multi_modal_data": {`
			`"image": image`
			`},`
			`}, {`
			`"prompt": "Describe the image<image>",`
			`"multi_modal_data": {`
			`"image": image`
			`},`
			`}]`

			`sampling_params = SamplingParams(temperature=0)`
			`llm = LLM(model=dummy_llava_path,`
			`load_format="dummy",`
			`max_num_seqs=1,`
			`trust_remote_code=True,`
			`gpu_memory_utilization=0.98,`
			`max_model_len=4096,`
			`enforce_eager=True,`
			`limit_mm_per_prompt={"image": 1})`
			`first_token = llm.get_tokenizer().decode(0)`
			`outputs = llm.generate(prompts, sampling_params)`

			`for output in outputs:`
			`generated_text = output.outputs[0].text`
			`# make sure only the first token is generated`
			`rest = generated_text.replace(first_token, "")`
			`assert rest == ""`