import os

import pytest

from vllm import LLM, SamplingParams

# NOTE: the order of the tests is important
# the first test does not load any plugins
# the second test loads the plugin
# they share the same process, so the plugin is loaded for the second test
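#
# for reference: the ``register_dummy_model`` plugin enabled in the second
# test is assumed to register the out-of-tree architecture through vLLM's
# model registry, roughly along the lines of the sketch below; the class and
# architecture names are placeholders rather than the real plugin code
#
#     from vllm import ModelRegistry
#
#     def register():
#         ModelRegistry.register_model("MyOPTForCausalLM", MyOPTForCausalLM)
#
# VLLM_PLUGINS selects which installed plugins are loaded; the first test
# sets it to an empty string so that no plugin (and hence no extra
# architecture) is available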


def test_plugin(dummy_opt_path):
    # with all plugins disabled, the dummy OPT architecture is unknown to
    # vLLM, so engine construction should fail with an "architectures are
    # not supported for now" style error
    os.environ["VLLM_PLUGINS"] = ""
    with pytest.raises(Exception) as excinfo:
        LLM(model=dummy_opt_path, load_format="dummy")
    assert "are not supported for now" in str(excinfo.value)


def test_oot_registration(dummy_opt_path):
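    # enable only the plugin under test; it is expected to register the dummy
    # OPT architecture with vLLM, so the same LLM(...) call that failed in
    # test_plugin should now succeed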
    os.environ["VLLM_PLUGINS"] = "register_dummy_model"
    prompts = ["Hello, my name is", "The text does not matter"]
    sampling_params = SamplingParams(temperature=0)
    llm = LLM(model=dummy_opt_path, load_format="dummy")
    # the decoded text of token id 0, used to check the generated output below
    first_token = llm.get_tokenizer().decode(0)
    outputs = llm.generate(prompts, sampling_params)

    for output in outputs:
        generated_text = output.outputs[0].text
        # make sure only the first token is generated: the dummy model is
        # expected to always predict token id 0, so with greedy sampling
        # (temperature=0) the output should decode to repetitions of
        # ``first_token``
        rest = generated_text.replace(first_token, "")
        assert rest == ""