vllm/tests/v1/tpu/test_sampler.py

# SPDX-License-Identifier: Apache-2.0
import pytest

from vllm import LLM, envs
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams

# Every test in this module targets the V1 engine, so skip the whole module
# (not just individual tests) when V1 is not enabled.
if not envs.VLLM_USE_V1:
    pytest.skip("Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.",
                allow_module_level=True)


@pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])
@pytest.mark.skipif(not current_platform.is_tpu(),
                    reason="This test needs a TPU")
def test_sampler_different(model_name: str):
    """
    Check that two strongly contrasting sampling configurations yield
    different completions for the same prompt.

    The same engine instance and the same single prompt are used for both
    generations; only the `SamplingParams` differ between the two calls.
    """
    story_prompts = [
        "Write a short story about a robot that dreams for the first time."
    ]
    engine = LLM(
        model_name,
        enforce_eager=False,
        max_num_seqs=1,
        max_model_len=512,
        max_num_batched_tokens=512,
    )

    # First run: higher temperature (0.9) combined with a lower min_p (0.2).
    first_params = SamplingParams(temperature=0.9, min_p=0.2, max_tokens=64)
    first_result = engine.generate(story_prompts, first_params)

    # Second run: lower temperature (0.1) combined with a higher min_p (0.8).
    second_params = SamplingParams(temperature=0.1, min_p=0.8, max_tokens=64)
    second_result = engine.generate(story_prompts, second_params)

    # Compare the text of the first completion of the (only) prompt.
    first_text = first_result[0].outputs[0].text
    second_text = second_result[0].outputs[0].text
    assert first_text != second_text
[V1][TPU] Support V1 Sampler for ragged attention (#14227) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-03-20 05:00:39 +01:00			`# SPDX-License-Identifier: Apache-2.0`
			`import pytest`

			`from vllm import LLM, envs`
			`from vllm.platforms import current_platform`
			`from vllm.sampling_params import SamplingParams`

			`if not envs.VLLM_USE_V1:`
			`pytest.skip(`
			"Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.",
			`allow_module_level=True,`
			`)`


			`@pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])`
			`@pytest.mark.skipif(not current_platform.is_tpu(),`
			`reason="This test needs a TPU")`
			`def test_sampler_different(model_name: str):`
			`"""`
			`Test significantly different sampling params to assert the model produces`
			`different results.`
			`"""`
[Bugfix][TPU][V1] Fix recompilation (#15553) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-03-27 20:15:06 +01:00			`llm = LLM(model_name,`
			`enforce_eager=False,`
			`max_num_seqs=1,`
			`max_model_len=512,`
			`max_num_batched_tokens=512)`
[V1][TPU] Support V1 Sampler for ragged attention (#14227) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-03-20 05:00:39 +01:00			`prompts = [`
			`"Write a short story about a robot that dreams for the first time."`
			`]`
			`sampling_params = SamplingParams(temperature=0.9, min_p=0.2, max_tokens=64)`
			`output = llm.generate(prompts, sampling_params)`

			`sampling_params = SamplingParams(temperature=0.1, min_p=0.8, max_tokens=64)`
			`output2 = llm.generate(prompts, sampling_params)`
			`assert output[0].outputs[0].text != output2[0].outputs[0].text`