# SPDX-License-Identifier: Apache-2.0
"""Tests for the SamplingParams class.
|
|
|
|
"""
|
2025-04-09 14:11:10 +02:00
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
2024-01-23 22:38:55 -08:00
|
|
|
from vllm import SamplingParams
|
2025-04-09 14:11:10 +02:00
|
|
|
from vllm.config import ModelConfig
|
|
|
|
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
|
|
|
|
|
|
|
MODEL_NAME = "Qwen/Qwen1.5-7B"
|
2024-01-23 22:38:55 -08:00
|
|
|
|
|
|
|
|
|
|
|
def test_max_tokens_none():
|
|
|
|
"""max_tokens=None should be allowed"""
|
|
|
|
SamplingParams(temperature=0.01, top_p=0.1, max_tokens=None)
|
|
|
|
|
|
|
|
|
2025-04-09 14:11:10 +02:00
|
|
|
@pytest.fixture(scope="module")
def model_config():
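    """Module-scoped ModelConfig used by the request-conversion tests below.

    Note: constructing this config assumes the Qwen/Qwen1.5-7B configuration
    is reachable (e.g. via the Hugging Face Hub or a local cache).
    """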
    return ModelConfig(
        MODEL_NAME,
        task="auto",
        tokenizer=MODEL_NAME,
        tokenizer_mode="auto",
        trust_remote_code=False,
        seed=0,
        dtype="float16",
        revision=None,
    )


@pytest.fixture(scope="module")
def default_max_tokens():
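    """Default max_tokens forwarded to to_sampling_params() in these tests."""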
    return 4096


def test_sampling_params_from_request_with_no_guided_decoding_backend(
        model_config, default_max_tokens):
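    """Without a request-level backend, guided_decoding.backend stays None."""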
    # guided_decoding_backend is not present at request level
    request = ChatCompletionRequest.model_validate({
        'messages': [{
            'role': 'user',
            'content': 'Hello'
        }],
        'model': MODEL_NAME,
        'response_format': {
            'type': 'json_object',
        },
    })

    sampling_params = request.to_sampling_params(
        default_max_tokens,
        model_config.logits_processor_pattern,
    )
    # no backend is expected at the request level, so the engine-level
    # default guided_decoding_backend will be used.
    assert sampling_params.guided_decoding.backend is None


@pytest.mark.parametrize("request_level_guided_decoding_backend,expected",
                         [("xgrammar", "xgrammar"),
                          ("lm-format-enforcer", "lm-format-enforcer"),
                          ("outlines", "outlines")])
def test_sampling_params_from_request_with_guided_decoding_backend(
        request_level_guided_decoding_backend: str, expected: str,
        model_config, default_max_tokens):
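    """The request-level backend is carried into guided_decoding.backend."""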
    request = ChatCompletionRequest.model_validate({
        'messages': [{
            'role': 'user',
            'content': 'Hello'
        }],
        'model': MODEL_NAME,
        'response_format': {
            'type': 'json_object',
        },
        'guided_decoding_backend': request_level_guided_decoding_backend,
    })

    sampling_params = request.to_sampling_params(
        default_max_tokens,
        model_config.logits_processor_pattern,
    )
    # backend correctly identified in resulting sampling_params
    assert sampling_params.guided_decoding.backend == expected