[V1][Structured Output] Minor modification to _validate_structured_output() (#16748)
Signed-off-by: shen-shanshan <467638484@qq.com>
parent 7a4a5de729
commit 30ed81b7ca
@@ -149,6 +149,7 @@ class Processor:
             "xgrammar", "xgrammar:disable-any-whitespace", "guidance",
             "guidance:disable-any-whitespace", "auto"
         ]
+
         engine_level_backend = self.decoding_config.guided_decoding_backend
         if engine_level_backend not in supported_backends:
             raise ValueError(f"Only {supported_backends} structured output is "
@@ -169,8 +170,15 @@ class Processor:
         if engine_level_backend.startswith("xgrammar"):
             # xgrammar with no fallback
             validate_xgrammar_grammar(params)
-            params.guided_decoding.backend = engine_level_backend
-        elif engine_level_backend == "auto":
+        elif engine_level_backend.startswith("guidance"):
+            # TODO: ideally we would have the LLTokenizer here as Lark syntax
+            # allows <|special_token|> and similar, see
+            # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
+            # Without tokenizer these are disallowed in grammars.
+            validate_guidance_grammar(params, tokenizer=None)
+        else:
+            # NOTE: engine_level_backend must be "auto" here, because we have
+            # checked supported_backends above.
             # "auto" is an opt-in to opinionated behavior where we try to
             # choose a backend based on request contents. This is not the
             # default as it is less predictable and subject to change
@@ -183,14 +191,6 @@ class Processor:
             # are not supported in xgrammar. Fall back to guidance.
             params.guided_decoding.backend = "guidance"
 
-        if engine_level_backend.startswith("guidance"):
-            # TODO ideally we would have the LLTokenizer here as Lark syntax
-            # allows <|special_token|> and similar, see
-            # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
-            # Without tokenizer these are disallowed in grammars.
-            validate_guidance_grammar(params, tokenizer=None)
-            params.guided_decoding.backend = engine_level_backend
-
     def process_inputs(
         self,
         request_id: str,
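
For quick reference, the control flow of _validate_structured_output() after this commit reads roughly as follows. This is a sketch assembled from the new side of the hunks above; the try/except body falls between hunk 2 and hunk 3, is not shown in this diff, and is paraphrased from its surviving context lines:

# Post-commit shape of Processor._validate_structured_output() (sketch).
# Copied from the new side of the hunks above, except where marked.
def _validate_structured_output(self, params: SamplingParams) -> None:
    supported_backends = [
        "xgrammar", "xgrammar:disable-any-whitespace", "guidance",
        "guidance:disable-any-whitespace", "auto"
    ]

    engine_level_backend = self.decoding_config.guided_decoding_backend
    if engine_level_backend not in supported_backends:
        raise ValueError(f"Only {supported_backends} structured output is "
                         "supported in V1.")  # message tail assumed

    if engine_level_backend.startswith("xgrammar"):
        # xgrammar with no fallback
        validate_xgrammar_grammar(params)
    elif engine_level_backend.startswith("guidance"):
        # Without a tokenizer, special tokens are disallowed in grammars.
        validate_guidance_grammar(params, tokenizer=None)
    else:
        # engine_level_backend must be "auto" (checked above): try xgrammar
        # first, fall back to guidance. Body between hunks, paraphrased:
        try:
            validate_xgrammar_grammar(params)
            params.guided_decoding.backend = "xgrammar"
        except ValueError:
            # The request includes some jsonschema feature(s) that
            # are not supported in xgrammar. Fall back to guidance.
            params.guided_decoding.backend = "guidance"

The net effect is structural: the standalone guidance `if` is folded into the elif chain, "auto" becomes the `else` branch, and the now-redundant `params.guided_decoding.backend = engine_level_backend` assignments are dropped.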
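To exercise this path end to end, a minimal offline example along these lines should hit the "auto" branch. The model name is a placeholder, and passing guided_decoding_backend through LLM(...) assumes the usual engine-args plumbing of this vLLM version:

# Minimal sketch: drive the "auto" path validated above.
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams

llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct",  # placeholder model
          guided_decoding_backend="auto")

# A JSON-schema request: under "auto", xgrammar is tried first and the
# request falls back to guidance if the schema uses unsupported features.
schema = {
    "type": "object",
    "properties": {"city": {"type": "string"}},
    "required": ["city"],
}
params = SamplingParams(
    max_tokens=64,
    guided_decoding=GuidedDecodingParams(json=schema),
)
outputs = llm.generate("Name a city in JSON.", params)
print(outputs[0].outputs[0].text)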