[Core][V0] Enable regex support with xgrammar (#13228)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
2025-04-13 22:13:38 -04:00 · 2025-04-13 22:13:38 -04:00 · dc1b4a6f13
commit dc1b4a6f13
parent 63d2705edb
3 changed files with 25 additions and 9 deletions
--- a/tests/entrypoints/llm/test_guided_generate.py
+++ b/tests/entrypoints/llm/test_guided_generate.py
@@ -286,15 +286,26 @@ def test_validation_against_both_guided_decoding_options(sample_regex, llm):

@pytest.mark.skip_global_cleanup
 def test_disable_guided_decoding_fallback(sample_regex, llm):
+    # see has_xgrammar_unsupported_json_features()
+    unsupported_json = {
+        "type": "object",
+        "properties": {
+            "example": {
+                "type": "string",
+                "minLength": 5  # unsupported by xgrammar
+            }
+        }
+    }
    sampling_params = SamplingParams(temperature=0.8,
                                     top_p=0.95,
                                     guided_decoding=GuidedDecodingParams(
-                                         regex=sample_regex,
+                                         json=unsupported_json,
                                         backend="xgrammar:no-fallback"))

    with pytest.raises(
            ValueError,
-            match="xgrammar does not support regex guided decoding"):
+            match="xgrammar does not support advanced JSON schema features "
+            "like enums, patterns or numeric ranges."):
        llm.generate(prompts="This should fail",
                     sampling_params=sampling_params,
                     use_tqdm=True)
--- a/vllm/model_executor/guided_decoding/__init__.py
+++ b/vllm/model_executor/guided_decoding/__init__.py
@@ -59,14 +59,9 @@ def maybe_backend_fallback(
        from vllm.model_executor.guided_decoding.xgrammar_decoding import (
            xgr_installed)

-        # xgrammar doesn't support regex, fallback to outlines
-        if guided_params.regex is not None:
-            fallback_or_error(
-                guided_params,
-                "xgrammar does not support regex guided decoding.", "outlines")
        # xgrammar doesn't support some JSON schema features
-        elif (guided_params.json is not None
-              and has_xgrammar_unsupported_json_features(guided_params.json)):
+        if (guided_params.json is not None and
+                has_xgrammar_unsupported_json_features(guided_params.json)):
            fallback_or_error(
                guided_params,
                "xgrammar does not support advanced JSON schema features like "
--- a/vllm/model_executor/guided_decoding/xgrammar_decoding.py
+++ b/vllm/model_executor/guided_decoding/xgrammar_decoding.py
@@ -152,6 +152,7 @@ class GrammarConfig:
    grammar_str: str | None = None
    json_object: bool | None = None
    any_whitespace: bool = True
+    regex_str: str | None = None
    max_threads: int = 8

    @classmethod
@@ -255,6 +256,13 @@ class GrammarConfig:
                max_threads=max_threads,
                tokenizer_data=tokenizer_data,
            )
+        elif guided_params.regex:
+            return cls(
+                regex_str=guided_params.regex,
+                tokenizer_hash=tokenizer_hash,
+                max_threads=max_threads,
+                tokenizer_data=tokenizer_data,
+            )
        else:
            raise ValueError(
                "Currently only support JSON and EBNF grammar mode for xgrammar"
@@ -330,6 +338,8 @@ class XGrammarLogitsProcessor:
                self.ctx = compiler\
                    .compile_json_schema('{"type": "object"}',
                                         any_whitespace=any_whitespace)
+            elif self.config.regex_str:
+                self.ctx = compiler.compile_regex(self.config.regex_str)
            else:
                raise ValueError(
                    "Invalid configuration for xgrammar logits processor")