[BugFix] Lazily import XgrammarBackend to avoid early cuda init (#15171)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
cfbca8a2f2
commit
c47aafa37c
@@ -9,7 +9,6 @@ from vllm.config import VllmConfig
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
|
from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
|
||||||
StructuredOutputGrammar)
|
StructuredOutputGrammar)
|
||||||
from vllm.v1.structured_output.backend_xgrammar import XgrammarBackend
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -47,6 +46,9 @@ class StructuredOutputManager:
|
|||||||
if self.backend is None:
|
if self.backend is None:
|
||||||
backend_name = request.sampling_params.guided_decoding.backend_name
|
backend_name = request.sampling_params.guided_decoding.backend_name
|
||||||
if backend_name == "xgrammar":
|
if backend_name == "xgrammar":
|
||||||
|
from vllm.v1.structured_output.backend_xgrammar import (
|
||||||
|
XgrammarBackend)
|
||||||
|
|
||||||
self.backend = XgrammarBackend(self.vllm_config)
|
self.backend = XgrammarBackend(self.vllm_config)
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user