[Bugfix] Only require XGrammar on x86 (#10865)
Signed-off-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
parent
2f2cdc745a
commit
7090c27bb2
@ -19,7 +19,7 @@ prometheus-fastapi-instrumentator >= 7.0.0
|
||||
tiktoken >= 0.6.0 # Required for DBRX tokenizer
|
||||
lm-format-enforcer >= 0.10.9, < 0.11
|
||||
outlines >= 0.0.43, < 0.1
|
||||
xgrammar
|
||||
xgrammar >= 0.1.5; platform_machine == "x86_64"
|
||||
typing_extensions >= 4.10
|
||||
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
|
||||
partial-json-parser # used for parsing partial JSON outputs
|
||||
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import CpuArchEnum, current_platform
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import PreTrainedTokenizer
|
||||
@ -25,6 +26,12 @@ def maybe_backend_fallback(
|
||||
guided_params.backend = "xgrammar"
|
||||
|
||||
if guided_params.backend == "xgrammar":
|
||||
# xgrammar only has x86 wheels for linux, fallback to outlines
|
||||
if current_platform.get_cpu_architecture() is not CpuArchEnum.X86:
|
||||
logger.warning("xgrammar is only supported on x86 CPUs. "
|
||||
"Falling back to use outlines instead.")
|
||||
guided_params.backend = "outlines"
|
||||
|
||||
# xgrammar doesn't support regex or choice, fallback to outlines
|
||||
if guided_params.regex is not None or guided_params.choice is not None:
|
||||
logger.warning(
|
||||
|
@ -1,5 +1,5 @@
|
||||
from .interface import _Backend # noqa: F401
|
||||
from .interface import Platform, PlatformEnum, UnspecifiedPlatform
|
||||
from .interface import CpuArchEnum, Platform, PlatformEnum, UnspecifiedPlatform
|
||||
|
||||
current_platform: Platform
|
||||
|
||||
@ -120,4 +120,4 @@ elif is_openvino:
|
||||
else:
|
||||
current_platform = UnspecifiedPlatform()
|
||||
|
||||
__all__ = ['Platform', 'PlatformEnum', 'current_platform']
|
||||
__all__ = ['Platform', 'PlatformEnum', 'current_platform', 'CpuArchEnum']
|
||||
|
@ -1,4 +1,5 @@
|
||||
import enum
|
||||
import platform
|
||||
import random
|
||||
from typing import TYPE_CHECKING, NamedTuple, Optional, Tuple, Union
|
||||
|
||||
@ -37,6 +38,14 @@ class PlatformEnum(enum.Enum):
|
||||
UNSPECIFIED = enum.auto()
|
||||
|
||||
|
||||
class CpuArchEnum(enum.Enum):
    """Coarse CPU architecture families used to classify the host machine."""

    # x86 family ("x86_64", "amd64", "i386", "i686").
    X86 = enum.auto()
    # Machine names beginning with "arm" or "aarch".
    ARM = enum.auto()
    # Machine names beginning with "ppc".
    POWERPC = enum.auto()
    # A machine name was reported but matches none of the families above.
    OTHER = enum.auto()
    # No machine name was reported at all.
    UNKNOWN = enum.auto()
|
||||
|
||||
|
||||
class DeviceCapability(NamedTuple):
|
||||
major: int
|
||||
minor: int
|
||||
@ -184,6 +193,23 @@ class Platform:
|
||||
f"{quant} quantization is currently not supported in "
|
||||
f"{cls.device_name}.")
|
||||
|
||||
@classmethod
def get_cpu_architecture(cls) -> CpuArchEnum:
    """
    Classify the host CPU into a CpuArchEnum family.

    The decision is based on the lower-cased value of
    ``platform.machine()``:

    - "x86_64", "amd64", "i386" or "i686" -> CpuArchEnum.X86
    - names starting with "arm" or "aarch" -> CpuArchEnum.ARM
    - names starting with "ppc" -> CpuArchEnum.POWERPC
    - any other non-empty name -> CpuArchEnum.OTHER
    - an empty name -> CpuArchEnum.UNKNOWN
    """
    arch_name = platform.machine().lower()

    # platform.machine() yields "" when the value cannot be determined.
    if not arch_name:
        return CpuArchEnum.UNKNOWN

    if arch_name in ("x86_64", "amd64", "i386", "i686"):
        return CpuArchEnum.X86

    # str.startswith accepts a tuple, covering both ARM spellings at once.
    if arch_name.startswith(("arm", "aarch")):
        return CpuArchEnum.ARM

    if arch_name.startswith("ppc"):
        return CpuArchEnum.POWERPC

    return CpuArchEnum.OTHER
|
||||
|
||||
|
||||
class UnspecifiedPlatform(Platform):
|
||||
_enum = PlatformEnum.UNSPECIFIED
|
||||
|
Loading…
x
Reference in New Issue
Block a user