[V1][Structured Output] Add supports_structured_output() method to Platform (#16148)

Signed-off-by: shen-shanshan <467638484@qq.com>

commit e9ba99f296
parent 7c80368710
@@ -180,3 +180,7 @@ class CpuPlatform(Platform):
         Get device specific communicator class for distributed communication.
         """
         return "vllm.distributed.device_communicators.cpu_communicator.CpuCommunicator"  # noqa
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
@@ -308,6 +308,10 @@ class CudaPlatformBase(Platform):
     def supports_v1(cls, model_config: ModelConfig) -> bool:
         return True
 
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
+
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         return True
@@ -92,3 +92,7 @@ class HpuPlatform(Platform):
     @classmethod
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.hpu_communicator.HpuCommunicator"  # noqa
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
@@ -379,6 +379,13 @@ class Platform:
         """
        return False
 
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        """
+        Returns whether the current platform can support structured output.
+        """
+        return False
+
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         """
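The base class defaults to False, so any platform that does not explicitly override the hook is treated as unable to serve structured output requests. As a minimal sketch of how an out-of-tree platform would opt in (MyPlatform is a hypothetical example, not part of this commit, and omits the other attributes a real platform plugin must define):

    # Hypothetical out-of-tree platform; illustrative only.
    from vllm.platforms.interface import Platform


    class MyPlatform(Platform):
        """Example platform that opts in to structured output support."""

        @classmethod
        def supports_structured_output(cls) -> bool:
            # Opt in explicitly; the Platform base class defaults to False.
            return True

Platforms that cannot support structured output simply inherit the False default and need no change.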
@@ -67,3 +67,7 @@ class NeuronPlatform(Platform):
     @classmethod
     def use_all_gather(cls) -> bool:
         return True
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
@@ -303,6 +303,10 @@ class RocmPlatform(Platform):
         # V1 support on AMD gpus is experimental
         return True
 
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
+
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         # We only enable custom allreduce for MI300 series
@@ -133,3 +133,8 @@ class TpuPlatform(Platform):
     def supports_v1(cls, model_config: ModelConfig) -> bool:
         # V1 support on TPU is experimental
         return True
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        # Structured output is not supported on TPU.
+        return False
@@ -140,3 +140,7 @@ class XPUPlatform(Platform):
     @classmethod
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
@@ -136,9 +136,11 @@ class Processor:
                     f" != {engine_level_backend}")
         else:
             params.guided_decoding.backend = engine_level_backend
-        import vllm.platforms
-        if vllm.platforms.current_platform.is_tpu():
-            raise ValueError("Structured output is not supported on TPU.")
+
+        from vllm.platforms import current_platform
+        if not current_platform.supports_structured_output():
+            raise ValueError("Structured output is not supported on "
+                             f"{current_platform.device_name}.")
 
         # Request content validation
         if engine_level_backend.startswith("xgrammar"):
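With the generic hook in place, the processor no longer special-cases TPU: the same check now rejects guided-decoding requests on any platform whose supports_structured_output() returns False. A rough sketch of the behavior from a caller's perspective, assuming a deployment on a platform where the hook returns False (e.g. TPU in this commit):

    # Sketch of the validation path guarded above; illustrative only.
    from vllm.platforms import current_platform

    if not current_platform.supports_structured_output():
        # Mirrors the ValueError the Processor raises when a request
        # carries guided_decoding params on an unsupported platform.
        print("Structured output is not supported on "
              f"{current_platform.device_name}.")

The error message is also derived from the platform's device_name rather than hard-coding "TPU", so new platforms get a correct message for free.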