[Hardware][Intel GPU] add XPU bf16 support (#12392)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Kunshang Ji 2025-02-02 18:17:26 +08:00 committed by GitHub
parent f8ece6e17f
commit f256ebe4df
2 changed files with 21 additions and 4 deletions


@@ -36,7 +36,7 @@ VLLM_TARGET_DEVICE=xpu python setup.py install
 :::{note}
 - FP16 is the default data type in the current XPU backend. The BF16 data
-  type will be supported in the future.
+  type is supported on Intel Data Center GPU; it is not yet supported on Intel Arc GPU.
 :::
 ## Set up using Docker

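For context on the docs change above, a minimal sketch of what the note means in practice: on an Intel Data Center GPU an explicit bfloat16 request is honored, while on an Intel Arc GPU the platform check added in this commit downgrades it to float16 with a warning. The model name below is an illustrative assumption, not part of this commit.

```python
# Sketch: explicitly request bfloat16 on the XPU backend. The model name is
# illustrative. On Intel Data Center GPU this runs in bfloat16; on Intel Arc
# GPU the dtype is downgraded to float16 and a warning is logged.
from vllm import LLM

llm = LLM(model="facebook/opt-125m", dtype="bfloat16")
outputs = llm.generate("Hello, my name is")
print(outputs[0].outputs[0].text)
```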

@@ -66,9 +66,14 @@ class XPUPlatform(Platform):
         # check and update model config
         model_config = vllm_config.model_config
         if model_config.dtype == torch.bfloat16:
-            logger.warning(
-                "bfloat16 is not fully supported on XPU, casting to float16.")
-            model_config.dtype = torch.float16
+            bf16_supported = cls.device_support_bf16()
+            if not bf16_supported:
+                logger.warning(
+                    "bfloat16 is only supported on Intel Data Center GPU; "
+                    "it is not yet supported on Intel Arc GPU. Your device "
+                    "is %s, so falling back to float16.",
+                    cls.get_device_name())
+                model_config.dtype = torch.float16
         if not model_config.enforce_eager:
             logger.warning(
                 "CUDA graph is not supported on XPU, fallback to the eager "
@@ -116,3 +121,15 @@ class XPUPlatform(Platform):
     ) -> float:
         torch.xpu.reset_peak_memory_stats(device)
         return torch.xpu.max_memory_allocated(device)
+
+    @classmethod
+    def device_support_bf16(cls) -> bool:
+        device_name = cls.get_device_name().lower()
+        if "arc" in device_name:
+            return False
+        elif "data center gpu" in device_name:
+            return True
+        else:
+            logger.warning("Unknown device name %s; assuming bfloat16 is "
+                           "unsupported, using float16.", device_name)
+            return False
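
Since `device_support_bf16` keys off marketing device names, a small parametrized test pins the matching behavior down. A sketch, assuming `XPUPlatform` is importable from `vllm.platforms.xpu` and that `get_device_name()` returns strings like the ones below; the specific device names are illustrative examples.

```python
import pytest

from vllm.platforms.xpu import XPUPlatform


@pytest.mark.parametrize("name,expected", [
    ("Intel(R) Data Center GPU Max 1550", True),
    ("Intel(R) Arc(TM) A770 Graphics", False),
    # Unknown names are treated conservatively as unsupported.
    ("Intel(R) UHD Graphics 770", False),
])
def test_device_support_bf16(monkeypatch, name, expected):
    # Stub out device discovery so the test needs no physical XPU.
    monkeypatch.setattr(XPUPlatform, "get_device_name",
                        classmethod(lambda cls: name))
    assert XPUPlatform.device_support_bf16() is expected
```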