[Bugfix][Intel] Fix XPU Dockerfile Build (#7824)
Signed-off-by: tylertitsworth <tyler.titsworth@intel.com>
Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
parent d86f6b2afb
commit 260024a374
.buildkite/run-xpu-test.sh
@@ -11,4 +11,4 @@ trap remove_docker_container EXIT
 remove_docker_container

 # Run the image and launch offline inference
-docker run --network host --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path xpu-test python3 examples/offline_inference.py
+docker run --network host --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path --entrypoint="" xpu-test python3 examples/offline_inference.py
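The image's final stage now sets an ENTRYPOINT (see the Dockerfile.xpu hunk below), so the CI script has to clear it before running an ad-hoc command inside the container. A hedged sketch of the same invocation from Python, with the image and container names taken from the script above:

```python
# Run the offline-inference smoke test against the freshly built image.
# Passing an empty --entrypoint clears the image's default ENTRYPOINT so an
# arbitrary command runs instead of the OpenAI API server.
import subprocess

subprocess.run([
    "docker", "run", "--network", "host", "--name", "xpu-test",
    "--device", "/dev/dri",
    "-v", "/dev/dri/by-path:/dev/dri/by-path",
    "--entrypoint", "",
    "xpu-test", "python3", "examples/offline_inference.py",
], check=True)
```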
.dockerignore
@@ -1,4 +1,6 @@
-vllm/*.so
+/.github/
 /.venv
 /build
 dist
+Dockerfile*
+vllm/*.so
Dockerfile.xpu
@@ -1,4 +1,4 @@
-FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04
+FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04 AS vllm-base

 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \
     echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \
@@ -7,20 +7,49 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc" | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \
     chmod 644 /usr/share/keyrings/intel-graphics.gpg

-RUN apt-get update -y && \
-    apt-get install -y curl libicu70 lsb-release git wget vim numactl python3 python3-pip ffmpeg libsm6 libxext6 libgl1
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends --fix-missing \
+        curl \
+        ffmpeg \
+        git \
+        libsndfile1 \
+        libsm6 \
+        libxext6 \
+        libgl1 \
+        lsb-release \
+        numactl \
+        python3 \
+        python3-dev \
+        python3-pip \
+        # vim \
+        wget

-COPY ./ /workspace/vllm
-
 WORKDIR /workspace/vllm
+COPY requirements-xpu.txt /workspace/vllm/requirements-xpu.txt
+COPY requirements-common.txt /workspace/vllm/requirements-common.txt

 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -v --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
-    cmake>=3.26 ninja packaging setuptools-scm>=8 wheel jinja2 \
+    pip install --no-cache-dir \
+    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
     -r requirements-xpu.txt

+COPY ./ /workspace/vllm
+
+ENV VLLM_TARGET_DEVICE=xpu
+
 RUN --mount=type=cache,target=/root/.cache/pip \
-    VLLM_TARGET_DEVICE=xpu python3 setup.py install
+    --mount=type=bind,source=.git,target=.git \
+    python3 setup.py install

 CMD ["/bin/bash"]
+
+FROM vllm-base AS vllm-openai
+
+# install additional dependencies for openai api server
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install accelerate hf_transfer 'modelscope!=1.15.0'
+
+ENV VLLM_USAGE_SOURCE production-docker-image \
+    TRITON_XPU_PROFILE 1
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
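With the split into `vllm-base` and `vllm-openai` stages, the default image now boots straight into the OpenAI-compatible API server. A hedged usage sketch once a container is up; the model name, port, and prompt are illustrative assumptions, not part of this commit:

```python
# Query the OpenAI-compatible server started by the new ENTRYPOINT.
# Assumes vLLM's default port 8000 and a placeholder model name.
import requests

resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "facebook/opt-125m",  # placeholder; use whatever the server loaded
        "prompt": "Hello, XPU!",
        "max_tokens": 16,
    },
)
print(resp.json())
```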
requirements-common.txt
@@ -1,7 +1,7 @@
 psutil
 sentencepiece # Required for LLaMA tokenizer.
 numpy < 2.0.0
-requests
+requests >= 2.26.0
 tqdm
 py-cpuinfo
 transformers >= 4.45.0 # Required for Llama 3.2.
requirements-xpu.txt
@@ -1,9 +1,13 @@
 # Common dependencies
 -r requirements-common.txt

-setuptools < 70.0.0 # IPEX's torch have some dependency. to be removed.
-
 ray >= 2.9
+cmake>=3.26
+ninja
+packaging
+setuptools-scm>=8
+wheel
+jinja2
 # Following pkgs retrieved from https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 torch == 2.3.1+cxx11.abi
 intel-extension-for-pytorch == 2.3.110+xpu
setup.py
@@ -415,6 +415,8 @@ def get_requirements() -> List[str]:
         for line in requirements:
             if line.startswith("-r "):
                 resolved_requirements += _read_requirements(line.split()[1])
+            elif line.startswith("--"):
+                continue
             else:
                 resolved_requirements.append(line)
         return resolved_requirements
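The two added lines make the requirements resolver skip pip option lines, so entries such as `--extra-index-url` in requirements-xpu.txt are no longer passed to setuptools as (invalid) requirement specifiers. A minimal, self-contained sketch of the resulting behavior, with the `_read_requirements` recursion stubbed out for illustration:

```python
# Sketch of get_requirements()'s inner loop after this change: "-r" lines
# recurse into the referenced file, "--" option lines are skipped, and
# everything else is kept as a requirement.
def resolve(lines):
    resolved = []
    for line in lines:
        if line.startswith("-r "):
            continue  # setup.py recurses via _read_requirements(); stubbed here
        elif line.startswith("--"):
            continue  # pip option such as --extra-index-url, not a package
        else:
            resolved.append(line)
    return resolved

print(resolve([
    "-r requirements-common.txt",
    "--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/",
    "torch == 2.3.1+cxx11.abi",
]))
# -> ['torch == 2.3.1+cxx11.abi']
```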
vllm/platforms/__init__.py
@@ -42,6 +42,15 @@ try:
 except Exception:
     pass

+is_xpu = False
+
+try:
+    import torch
+    if hasattr(torch, 'xpu') and torch.xpu.is_available():
+        is_xpu = True
+except Exception:
+    pass
+
 is_cpu = False
 try:
     from importlib.metadata import version
@@ -60,6 +69,9 @@ elif is_cuda:
 elif is_rocm:
     from .rocm import RocmPlatform
     current_platform = RocmPlatform()
+elif is_xpu:
+    from .xpu import XPUPlatform
+    current_platform = XPUPlatform()
 elif is_cpu:
     from .cpu import CpuPlatform
     current_platform = CpuPlatform()
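With XPU detection wired into the module-level probe, downstream code can branch on the resolved platform object instead of touching `torch.xpu` directly. A hedged usage sketch, assuming an XPU-enabled PyTorch build at import time:

```python
# On a machine where torch.xpu.is_available() is True, the detection above
# resolves current_platform to an XPUPlatform instance.
from vllm.platforms import current_platform

if current_platform.is_xpu():
    print("Running on XPU:", current_platform.get_device_name(0))
else:
    print("Not an XPU platform")
```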
vllm/platforms/interface.py
@@ -8,6 +8,7 @@ class PlatformEnum(enum.Enum):
     CUDA = enum.auto()
     ROCM = enum.auto()
     TPU = enum.auto()
+    XPU = enum.auto()
     CPU = enum.auto()
     UNSPECIFIED = enum.auto()

@@ -41,6 +42,9 @@ class Platform:
     def is_tpu(self) -> bool:
         return self._enum == PlatformEnum.TPU

+    def is_xpu(self) -> bool:
+        return self._enum == PlatformEnum.XPU
+
     def is_cpu(self) -> bool:
         return self._enum == PlatformEnum.CPU
vllm/platforms/xpu.py (new file)
@@ -0,0 +1,20 @@
+import torch
+
+from .interface import DeviceCapability, Platform, PlatformEnum
+
+
+class XPUPlatform(Platform):
+    _enum = PlatformEnum.XPU
+
+    @staticmethod
+    def get_device_capability(device_id: int = 0) -> DeviceCapability:
+        return DeviceCapability(major=int(
+            torch.xpu.get_device_capability(device_id)['version'].split('.')
+            [0]),
+                                minor=int(
+                                    torch.xpu.get_device_capability(device_id)
+                                    ['version'].split('.')[1]))
+
+    @staticmethod
+    def get_device_name(device_id: int = 0) -> str:
+        return torch.xpu.get_device_name(device_id)
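A quick interactive check of the new platform class; hedged, since it requires intel-extension-for-pytorch and a visible XPU device:

```python
# Smoke-test the two static methods added in this file. The 'version' key of
# torch.xpu.get_device_capability() is parsed into DeviceCapability(major, minor).
from vllm.platforms.xpu import XPUPlatform

cap = XPUPlatform.get_device_capability(0)
print(XPUPlatform.get_device_name(0), f"-> capability {cap.major}.{cap.minor}")
```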