[Bugfix][Intel] Fix XPU Dockerfile Build (#7824)

Signed-off-by: tylertitsworth <tyler.titsworth@intel.com>
Co-authored-by: youkaichao <youkaichao@126.com>

parent d86f6b2afb
commit 260024a374
.buildkite/run-xpu-test.sh

@@ -11,4 +11,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --network host --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path xpu-test python3 examples/offline_inference.py
+docker run --network host --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path --entrypoint="" xpu-test python3 examples/offline_inference.py
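For context, the CI command above runs vllm's offline example inside the freshly built image (the empty --entrypoint clears the image's default OpenAI-server entrypoint so the python3 command runs directly). A minimal sketch of that kind of offline smoke test; the model name is an assumption for illustration, not necessarily what examples/offline_inference.py uses:

from vllm import LLM, SamplingParams

# Tiny model chosen only to keep the smoke test fast; an assumption,
# not necessarily the model used by examples/offline_inference.py.
llm = LLM(model="facebook/opt-125m")
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

outputs = llm.generate(["Hello, my name is"], sampling_params)
for output in outputs:
    print(output.outputs[0].text)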
.dockerignore

@@ -1,4 +1,6 @@
-vllm/*.so
+/.github/
 /.venv
 /build
 dist
+Dockerfile*
+vllm/*.so
Dockerfile.xpu

@@ -1,4 +1,4 @@
-FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04
+FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04 AS vllm-base
 
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \
     echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \
@@ -8,19 +8,48 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     chmod 644 /usr/share/keyrings/intel-graphics.gpg
 
 RUN apt-get update -y && \
-    apt-get install -y curl libicu70 lsb-release git wget vim numactl python3 python3-pip ffmpeg libsm6 libxext6 libgl1
+    apt-get install -y --no-install-recommends --fix-missing \
+        curl \
+        ffmpeg \
+        git \
+        libsndfile1 \
+        libsm6 \
+        libxext6 \
+        libgl1 \
+        lsb-release \
+        numactl \
+        python3 \
+        python3-dev \
+        python3-pip \
+        # vim \
+        wget
+
+WORKDIR /workspace/vllm
+COPY requirements-xpu.txt /workspace/vllm/requirements-xpu.txt
+COPY requirements-common.txt /workspace/vllm/requirements-common.txt
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install --no-cache-dir \
+    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
+    -r requirements-xpu.txt
 
 COPY ./ /workspace/vllm
 
-WORKDIR /workspace/vllm
+ENV VLLM_TARGET_DEVICE=xpu
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -v --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
-    cmake>=3.26 ninja packaging setuptools-scm>=8 wheel jinja2 \
-    -r requirements-xpu.txt
-
-RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=xpu python3 setup.py install
+    python3 setup.py install
 
 CMD ["/bin/bash"]
+
+FROM vllm-base AS vllm-openai
+
+# install additional dependencies for openai api server
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install accelerate hf_transfer 'modelscope!=1.15.0'
+
+ENV VLLM_USAGE_SOURCE production-docker-image \
+    TRITON_XPU_PROFILE 1
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
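The ENV VLLM_TARGET_DEVICE=xpu line replaces the per-command environment override on the setup.py invocation, so every later build step sees the target device. A minimal sketch of how a build script can read it, assuming plain os.environ semantics (vllm's actual accessor lives in its envs module):

import os

# "cuda" as the fallback default is an assumption for illustration;
# the Dockerfile above pins the variable to "xpu" for the whole image.
VLLM_TARGET_DEVICE = os.environ.get("VLLM_TARGET_DEVICE", "cuda")

if VLLM_TARGET_DEVICE == "xpu":
    print("building vllm for Intel XPU")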
requirements-common.txt

@@ -1,7 +1,7 @@
 psutil
 sentencepiece # Required for LLaMA tokenizer.
 numpy < 2.0.0
-requests
+requests >= 2.26.0
 tqdm
 py-cpuinfo
 transformers >= 4.45.0 # Required for Llama 3.2.
requirements-xpu.txt

@@ -1,9 +1,13 @@
 # Common dependencies
 -r requirements-common.txt
 
-setuptools < 70.0.0 # IPEX's torch have some dependency. to be removed.
-
 ray >= 2.9
+cmake>=3.26
+ninja
+packaging
+setuptools-scm>=8
+wheel
+jinja2
 # Following pkgs retrieved from https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 torch == 2.3.1+cxx11.abi
 intel-extension-for-pytorch == 2.3.110+xpu
setup.py

@@ -415,6 +415,8 @@ def get_requirements() -> List[str]:
         for line in requirements:
             if line.startswith("-r "):
                 resolved_requirements += _read_requirements(line.split()[1])
+            elif line.startswith("--"):
+                continue
             else:
                 resolved_requirements.append(line)
         return resolved_requirements
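The new elif keeps pip option lines such as --extra-index-url out of the resolved requirement list, since they are pip flags rather than package specifiers. A standalone sketch of the same filtering on hypothetical sample data (the real helper recurses into "-r" includes; here they are only noted):

def resolve_requirements(lines):
    # Mirrors the patched loop: keep package lines, skip pip options.
    resolved = []
    for line in lines:
        if line.startswith("-r "):
            resolved.append(f"<include {line.split()[1]}>")
        elif line.startswith("--"):
            continue  # e.g. --extra-index-url is not a requirement
        else:
            resolved.append(line)
    return resolved

sample = [
    "-r requirements-common.txt",
    "--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/",
    "torch == 2.3.1+cxx11.abi",
]
print(resolve_requirements(sample))
# ['<include requirements-common.txt>', 'torch == 2.3.1+cxx11.abi']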
vllm/platforms/__init__.py

@@ -42,6 +42,15 @@ try:
 except Exception:
     pass
 
+is_xpu = False
+
+try:
+    import torch
+    if hasattr(torch, 'xpu') and torch.xpu.is_available():
+        is_xpu = True
+except Exception:
+    pass
+
 is_cpu = False
 try:
     from importlib.metadata import version
@@ -60,6 +69,9 @@ elif is_cuda:
 elif is_rocm:
     from .rocm import RocmPlatform
     current_platform = RocmPlatform()
+elif is_xpu:
+    from .xpu import XPUPlatform
+    current_platform = XPUPlatform()
 elif is_cpu:
     from .cpu import CpuPlatform
     current_platform = CpuPlatform()
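The same probe can be run standalone to confirm a given torch build actually sees the XPU before the platform import runs:

import torch  # requires the IPEX-enabled torch build installed above

# Mirrors the detection added to vllm/platforms/__init__.py: plain
# CPU/CUDA builds of torch have no `torch.xpu` attribute at all,
# hence the hasattr() guard before is_available().
xpu_visible = hasattr(torch, "xpu") and torch.xpu.is_available()
print(f"XPU visible: {xpu_visible}")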
vllm/platforms/interface.py

@@ -8,6 +8,7 @@ class PlatformEnum(enum.Enum):
     CUDA = enum.auto()
     ROCM = enum.auto()
     TPU = enum.auto()
+    XPU = enum.auto()
     CPU = enum.auto()
     UNSPECIFIED = enum.auto()
@@ -41,6 +42,9 @@ class Platform:
     def is_tpu(self) -> bool:
         return self._enum == PlatformEnum.TPU
 
+    def is_xpu(self) -> bool:
+        return self._enum == PlatformEnum.XPU
+
     def is_cpu(self) -> bool:
         return self._enum == PlatformEnum.CPU
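Callers can then branch on the singleton without importing backend modules directly. A minimal usage sketch; the branch bodies are placeholders, not vllm's real call sites:

from vllm.platforms import current_platform

# Placeholder actions; real call sites select attention backends,
# workers, and device configs based on these predicates.
if current_platform.is_xpu():
    print("dispatching to the XPU code path")
elif current_platform.is_cpu():
    print("dispatching to the CPU code path")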
vllm/platforms/xpu.py (new file, 20 lines)

@@ -0,0 +1,20 @@
+import torch
+
+from .interface import DeviceCapability, Platform, PlatformEnum
+
+
+class XPUPlatform(Platform):
+    _enum = PlatformEnum.XPU
+
+    @staticmethod
+    def get_device_capability(device_id: int = 0) -> DeviceCapability:
+        return DeviceCapability(major=int(
+            torch.xpu.get_device_capability(device_id)['version'].split('.')
+            [0]),
+                                minor=int(
+                                    torch.xpu.get_device_capability(device_id)
+                                    ['version'].split('.')[1]))
+
+    @staticmethod
+    def get_device_name(device_id: int = 0) -> str:
+        return torch.xpu.get_device_name(device_id)
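A quick interactive check of the new platform class, assuming an XPU device is visible to torch:

from vllm.platforms.xpu import XPUPlatform

# Both helpers delegate to torch.xpu, so this needs the IPEX torch
# build from the Dockerfile and at least one visible XPU device.
print(XPUPlatform.get_device_name(0))
cap = XPUPlatform.get_device_capability(0)
print(f"device capability: {cap.major}.{cap.minor}")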