[CPU][CI] Improve CPU Dockerfile (#15690)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
parent
32b14baf8a
commit
280d074103
@ -82,7 +82,7 @@ steps:
|
|||||||
queue: cpu_queue_postmerge
|
queue: cpu_queue_postmerge
|
||||||
commands:
|
commands:
|
||||||
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
||||||
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain -f Dockerfile.cpu ."
|
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f Dockerfile.cpu ."
|
||||||
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
|
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
|
||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
@ -8,15 +8,19 @@ set -ex
|
|||||||
CORE_RANGE=${CORE_RANGE:-48-95}
|
CORE_RANGE=${CORE_RANGE:-48-95}
|
||||||
NUMA_NODE=${NUMA_NODE:-1}
|
NUMA_NODE=${NUMA_NODE:-1}
|
||||||
|
|
||||||
# Try building the docker image
|
|
||||||
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f Dockerfile.cpu .
|
|
||||||
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f Dockerfile.cpu .
|
|
||||||
|
|
||||||
# Setup cleanup
|
# Setup cleanup
|
||||||
remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; }
|
remove_docker_container() {
|
||||||
|
set -e;
|
||||||
|
docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true;
|
||||||
|
docker image rm cpu-test-"$BUILDKITE_BUILD_NUMBER" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 || true;
|
||||||
|
}
|
||||||
trap remove_docker_container EXIT
|
trap remove_docker_container EXIT
|
||||||
remove_docker_container
|
remove_docker_container
|
||||||
|
|
||||||
|
# Try building the docker image
|
||||||
|
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu .
|
||||||
|
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu .
|
||||||
|
|
||||||
# Run the image, setting --shm-size=4g for tensor parallel.
|
# Run the image, setting --shm-size=4g for tensor parallel.
|
||||||
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
|
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
|
||||||
--cpuset-mems="$NUMA_NODE" --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"
|
--cpuset-mems="$NUMA_NODE" --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"
|
||||||
@ -36,8 +40,6 @@ function cpu_tests() {
|
|||||||
# Run basic model test
|
# Run basic model test
|
||||||
docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
|
docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
|
||||||
set -e
|
set -e
|
||||||
pip install -r vllm/requirements/test.txt
|
|
||||||
pip install -r vllm/requirements/cpu.txt
|
|
||||||
pytest -v -s tests/kernels/test_cache.py -m cpu_model
|
pytest -v -s tests/kernels/test_cache.py -m cpu_model
|
||||||
pytest -v -s tests/kernels/test_mla_decode_cpu.py -m cpu_model
|
pytest -v -s tests/kernels/test_mla_decode_cpu.py -m cpu_model
|
||||||
pytest -v -s tests/models/decoder_only/language -m cpu_model
|
pytest -v -s tests/models/decoder_only/language -m cpu_model
|
||||||
|
155
Dockerfile.cpu
155
Dockerfile.cpu
@ -1,69 +1,138 @@
|
|||||||
# This vLLM Dockerfile is used to construct an image that can build and run vLLM on x86 CPU platforms.
|
# This vLLM Dockerfile is used to construct an image that can build and run vLLM on x86 CPU platforms.
|
||||||
|
#
|
||||||
|
# Build targets:
|
||||||
|
# vllm-openai (default): used for serving deployment
|
||||||
|
# vllm-test: used for CI tests
|
||||||
|
# vllm-dev: used for development
|
||||||
|
#
|
||||||
|
# Build arguments:
|
||||||
|
# PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9
|
||||||
|
# VLLM_CPU_DISABLE_AVX512=false (default)|true
|
||||||
|
#
|
||||||
|
|
||||||
FROM ubuntu:22.04 AS cpu-test-1
|
######################### BASE IMAGE #########################
|
||||||
|
FROM ubuntu:22.04 AS base
|
||||||
|
|
||||||
|
WORKDIR /workspace/
|
||||||
|
|
||||||
|
ARG PYTHON_VERSION=3.12
|
||||||
|
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
|
||||||
|
|
||||||
|
# Install minimal dependencies and uv
|
||||||
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||||
|
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||||
|
apt-get update -y \
|
||||||
|
&& apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
|
||||||
|
gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 \
|
||||||
|
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
|
||||||
|
&& curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
|
||||||
ENV CCACHE_DIR=/root/.cache/ccache
|
ENV CCACHE_DIR=/root/.cache/ccache
|
||||||
|
|
||||||
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/var/cache/apt \
|
ENV PATH="/root/.local/bin:$PATH"
|
||||||
apt-get update -y \
|
ENV VIRTUAL_ENV="/opt/venv"
|
||||||
&& apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
|
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
|
||||||
&& apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
|
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
||||||
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
|
|
||||||
|
|
||||||
# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
|
ENV UV_HTTP_TIMEOUT=500
|
||||||
# intel-openmp provides additional performance improvement vs. openmp
|
|
||||||
# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects.
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
||||||
pip install intel-openmp==2025.0.1
|
|
||||||
|
|
||||||
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
|
# Install Python dependencies
|
||||||
|
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
|
||||||
|
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
|
||||||
|
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||||
|
ENV UV_LINK_MODE="copy"
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
|
||||||
|
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
|
||||||
|
uv pip install --upgrade pip && \
|
||||||
|
uv pip install -r requirements/cpu.txt
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install intel-openmp==2024.2.1 intel_extension_for_pytorch==2.6.0
|
||||||
|
|
||||||
|
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD"
|
||||||
|
|
||||||
RUN echo 'ulimit -c 0' >> ~/.bashrc
|
RUN echo 'ulimit -c 0' >> ~/.bashrc
|
||||||
|
|
||||||
RUN pip install intel_extension_for_pytorch==2.6.0
|
######################### BUILD IMAGE #########################
|
||||||
|
FROM base AS vllm-build
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
|
|
||||||
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
|
|
||||||
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
||||||
--mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
|
|
||||||
pip install --upgrade pip && \
|
|
||||||
pip install -r requirements/build.txt
|
|
||||||
|
|
||||||
FROM cpu-test-1 AS build
|
|
||||||
|
|
||||||
WORKDIR /workspace/vllm
|
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
||||||
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
|
|
||||||
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
|
|
||||||
pip install -v -r requirements/cpu.txt
|
|
||||||
|
|
||||||
COPY . .
|
|
||||||
ARG GIT_REPO_CHECK=0
|
ARG GIT_REPO_CHECK=0
|
||||||
RUN --mount=type=bind,source=.git,target=.git \
|
|
||||||
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
|
|
||||||
|
|
||||||
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
|
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
|
||||||
ARG VLLM_CPU_DISABLE_AVX512
|
ARG VLLM_CPU_DISABLE_AVX512
|
||||||
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
|
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
WORKDIR /workspace/vllm
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
--mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
|
||||||
|
uv pip install -r requirements/build.txt
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
RUN --mount=type=bind,source=.git,target=.git \
|
||||||
|
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
--mount=type=cache,target=/root/.cache/ccache \
|
--mount=type=cache,target=/root/.cache/ccache \
|
||||||
--mount=type=bind,source=.git,target=.git \
|
--mount=type=bind,source=.git,target=.git \
|
||||||
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
|
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
|
||||||
pip install dist/*.whl && \
|
|
||||||
rm -rf dist
|
######################### DEV IMAGE #########################
|
||||||
|
FROM vllm-build AS vllm-dev
|
||||||
|
|
||||||
|
WORKDIR /workspace/vllm
|
||||||
|
|
||||||
|
# Install vim and numactl into the development image.
# The package index is refreshed first: the apt lists are written to the
# BuildKit cache mounts (see the apt-get update in the base stage) rather than
# to an image layer, so they may be missing or stale when this stage runs.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends vim numactl
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
--mount=type=cache,target=/root/.cache/ccache \
|
||||||
|
--mount=type=bind,source=.git,target=.git \
|
||||||
|
VLLM_TARGET_DEVICE=cpu python3 setup.py develop
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install -r requirements/dev.txt && \
|
||||||
|
pre-commit install --hook-type pre-commit --hook-type commit-msg
|
||||||
|
|
||||||
|
ENTRYPOINT ["bash"]
|
||||||
|
|
||||||
|
######################### TEST IMAGE #########################
|
||||||
|
FROM base AS vllm-test
|
||||||
|
|
||||||
WORKDIR /workspace/
|
WORKDIR /workspace/
|
||||||
|
|
||||||
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
--mount=type=bind,src=requirements/test.txt,target=requirements/test.txt \
|
||||||
|
uv pip install -r requirements/test.txt
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
|
||||||
|
uv pip install dist/*.whl
|
||||||
|
|
||||||
|
ADD ./tests/ ./tests/
|
||||||
|
ADD ./examples/ ./examples/
|
||||||
|
ADD ./benchmarks/ ./benchmarks/
|
||||||
|
|
||||||
# install development dependencies (for testing)
|
# install development dependencies (for testing)
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
pip install -e tests/vllm_test_utils
|
uv pip install -e tests/vllm_test_utils
|
||||||
|
|
||||||
|
ENTRYPOINT ["bash"]
|
||||||
|
|
||||||
|
######################### RELEASE IMAGE #########################
|
||||||
|
FROM base AS vllm-openai
|
||||||
|
|
||||||
|
WORKDIR /workspace/
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
--mount=type=cache,target=/root/.cache/ccache \
|
||||||
|
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
|
||||||
|
uv pip install dist/*.whl
|
||||||
|
|
||||||
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||||
|
@ -159,18 +159,37 @@ Currently, there are no pre-built CPU wheels.
|
|||||||
|
|
||||||
### Pre-built images
|
### Pre-built images
|
||||||
|
|
||||||
Currently, there are no pre-built CPU images.
|
:::::{tab-set}
|
||||||
|
:sync-group: device
|
||||||
|
|
||||||
|
::::{tab-item} Intel/AMD x86
|
||||||
|
:sync: x86
|
||||||
|
|
||||||
|
:::{include} cpu/x86.inc.md
|
||||||
|
:start-after: "### Pre-built images"
|
||||||
|
:end-before: "### Build image from source"
|
||||||
|
:::
|
||||||
|
|
||||||
|
::::
|
||||||
|
|
||||||
|
:::::
|
||||||
|
|
||||||
### Build image from source
|
### Build image from source
|
||||||
|
|
||||||
```console
|
```console
|
||||||
$ docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
|
$ docker build -f Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai .
|
||||||
$ docker run -it \
|
|
||||||
--rm \
|
# Launching OpenAI server
|
||||||
--network=host \
|
$ docker run --rm \
|
||||||
--cpuset-cpus=<cpu-id-list, optional> \
|
--privileged=true \
|
||||||
--cpuset-mems=<memory-node, optional> \
|
--shm-size=4g \
|
||||||
vllm-cpu-env
|
-p 8000:8000 \
|
||||||
|
-e VLLM_CPU_KVCACHE_SPACE=<KV cache space> \
|
||||||
|
-e VLLM_CPU_OMP_THREADS_BIND=<CPU cores for inference> \
|
||||||
|
vllm-cpu-env \
|
||||||
|
--model=meta-llama/Llama-3.2-1B-Instruct \
|
||||||
|
--dtype=bfloat16 \
|
||||||
|
<other vLLM OpenAI server arguments>
|
||||||
```
|
```
|
||||||
|
|
||||||
::::{tip}
|
::::{tip}
|
||||||
|
@ -34,6 +34,8 @@ There are no pre-built wheels or images for this device, so you must build vLLM
|
|||||||
|
|
||||||
### Pre-built images
|
### Pre-built images
|
||||||
|
|
||||||
|
See [https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo)
|
||||||
|
|
||||||
### Build image from source
|
### Build image from source
|
||||||
|
|
||||||
## Extra information
|
## Extra information
|
||||||
|
Loading…
x
Reference in New Issue
Block a user