From 280d074103160d042059dc60c28898fd9fb56568 Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Fri, 28 Mar 2025 16:36:31 +0800 Subject: [PATCH] [CPU][CI] Improve CPU Dockerfile (#15690) Signed-off-by: jiang1.li --- .buildkite/release-pipeline.yaml | 2 +- .buildkite/run-cpu-test.sh | 16 +- Dockerfile.cpu | 155 +++++++++++++----- .../getting_started/installation/cpu.md | 35 +++- .../installation/cpu/x86.inc.md | 2 + 5 files changed, 151 insertions(+), 59 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 18f582b6..a1dcb01e 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -82,7 +82,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain -f Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f Dockerfile.cpu ." - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh index 05744bb5..bf9f191d 100644 --- a/.buildkite/run-cpu-test.sh +++ b/.buildkite/run-cpu-test.sh @@ -8,15 +8,19 @@ set -ex CORE_RANGE=${CORE_RANGE:-48-95} NUMA_NODE=${NUMA_NODE:-1} -# Try building the docker image -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f Dockerfile.cpu . 
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f Dockerfile.cpu . - # Setup cleanup -remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; } +remove_docker_container() { + set -e; + docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; + docker image rm cpu-test-"$BUILDKITE_BUILD_NUMBER" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 || true; +} trap remove_docker_container EXIT remove_docker_container +# Try building the docker image +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu . + # Run the image, setting --shm-size=4g for tensor parallel. 
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \ --cpuset-mems="$NUMA_NODE" --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER" @@ -36,8 +40,6 @@ function cpu_tests() { # Run basic model test docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c " set -e - pip install -r vllm/requirements/test.txt - pip install -r vllm/requirements/cpu.txt pytest -v -s tests/kernels/test_cache.py -m cpu_model pytest -v -s tests/kernels/test_mla_decode_cpu.py -m cpu_model pytest -v -s tests/models/decoder_only/language -m cpu_model diff --git a/Dockerfile.cpu b/Dockerfile.cpu index a1009052..81336518 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -1,69 +1,138 @@ # This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform. +# +# Build targets: +# vllm-openai (default): used for serving deployment +# vllm-test: used for CI tests +# vllm-dev: used for development +# +# Build arguments: +# PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9 +# VLLM_CPU_DISABLE_AVX512=false (default)|true +# -FROM ubuntu:22.04 AS cpu-test-1 +######################### BASE IMAGE ######################### +FROM ubuntu:22.04 AS base + +WORKDIR /workspace/ + +ARG PYTHON_VERSION=3.12 +ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" + +# Install minimal dependencies and uv +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + apt-get update -y \ + && apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \ + gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 \ + && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \ + && curl -LsSf https://astral.sh/uv/install.sh | sh ENV 
CCACHE_DIR=/root/.cache/ccache - ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache -RUN --mount=type=cache,target=/var/cache/apt \ - apt-get update -y \ - && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \ - && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \ - && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 +ENV PATH="/root/.local/bin:$PATH" +ENV VIRTUAL_ENV="/opt/venv" +RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV} +ENV PATH="$VIRTUAL_ENV/bin:$PATH" -# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html -# intel-openmp provides additional performance improvement vs. openmp -# tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects. -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install intel-openmp==2025.0.1 +ENV UV_HTTP_TIMEOUT=500 -ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so" +# Install Python dependencies +ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} +ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} +ENV UV_INDEX_STRATEGY="unsafe-best-match" +ENV UV_LINK_MODE="copy" +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \ + --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \ + uv pip install --upgrade pip && \ + uv pip install -r requirements/cpu.txt + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install intel-openmp==2024.2.1 intel_extension_for_pytorch==2.6.0 + +ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD" RUN echo 'ulimit -c 0' >> ~/.bashrc -RUN pip install intel_extension_for_pytorch==2.6.0 +######################### BUILD IMAGE ######################### +FROM base AS 
vllm-build -WORKDIR /workspace - -ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" -ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \ - pip install --upgrade pip && \ - pip install -r requirements/build.txt - -FROM cpu-test-1 AS build - -WORKDIR /workspace/vllm - -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \ - --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \ - pip install -v -r requirements/cpu.txt - -COPY . . ARG GIT_REPO_CHECK=0 -RUN --mount=type=bind,source=.git,target=.git \ - if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi - # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ... ARG VLLM_CPU_DISABLE_AVX512 ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} -RUN --mount=type=cache,target=/root/.cache/pip \ +WORKDIR /workspace/vllm + +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \ + uv pip install -r requirements/build.txt + +COPY . . 
+RUN --mount=type=bind,source=.git,target=.git \ + if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi + +RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/ccache \ --mount=type=bind,source=.git,target=.git \ - VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \ - pip install dist/*.whl && \ - rm -rf dist + VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel + +######################### DEV IMAGE ######################### +FROM vllm-build AS vllm-dev + +WORKDIR /workspace/vllm + +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + apt-get install -y --no-install-recommends vim numactl + +# install development dependencies (for testing) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install -e tests/vllm_test_utils + +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=cache,target=/root/.cache/ccache \ + --mount=type=bind,source=.git,target=.git \ + VLLM_TARGET_DEVICE=cpu python3 setup.py develop + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install -r requirements/dev.txt && \ + pre-commit install --hook-type pre-commit --hook-type commit-msg + +ENTRYPOINT ["bash"] + +######################### TEST IMAGE ######################### +FROM base AS vllm-test WORKDIR /workspace/ -RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,src=requirements/test.txt,target=requirements/test.txt \ + uv pip install -r requirements/test.txt + +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \ + uv pip install dist/*.whl + +ADD ./tests/ ./tests/ +ADD ./examples/ ./examples/ +ADD ./benchmarks/ ./benchmarks/ # install development dependencies (for testing) -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -e 
tests/vllm_test_utils +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install -e tests/vllm_test_utils + +ENTRYPOINT ["bash"] + +######################### RELEASE IMAGE ######################### +FROM base AS vllm-openai + +WORKDIR /workspace/ + +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=cache,target=/root/.cache/ccache \ + --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \ + uv pip install dist/*.whl ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] diff --git a/docs/source/getting_started/installation/cpu.md b/docs/source/getting_started/installation/cpu.md index 1b2ffd61..844b184a 100644 --- a/docs/source/getting_started/installation/cpu.md +++ b/docs/source/getting_started/installation/cpu.md @@ -159,18 +159,37 @@ Currently, there are no pre-built CPU wheels. ### Pre-built images -Currently, there are no pre-build CPU images. +:::::{tab-set} +:sync-group: device + +::::{tab-item} Intel/AMD x86 +:sync: x86 + +:::{include} cpu/x86.inc.md +:start-after: "### Pre-built images" +:end-before: "### Build image from source" +::: + +:::: + +::::: ### Build image from source ```console -$ docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g . -$ docker run -it \ - --rm \ - --network=host \ - --cpuset-cpus= \ - --cpuset-mems= \ - vllm-cpu-env +$ docker build -f Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai . 
+ +# Launching OpenAI server +$ docker run --rm \ + --privileged=true \ + --shm-size=4g \ + -p 8000:8000 \ + -e VLLM_CPU_KVCACHE_SPACE=<KV cache space> \ + -e VLLM_CPU_OMP_THREADS_BIND=<CPU cores for inference> \ + vllm-cpu-env \ + --model=meta-llama/Llama-3.2-1B-Instruct \ + --dtype=bfloat16 \ + other vLLM OpenAI server arguments ``` ::::{tip} diff --git a/docs/source/getting_started/installation/cpu/x86.inc.md b/docs/source/getting_started/installation/cpu/x86.inc.md index b2f3bafb..9ae2035d 100644 --- a/docs/source/getting_started/installation/cpu/x86.inc.md +++ b/docs/source/getting_started/installation/cpu/x86.inc.md @@ -34,6 +34,8 @@ There are no pre-built wheels or images for this device, so you must build vLLM ### Pre-built images +See [https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo) + ### Build image from source ## Extra information