[ci] Setup Release pipeline and build release wheels with cache (#5610)

Signed-off-by: kevin <kevin@anyscale.com>
This commit is contained in:
Kevin H. Luu 2024-06-18 11:00:36 -07:00 committed by GitHub
parent 95db455e7f
commit 19091efc44
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 62 additions and 17 deletions

View File

@ -0,0 +1,21 @@
steps:
- block: "Build wheels"
- label: "Build wheel - Python {{matrix.python_version}}, CUDA {{matrix.cuda_version}}"
agents:
queue: cpu_queue
commands:
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg CUDA_VERSION={{matrix.cuda_version}} --build-arg PYTHON_VERSION={{matrix.python_version}} --tag vllm-ci:build-image --target build --progress plain ."
- "mkdir artifacts"
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image cp -r dist /artifacts_host"
- "aws s3 cp --recursive artifacts/dist s3://vllm-wheels/$BUILDKITE_COMMIT/"
matrix:
setup:
cuda_version:
- "11.8.0"
- "12.1.0"
python_version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"

View File

@ -5,9 +5,26 @@
# docs/source/dev/dockerfile/dockerfile.rst and # docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png # docs/source/assets/dev/dockerfile-stages-dependency.png
ARG CUDA_VERSION=12.4.1
#################### BASE BUILD IMAGE #################### #################### BASE BUILD IMAGE ####################
# prepare basic build environment # prepare basic build environment
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3
ENV DEBIAN_FRONTEND=noninteractive
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&& echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
&& apt-get update -y \
&& apt-get install -y ccache software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa \
&& apt-get update -y \
&& apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
&& if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
&& python3 --version \
&& python3 -m pip --version
RUN apt-get update -y \ RUN apt-get update -y \
&& apt-get install -y python3-pip git curl sudo && apt-get install -y python3-pip git curl sudo
@ -16,7 +33,7 @@ RUN apt-get update -y \
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image # this won't be needed for future versions of this docker image
# or future versions of triton. # or future versions of triton.
RUN ldconfig /usr/local/cuda-12.4/compat/ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
WORKDIR /workspace WORKDIR /workspace
@ -24,14 +41,7 @@ WORKDIR /workspace
COPY requirements-common.txt requirements-common.txt COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt COPY requirements-cuda.txt requirements-cuda.txt
RUN --mount=type=cache,target=/root/.cache/pip \ RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-cuda.txt python3 -m pip install -r requirements-cuda.txt
# install development dependencies
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-dev.txt
# cuda arch list used by torch # cuda arch list used by torch
# can be useful for both `dev` and `test` # can be useful for both `dev` and `test`
@ -41,14 +51,16 @@ ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
#################### BASE BUILD IMAGE #################### #################### BASE BUILD IMAGE ####################
#################### WHEEL BUILD IMAGE #################### #################### WHEEL BUILD IMAGE ####################
FROM dev AS build FROM base AS build
ARG PYTHON_VERSION=3
# install build dependencies # install build dependencies
COPY requirements-build.txt requirements-build.txt COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \ RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-build.txt python3 -m pip install -r requirements-build.txt
# install compiler cache to speed up compilation leveraging local or remote caching # install compiler cache to speed up compilation leveraging local or remote caching
RUN apt-get update -y && apt-get install -y ccache RUN apt-get update -y && apt-get install -y ccache
@ -101,9 +113,21 @@ RUN python3 check-wheel-size.py dist
#################### EXTENSION Build IMAGE #################### #################### EXTENSION Build IMAGE ####################
#################### DEV IMAGE ####################
FROM base as dev
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m pip install -r requirements-dev.txt
#################### DEV IMAGE ####################
#################### vLLM installation IMAGE #################### #################### vLLM installation IMAGE ####################
# image with vLLM installed # image with vLLM installed
FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS vllm-base FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
ARG CUDA_VERSION=12.4.1
WORKDIR /vllm-workspace WORKDIR /vllm-workspace
RUN apt-get update -y \ RUN apt-get update -y \
@ -113,12 +137,12 @@ RUN apt-get update -y \
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image # this won't be needed for future versions of this docker image
# or future versions of triton. # or future versions of triton.
RUN ldconfig /usr/local/cuda-12.4/compat/ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# install vllm wheel first, so that torch etc will be installed # install vllm wheel first, so that torch etc will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
--mount=type=cache,target=/root/.cache/pip \ --mount=type=cache,target=/root/.cache/pip \
pip install dist/*.whl --verbose python3 -m pip install dist/*.whl --verbose
#################### vLLM installation IMAGE #################### #################### vLLM installation IMAGE ####################
@ -131,7 +155,7 @@ ADD . /vllm-workspace/
# install development dependencies (for testing) # install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \ RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-dev.txt python3 -m pip install -r requirements-dev.txt
# doc requires source code # doc requires source code
# we hide them inside `test_docs/` , so that this source code # we hide them inside `test_docs/` , so that this source code