From 9cabcb76453e7d8384d1858663f1841cef174c2c Mon Sep 17 00:00:00 2001
From: Stephen Krider <72541272+skrider@users.noreply.github.com>
Date: Tue, 31 Oct 2023 12:36:47 -0700
Subject: [PATCH] Add Dockerfile (#1350)

---
 Dockerfile                                    | 73 ++++++++++++++++++++
 docs/source/index.rst                         |  1 +
 docs/source/serving/deploying_with_docker.rst | 38 +++++++++++
 requirements-dev.txt                          |  1 +
 setup.py                                      |  8 ++-
 5 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 docs/source/serving/deploying_with_docker.rst

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..72f02494
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,73 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev
+
+RUN apt-get update -y \
+    && apt-get install -y python3-pip
+
+WORKDIR /workspace
+
+# install build and runtime dependencies
+COPY requirements.txt requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt
+
+# install development dependencies
+COPY requirements-dev.txt requirements-dev.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements-dev.txt
+
+# image to build pytorch extensions
+FROM dev AS build
+
+# copy input files
+COPY csrc csrc
+COPY setup.py setup.py
+COPY requirements.txt requirements.txt
+COPY pyproject.toml pyproject.toml
+COPY vllm/__init__.py vllm/__init__.py
+
+# max jobs used by Ninja to build extensions (override with --build-arg max_jobs=N)
+ARG max_jobs=2
+ENV MAX_JOBS=$max_jobs
+RUN python3 setup.py build_ext --inplace
+
+# image to run unit testing suite
+FROM dev AS test
+
+# copy pytorch extensions separately to avoid having to rebuild
+# when python code changes
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY tests tests
+COPY vllm vllm
+
+ENTRYPOINT ["python3", "-m", "pytest", "tests"]
+
+# use the CUDA base image, as the CUDA runtime dependencies are already installed via pip
+FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS vllm-base
+
+# python3-pip is needed to install the runtime requirements (which include NCCL, required by ray)
+RUN apt-get update -y \
+    && apt-get install -y python3-pip
+
+WORKDIR /workspace
+COPY requirements.txt requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt
+
+FROM vllm-base AS vllm
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY vllm vllm
+
+EXPOSE 8000
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
+
+# openai api server alternative
+FROM vllm-base AS vllm-openai
+# install additional dependencies for openai api server
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install accelerate fschat
+
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY vllm vllm
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
index f2131cd8..60a5b07f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -65,6 +65,7 @@ Documentation
    serving/distributed_serving
    serving/run_on_sky
    serving/deploying_with_triton
+   serving/deploying_with_docker
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst
new file mode 100644
index 00000000..c73288cb
--- /dev/null
+++ b/docs/source/serving/deploying_with_docker.rst
@@ -0,0 +1,38 @@
+.. _deploying_with_docker:
+
+Deploying with Docker
+============================
+
+You can build and run vLLM from source using the provided Dockerfile. To build vLLM:
+
+.. code-block:: console
+
+    $ DOCKER_BUILDKIT=1 docker build . --target vllm --tag vllm --build-arg max_jobs=8
+
+To run vLLM:
+
+.. code-block:: console
+
+    $ docker run --runtime nvidia --gpus all \
+        -v ~/.cache/huggingface:/root/.cache/huggingface \
+        -p 8000:8000 \
+        --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
+        vllm
+
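+Once the server is up you can query it over HTTP. As a quick smoke
+test (assuming the container is reachable on ``localhost:8000`` and is
+serving the default model), hit the ``/generate`` endpoint with ``curl``:
+
+.. code-block:: console
+
+    $ curl http://localhost:8000/generate \
+        -d '{"prompt": "San Francisco is a", "max_tokens": 16}'
+
+The Dockerfile also defines a ``test`` target whose entrypoint runs the
+unit test suite, which is a convenient sanity check for a freshly built
+image (the ``vllm-test`` tag below is arbitrary):
+
+.. code-block:: console
+
+    $ DOCKER_BUILDKIT=1 docker build . --target test --tag vllm-test
+    $ docker run --runtime nvidia --gpus all vllm-test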
diff --git a/requirements-dev.txt b/requirements-dev.txt
index bfa1d06d..b78976e8 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -12,3 +12,4 @@ types-setuptools
 pytest
 pytest-forked
 pytest-asyncio
+
diff --git a/setup.py b/setup.py
index daa40928..660b5196 100644
--- a/setup.py
+++ b/setup.py
@@ -239,8 +239,12 @@ def find_version(filepath: str):
 
 def read_readme() -> str:
-    """Read the README file."""
-    return io.open(get_path("README.md"), "r", encoding="utf-8").read()
+    """Read the README file if present."""
+    path = get_path("README.md")
+    if os.path.isfile(path):
+        return io.open(path, "r", encoding="utf-8").read()
+    else:
+        return ""
 
 
 def get_requirements() -> List[str]: