From 9cabcb76453e7d8384d1858663f1841cef174c2c Mon Sep 17 00:00:00 2001
From: Stephen Krider <72541272+skrider@users.noreply.github.com>
Date: Tue, 31 Oct 2023 12:36:47 -0700
Subject: [PATCH] Add Dockerfile (#1350)

---
 Dockerfile                                    | 73 ++++++++++++++++++++
 docs/source/index.rst                         |  1 +
 docs/source/serving/deploying_with_docker.rst | 38 +++++++++++
 requirements-dev.txt                          |  1 +
 setup.py                                      |  8 ++-
 5 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 docs/source/serving/deploying_with_docker.rst

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..72f02494
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,73 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev
+
+RUN apt-get update -y \
+    && apt-get install -y python3-pip
+
+WORKDIR /workspace
+
+# install build and runtime dependencies
+COPY requirements.txt requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt
+
+# install development dependencies
+COPY requirements-dev.txt requirements-dev.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements-dev.txt
+
+# image to build pytorch extensions
+FROM dev AS build
+
+# copy input files
+COPY csrc csrc
+COPY setup.py setup.py
+COPY requirements.txt requirements.txt
+COPY pyproject.toml pyproject.toml
+COPY vllm/__init__.py vllm/__init__.py
+
+# max jobs used by Ninja to build extensions (override with --build-arg max_jobs=N)
+ARG max_jobs=2
+ENV MAX_JOBS=$max_jobs
+RUN python3 setup.py build_ext --inplace
+
+# image to run unit testing suite
+FROM dev AS test
+
+# copy pytorch extensions separately to avoid having to rebuild
+# when python code changes
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY tests tests
+COPY vllm vllm
+
+ENTRYPOINT ["python3", "-m", "pytest", "tests"]
+
+# use the CUDA base image, as the CUDA runtime dependencies are already installed via pip
+FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS vllm-base
+
+# python3-pip is needed to install the runtime requirements (which include NCCL, required by ray)
+RUN apt-get update -y \
+    && apt-get install -y python3-pip
+
+WORKDIR /workspace
+COPY requirements.txt requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt
+
+FROM vllm-base AS vllm
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY vllm vllm
+
+EXPOSE 8000
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
+
+# openai api server alternative
+FROM vllm-base AS vllm-openai
+# install additional dependencies for openai api server
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install accelerate fschat
+
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY vllm vllm
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
index f2131cd8..60a5b07f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -65,6 +65,7 @@ Documentation
    serving/distributed_serving
    serving/run_on_sky
    serving/deploying_with_triton
+   serving/deploying_with_docker
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst
new file mode 100644
index 00000000..c73288cb
--- /dev/null
+++ b/docs/source/serving/deploying_with_docker.rst
@@ -0,0 +1,38 @@
+.. _deploying_with_docker:
+
+Deploying with Docker
+============================
+
+You can build and run vLLM from source using the provided Dockerfile. To build vLLM:
+
+.. code-block:: console
+
+    $ DOCKER_BUILDKIT=1 docker build . --target vllm --tag vllm --build-arg max_jobs=8
+
+To run vLLM:
+
+.. code-block:: console
+
+    $ docker run --runtime nvidia --gpus all \
+        -v ~/.cache/huggingface:/root/.cache/huggingface \
+        -p 8000:8000 \
+        --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
+        vllm
+
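+Once the server is up you can query it over HTTP. As a quick smoke
+test (assuming the container is reachable on ``localhost:8000`` and is
+serving the default model), hit the ``/generate`` endpoint with ``curl``:
+
+.. code-block:: console
+
+    $ curl http://localhost:8000/generate \
+        -d '{"prompt": "San Francisco is a", "max_tokens": 16}'
+
+The Dockerfile also defines a ``test`` target whose entrypoint runs the
+unit test suite, which is a convenient sanity check for a freshly built
+image (the ``vllm-test`` tag below is arbitrary):
+
+.. code-block:: console
+
+    $ DOCKER_BUILDKIT=1 docker build . --target test --tag vllm-test
+    $ docker run --runtime nvidia --gpus all vllm-test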
diff --git a/requirements-dev.txt b/requirements-dev.txt
index bfa1d06d..b78976e8 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -12,3 +12,4 @@ types-setuptools
 pytest
 pytest-forked
 pytest-asyncio
+
diff --git a/setup.py b/setup.py
index daa40928..660b5196 100644
--- a/setup.py
+++ b/setup.py
@@ -239,8 +239,12 @@ def find_version(filepath: str):
 
 def read_readme() -> str:
-    """Read the README file."""
-    return io.open(get_path("README.md"), "r", encoding="utf-8").read()
+    """Read the README file if present."""
+    path = get_path("README.md")
+    if os.path.isfile(path):
+        return io.open(path, "r", encoding="utf-8").read()
+    else:
+        return ""
 
 
 def get_requirements() -> List[str]: