Add Dockerfile (#1350)

This commit is contained in:
Stephen Krider 2023-10-31 12:36:47 -07:00 committed by GitHub
parent 7b895c5976
commit 9cabcb7645
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 101 additions and 2 deletions

72
Dockerfile Normal file
View File

@ -0,0 +1,72 @@
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev
RUN apt-get update -y \
&& apt-get install -y python3-pip
WORKDIR /workspace
# install build and runtime dependencies
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt
# install development dependencies
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-dev.txt
# image to build pytorch extensions
FROM dev AS build
# copy input files
COPY csrc csrc
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py
# max jobs used by Ninja to build extensions
ENV MAX_JOBS=$max_jobs
RUN python3 setup.py build_ext --inplace
# image to run unit testing suite
FROM dev AS test
# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY tests tests
COPY vllm vllm
ENTRYPOINT ["python3", "-m", "pytest", "tests"]
# use CUDA base as CUDA runtime dependencies are already installed via pip
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS vllm-base
# libnccl required for ray
RUN apt-get update -y \
&& apt-get install -y python3-pip
WORKDIR /workspace
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt
FROM vllm-base AS vllm
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm
EXPOSE 8000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
# openai api server alternative
FROM vllm-base AS vllm-openai
# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/pip \
pip install accelerate fschat
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

View File

@ -65,6 +65,7 @@ Documentation
serving/distributed_serving
serving/run_on_sky
serving/deploying_with_triton
serving/deploying_with_docker
.. toctree::
:maxdepth: 1

View File

@ -0,0 +1,21 @@
.. _deploying_with_docker:
Deploying with Docker
============================
You can build and run vLLM from source via the provided dockerfile. To build vLLM:
.. code-block:: console
$ DOCKER_BUILDKIT=1 docker build . --target vllm --tag vllm --build-arg max_jobs=8
To run vLLM:
.. code-block:: console
$ docker run --runtime nvidia --gpus all \
-v ~/.cache/huggingface:/root/.cache/huggingface \
-p 8000:8000 \
--env "HUGGING_FACE_HUB_TOKEN=<secret>" \
vllm <args...>

View File

@ -12,3 +12,4 @@ types-setuptools
pytest
pytest-forked
pytest-asyncio

View File

@ -239,8 +239,12 @@ def find_version(filepath: str):
def read_readme() -> str:
"""Read the README file."""
return io.open(get_path("README.md"), "r", encoding="utf-8").read()
"""Read the README file if present."""
p = get_path("README.md")
if os.path.isfile(p):
return io.open(get_path("README.md"), "r", encoding="utf-8").read()
else:
return ""
def get_requirements() -> List[str]: