Add Dockerfile (#1350)
This commit is contained in:
parent
7b895c5976
commit
9cabcb7645
72
Dockerfile
Normal file
72
Dockerfile
Normal file
@ -0,0 +1,72 @@
|
||||
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev
|
||||
|
||||
# Install pip for the dependency installs below. The apt package lists are
# removed in the same layer so they do not persist in the image.
RUN apt-get update -y \
    && apt-get install -y python3-pip \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
# install build and runtime dependencies
|
||||
COPY requirements.txt requirements.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install -r requirements.txt
|
||||
|
||||
# install development dependencies
|
||||
COPY requirements-dev.txt requirements-dev.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install -r requirements-dev.txt
|
||||
|
||||
# image to build pytorch extensions
|
||||
FROM dev AS build
|
||||
|
||||
# copy input files
|
||||
COPY csrc csrc
|
||||
COPY setup.py setup.py
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY pyproject.toml pyproject.toml
|
||||
COPY vllm/__init__.py vllm/__init__.py
|
||||
|
||||
# max jobs used by Ninja to build extensions
|
||||
# max_jobs is supplied at build time (e.g. --build-arg max_jobs=8). It must be
# declared with ARG inside this stage; without the declaration $max_jobs
# expands to an empty string and the build argument is silently ignored.
ARG max_jobs
ENV MAX_JOBS=$max_jobs
|
||||
RUN python3 setup.py build_ext --inplace
|
||||
|
||||
# image to run unit testing suite
|
||||
FROM dev AS test
|
||||
|
||||
# copy pytorch extensions separately to avoid having to rebuild
|
||||
# when python code changes
|
||||
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
|
||||
COPY tests tests
|
||||
COPY vllm vllm
|
||||
|
||||
ENTRYPOINT ["python3", "-m", "pytest", "tests"]
|
||||
|
||||
# use CUDA base as CUDA runtime dependencies are already installed via pip
|
||||
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS vllm-base
|
||||
|
||||
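# install pip, which is used below to install the runtime dependencies
# NOTE(review): this comment previously read "libnccl required for ray", but no
# libnccl package is installed in this stage - confirm whether one is needed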
|
||||
# Install pip for the requirements install below. The apt package lists are
# removed in the same layer so they do not add to the runtime image size.
RUN apt-get update -y \
    && apt-get install -y python3-pip \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /workspace
|
||||
COPY requirements.txt requirements.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install -r requirements.txt
|
||||
|
||||
FROM vllm-base AS vllm
|
||||
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
|
||||
COPY vllm vllm
|
||||
|
||||
EXPOSE 8000
|
||||
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
|
||||
|
||||
# openai api server alternative
|
||||
FROM vllm-base AS vllm-openai
|
||||
# install additional dependencies for openai api server
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install accelerate fschat
|
||||
|
||||
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
|
||||
COPY vllm vllm
|
||||
|
||||
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||
|
@ -65,6 +65,7 @@ Documentation
|
||||
serving/distributed_serving
|
||||
serving/run_on_sky
|
||||
serving/deploying_with_triton
|
||||
serving/deploying_with_docker
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
21
docs/source/serving/deploying_with_docker.rst
Normal file
21
docs/source/serving/deploying_with_docker.rst
Normal file
@ -0,0 +1,21 @@
|
||||
.. _deploying_with_docker:
|
||||
|
||||
Deploying with Docker
|
||||
============================
|
||||
|
||||
You can build and run vLLM from source using the provided Dockerfile. To build the vLLM image:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ DOCKER_BUILDKIT=1 docker build . --target vllm --tag vllm --build-arg max_jobs=8
|
||||
|
||||
To run vLLM:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ docker run --runtime nvidia --gpus all \
|
||||
-v ~/.cache/huggingface:/root/.cache/huggingface \
|
||||
-p 8000:8000 \
|
||||
--env "HUGGING_FACE_HUB_TOKEN=<secret>" \
|
||||
vllm <args...>
|
||||
|
@ -12,3 +12,4 @@ types-setuptools
|
||||
pytest
|
||||
pytest-forked
|
||||
pytest-asyncio
|
||||
|
||||
|
8
setup.py
8
setup.py
@ -239,8 +239,12 @@ def find_version(filepath: str):
|
||||
|
||||
|
||||
def read_readme() -> str:
|
||||
"""Read the README file."""
|
||||
return io.open(get_path("README.md"), "r", encoding="utf-8").read()
|
||||
"""Read the README file if present."""
|
||||
p = get_path("README.md")
|
||||
if os.path.isfile(p):
|
||||
return io.open(get_path("README.md"), "r", encoding="utf-8").read()
|
||||
else:
|
||||
return ""
|
||||
|
||||
|
||||
def get_requirements() -> List[str]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user