Move dockerfiles into their own directory (#14549)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent f98a4920f9
commit e6e3c55ef2
@@ -3,7 +3,7 @@ steps:
 agents:
 queue: cpu_queue_postmerge
 commands:
-- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain ."
+- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
 - "mkdir artifacts"
 - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
 - "bash .buildkite/upload-wheels.sh"
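With the Dockerfile relocated, every `docker build` invocation now passes `-f docker/Dockerfile` explicitly. A minimal local reproduction of the wheel-build step above, assuming BuildKit and a CUDA-capable build host, might look like this sketch:

```bash
# Sketch: reproduce the CI wheel build from the repository root.
# The only functional change is the explicit -f docker/Dockerfile path.
DOCKER_BUILDKIT=1 docker build \
  --build-arg max_jobs=16 \
  --build-arg USE_SCCACHE=1 \
  --build-arg GIT_REPO_CHECK=1 \
  --build-arg CUDA_VERSION=12.4.0 \
  --tag vllm-ci:build-image \
  --target build \
  --progress plain \
  -f docker/Dockerfile .
```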
@@ -14,7 +14,7 @@ steps:
 agents:
 queue: cpu_queue_postmerge
 commands:
-- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain ."
+- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
 - "mkdir artifacts"
 - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
 - "bash .buildkite/upload-wheels.sh"
@@ -31,7 +31,7 @@ steps:
 agents:
 queue: cpu_queue_postmerge
 commands:
-- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain ."
+- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
 - "mkdir artifacts"
 - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
 - "bash .buildkite/upload-wheels.sh"
@@ -48,7 +48,7 @@ steps:
 queue: cpu_queue_postmerge
 commands:
 - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain ."
+- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ."
 - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"

 - label: "Build and publish TPU release image"
@@ -57,7 +57,7 @@ steps:
 agents:
 queue: tpu_queue_postmerge
 commands:
-- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f Dockerfile.tpu ."
+- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f docker/Dockerfile.tpu ."
 - "docker push vllm/vllm-tpu:nightly"
 - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT"
 plugins:
@@ -82,7 +82,7 @@ steps:
 queue: cpu_queue_postmerge
 commands:
 - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f Dockerfile.cpu ."
+- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
 - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
 env:
 DOCKER_BUILDKIT: "1"
@@ -10,5 +10,5 @@ trap remove_docker_container EXIT
 remove_docker_container

 # Try building the docker image
-docker build -t cpu-test -f Dockerfile.ppc64le .
+docker build -t cpu-test -f docker/Dockerfile.ppc64le .
@@ -18,8 +18,8 @@ trap remove_docker_container EXIT
 remove_docker_container

 # Try building the docker image
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu .
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f docker/Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f docker/Dockerfile.cpu .

 # Run the image, setting --shm-size=4g for tensor parallel.
 docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
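The same pattern applies to a local CPU test build; a simplified sketch of what the script above now does, with the NUMA pinning via `numactl` omitted, is:

```bash
# Sketch: CPU test image build with the relocated Dockerfile.cpu.
docker build --tag cpu-test --target vllm-test -f docker/Dockerfile.cpu .

# Optional AVX2-only variant, mirroring the second CI build above.
docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" \
  --tag cpu-test-avx2 --target vllm-test -f docker/Dockerfile.cpu .
```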
@@ -9,6 +9,7 @@ python3 use_existing_torch.py

 # Try building the docker image
 DOCKER_BUILDKIT=1 docker build . \
+--file docker/Dockerfile \
 --target vllm-openai \
 --platform "linux/arm64" \
 -t gh200-test \
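Assembled, the GH200 (arm64) test build now reads roughly as follows; this is a sketch with the trailing build arguments from the script elided:

```bash
# Sketch: arm64 build with the new --file docker/Dockerfile flag.
python3 use_existing_torch.py
DOCKER_BUILDKIT=1 docker build . \
    --file docker/Dockerfile \
    --target vllm-openai \
    --platform "linux/arm64" \
    -t gh200-test
```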
@@ -5,7 +5,7 @@
 set -ex

 # Try building the docker image
-docker build -t hpu-test-env -f Dockerfile.hpu .
+docker build -t hpu-test-env -f docker/Dockerfile.hpu .

 # Setup cleanup
 # certain versions of HPU software stack have a bug that can
@@ -35,7 +35,7 @@ else
 date "+%s" > /tmp/neuron-docker-build-timestamp
 fi

-docker build -t "${image_name}" -f Dockerfile.neuron .
+docker build -t "${image_name}" -f docker/Dockerfile.neuron .

 # Setup cleanup
 remove_docker_container() {
@@ -3,7 +3,7 @@
 set -e

 # Build the docker image.
-docker build -f Dockerfile.tpu -t vllm-tpu .
+docker build -f docker/Dockerfile.tpu -t vllm-tpu .

 # Set up cleanup.
 remove_docker_container() { docker rm -f tpu-test || true; }
@@ -8,7 +8,7 @@ image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}"
 container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"

 # Try building the docker image
-docker build -t ${image_name} -f Dockerfile.xpu .
+docker build -t ${image_name} -f docker/Dockerfile.xpu .

 # Setup cleanup
 remove_docker_container() {
.github/mergify.yml
@@ -19,7 +19,7 @@ pull_request_rules:
 - files~=\.buildkite/
 - files~=^cmake/
 - files=CMakeLists.txt
-- files~=^Dockerfile
+- files~=^docker/Dockerfile
 - files~=^requirements.*\.txt
 - files=setup.py
 actions:
.github/workflows/lint-and-deploy.yaml
@@ -50,7 +50,7 @@ jobs:
 uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0

 - name: Build the Docker image vllm cpu
-run: docker buildx build -f Dockerfile.cpu -t vllm-cpu-env .
+run: docker buildx build -f docker/Dockerfile.cpu -t vllm-cpu-env .

 - name: Configuration of docker images, network and namespace for the kind cluster
 run: |
@@ -44,7 +44,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
 #
 # Note: the CUDA torch version is derived from pyproject.toml and various
 # requirements.txt files and should be kept consistent. The ROCm torch
-# versions are derived from Dockerfile.rocm
+# versions are derived from docker/Dockerfile.rocm
 #
 set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0")
 set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0")
@@ -1,6 +1,6 @@
 # Dockerfile

-We provide a <gh-file:Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
+We provide a <gh-file:docker/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
 More information about deploying with Docker can be found [here](#deployment-docker).

 Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes:
@@ -28,7 +28,7 @@ The edges of the build graph represent:
 > Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present):
 >
 > ```bash
-> dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename Dockerfile
+> dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename docker/Dockerfile
 > ```
 >
 > or in case you want to run it directly with the docker image:
@@ -43,7 +43,7 @@ The edges of the build graph represent:
 > --output png \
 > --dpi 200 \
 > --max-label-length 50 \
-> --filename Dockerfile \
+> --filename docker/Dockerfile \
 > --legend
 > ```
 >
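For completeness, the containerised variant of the graph-regeneration command referenced in this hunk would look roughly like the sketch below; the dockerfilegraph image reference is an assumption, only the flags come from the diff:

```bash
# Sketch: regenerate the build graph via the dockerfilegraph container.
# The image reference below is an assumption, not taken from the diff.
docker run --rm \
   --user "$(id -u):$(id -g)" \
   --workdir /workspace \
   --volume "$(pwd)":/workspace \
   ghcr.io/patrickhoefler/dockerfilegraph:alpine \
   --output png \
   --dpi 200 \
   --max-label-length 50 \
   --filename docker/Dockerfile \
   --legend
```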
@@ -45,7 +45,7 @@ pytest tests/
 ```

 :::{tip}
-Since the <gh-file:Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.
+Since the <gh-file:docker/Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.

 Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
 :::
@@ -61,11 +61,11 @@ RUN uv pip install --system git+https://github.com/huggingface/transformers.git

 ## Building vLLM's Docker Image from Source

-You can build and run vLLM from source via the provided <gh-file:Dockerfile>. To build vLLM:
+You can build and run vLLM from source via the provided <gh-file:docker/Dockerfile>. To build vLLM:

 ```console
 # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2
-DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai
+DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --file docker/Dockerfile
 ```

 :::{note}
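Following the docs change above, building from source and then serving from the resulting image would look roughly like the sketch below; the run flags and model name are illustrative only, not taken from the diff:

```bash
# Sketch: build the OpenAI-compatible image from the relocated Dockerfile, then run it.
DOCKER_BUILDKIT=1 docker build . \
  --target vllm-openai \
  --tag vllm/vllm-openai \
  --file docker/Dockerfile

# Illustrative run; adjust GPU runtime flags and model to your setup.
docker run --gpus all -p 8000:8000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  vllm/vllm-openai --model mistralai/Mistral-7B-v0.1
```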
@@ -92,6 +92,7 @@ Keep an eye on memory usage with parallel jobs as it can be substantial (see exa
 # Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB)
 $ python3 use_existing_torch.py
 $ DOCKER_BUILDKIT=1 docker build . \
+--file docker/Dockerfile \
 --target vllm-openai \
 --platform "linux/arm64" \
 -t vllm/vllm-gh200-openai:latest \
@@ -69,14 +69,14 @@ server {

 ```console
 cd $vllm_root
-docker build -f Dockerfile . --tag vllm
+docker build -f docker/Dockerfile . --tag vllm
 ```

 If you are behind proxy, you can pass the proxy settings to the docker build command as shown below:

 ```console
 cd $vllm_root
-docker build -f Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
+docker build -f docker/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
 ```

 (nginxloadbalancer-nginx-docker-network)=
@@ -86,7 +86,7 @@ Currently, there are no pre-built Intel Gaudi images.
 ### Build image from source

 ```console
-docker build -f Dockerfile.hpu -t vllm-hpu-env .
+docker build -f docker/Dockerfile.hpu -t vllm-hpu-env .
 docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --rm vllm-hpu-env
 ```
@@ -132,7 +132,7 @@ Currently, there are no pre-built Neuron images.

 See <project:#deployment-docker-build-image-from-source> for instructions on building the Docker image.

-Make sure to use <gh-file:Dockerfile.neuron> in place of the default Dockerfile.
+Make sure to use <gh-file:docker/Dockerfile.neuron> in place of the default Dockerfile.

 ## Extra information
@@ -169,10 +169,10 @@ See <project:#deployment-docker-pre-built-image> for instructions on using the o

 ### Build image from source

-You can use <gh-file:Dockerfile.tpu> to build a Docker image with TPU support.
+You can use <gh-file:docker/Dockerfile.tpu> to build a Docker image with TPU support.

 ```console
-docker build -f Dockerfile.tpu -t vllm-tpu .
+docker build -f docker/Dockerfile.tpu -t vllm-tpu .
 ```

 Run the Docker image with the following command:
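A hedged end-to-end sketch of the TPU flow described in this hunk, assuming a TPU host: build from the relocated Dockerfile, then start a container. The run flags are indicative only and not part of the diff:

```bash
# Sketch: build the TPU image from the new path and launch it interactively.
docker build -f docker/Dockerfile.tpu -t vllm-tpu .

# Run flags shown here are indicative; adjust to your TPU host setup.
docker run --privileged --net host --shm-size=16G -it vllm-tpu
```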
@@ -177,7 +177,7 @@ Currently, there are no pre-built CPU wheels.
 ### Build image from source

 ```console
-$ docker build -f Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai .
+$ docker build -f docker/Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai .

 # Launching OpenAI server
 $ docker run --rm \
@@ -193,11 +193,11 @@ $ docker run --rm \
 ```

 ::::{tip}
-For ARM or Apple silicon, use `Dockerfile.arm`
+For ARM or Apple silicon, use `docker/Dockerfile.arm`
 ::::

 ::::{tip}
-For IBM Z (s390x), use `Dockerfile.s390x` and in `docker run` use flag `--dtype float`
+For IBM Z (s390x), use `docker/Dockerfile.s390x` and in `docker run` use flag `--dtype float`
 ::::

 ## Supported features
@@ -123,7 +123,7 @@ Building the Docker image from source is the recommended way to use vLLM with RO

 #### (Optional) Build an image with ROCm software stack

-Build a docker image from <gh-file:Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM.
+Build a docker image from <gh-file:docker/Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM.
 **This step is optional as this rocm_base image is usually prebuilt and store at [Docker Hub](https://hub.docker.com/r/rocm/vllm-dev) under tag `rocm/vllm-dev:base` to speed up user experience.**
 If you choose to build this rocm_base image yourself, the steps are as follows.
@@ -140,12 +140,12 @@ It is important that the user kicks off the docker build using buildkit. Either
 To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default:

 ```console
-DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm_base -t rocm/vllm-dev:base .
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm_base -t rocm/vllm-dev:base .
 ```

 #### Build an image with vLLM

-First, build a docker image from <gh-file:Dockerfile.rocm> and launch a docker container from the image.
+First, build a docker image from <gh-file:docker/Dockerfile.rocm> and launch a docker container from the image.
 It is important that the user kicks off the docker build using buildkit. Either the user put `DOCKER_BUILDKIT=1` as environment variable when calling docker build command, or the user needs to setup buildkit in the docker daemon configuration /etc/docker/daemon.json as follows and restart the daemon:

 ```console
@@ -156,10 +156,10 @@ It is important that the user kicks off the docker build using buildkit. Either
 }
 ```

-<gh-file:Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches.
+<gh-file:docker/Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches.
 It provides flexibility to customize the build of docker image using the following arguments:

-- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:Dockerfile.rocm_base>
+- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:docker/Dockerfile.rocm_base>
 - `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build
 - `BUILD_RPD`: Include RocmProfileData profiling tool in the image
 - `ARG_PYTORCH_ROCM_ARCH`: Allows to override the gfx architecture values from the base docker image
@@ -169,13 +169,13 @@ Their values can be passed in when running `docker build` with `--build-arg` opt
 To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default:

 ```console
-DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm -t vllm-rocm .
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm -t vllm-rocm .
 ```

 To build vllm on ROCm 6.3 for Radeon RX7900 series (gfx1100), you should pick the alternative base image:

 ```console
-DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f Dockerfile.rocm -t vllm-rocm .
+DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f docker/Dockerfile.rocm -t vllm-rocm .
 ```

 To run the above docker image `vllm-rocm`, use the below command:
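Putting the ROCm hunks together, the full build sequence with the relocated Dockerfiles is, as a sketch based only on the commands shown above:

```bash
# Sketch: optionally rebuild the ROCm base image, then build vLLM on top of it.
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm_base -t rocm/vllm-dev:base .
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm -t vllm-rocm .

# Navi (gfx1100) variant uses the alternative base image.
DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" \
  -f docker/Dockerfile.rocm -t vllm-rocm .
```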
@@ -54,7 +54,7 @@ Currently, there are no pre-built XPU images.
 ### Build image from source

 ```console
-$ docker build -f Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
+$ docker build -f docker/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
 $ docker run -it \
 --rm \
 --network=host \
@@ -208,5 +208,5 @@ Currently, vLLM supports multiple backends for efficient Attention computation a
 If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`.

 ```{attention}
-There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see [Dockerfile](https://github.com/vllm-project/vllm/blob/main/Dockerfile) for instructions on how to install it.
+There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:docker/Dockerfile> for instructions on how to install it.
 ```
@@ -317,8 +317,8 @@ class ModelConfig:
 ) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
 raise ValueError(
 "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
-"module was not found."
-"See https://github.com/vllm-project/vllm/blob/main/Dockerfile "
+"module was not found. See "
+"https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile " # noqa: E501
 "for instructions on how to install it.")

 # The tokenizer version is consistent with the model version by default.