diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index a1dcb01e..a420759a 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -3,7 +3,7 @@ steps: agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/upload-wheels.sh" @@ -14,7 +14,7 @@ steps: agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/upload-wheels.sh" @@ -31,7 +31,7 @@ steps: agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/upload-wheels.sh" @@ -48,7 +48,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ." - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT" - label: "Build and publish TPU release image" @@ -57,7 +57,7 @@ steps: agents: queue: tpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f Dockerfile.tpu ." 
+ - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f docker/Dockerfile.tpu ." - "docker push vllm/vllm-tpu:nightly" - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT" plugins: @@ -82,7 +82,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/run-cpu-test-ppc64le.sh b/.buildkite/run-cpu-test-ppc64le.sh index bc06838d..9c5cf7ca 100755 --- a/.buildkite/run-cpu-test-ppc64le.sh +++ b/.buildkite/run-cpu-test-ppc64le.sh @@ -10,5 +10,5 @@ trap remove_docker_container EXIT remove_docker_container # Try building the docker image -docker build -t cpu-test -f Dockerfile.ppc64le . +docker build -t cpu-test -f docker/Dockerfile.ppc64le . diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh index bf9f191d..40f3df96 100644 --- a/.buildkite/run-cpu-test.sh +++ b/.buildkite/run-cpu-test.sh @@ -18,8 +18,8 @@ trap remove_docker_container EXIT remove_docker_container # Try building the docker image -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu . -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f docker/Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f docker/Dockerfile.cpu . # Run the image, setting --shm-size=4g for tensor parallel. docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \ diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh index 5c004b47..8c64e146 100644 --- a/.buildkite/run-gh200-test.sh +++ b/.buildkite/run-gh200-test.sh @@ -9,6 +9,7 @@ python3 use_existing_torch.py # Try building the docker image DOCKER_BUILDKIT=1 docker build . \ + --file docker/Dockerfile \ --target vllm-openai \ --platform "linux/arm64" \ -t gh200-test \ diff --git a/.buildkite/run-hpu-test.sh b/.buildkite/run-hpu-test.sh index f83eb927..95b6ac37 100644 --- a/.buildkite/run-hpu-test.sh +++ b/.buildkite/run-hpu-test.sh @@ -5,7 +5,7 @@ set -ex # Try building the docker image -docker build -t hpu-test-env -f Dockerfile.hpu . +docker build -t hpu-test-env -f docker/Dockerfile.hpu . 
# Setup cleanup # certain versions of HPU software stack have a bug that can diff --git a/.buildkite/run-neuron-test.sh b/.buildkite/run-neuron-test.sh index ad5ae6f4..ec6a080e 100644 --- a/.buildkite/run-neuron-test.sh +++ b/.buildkite/run-neuron-test.sh @@ -35,7 +35,7 @@ else date "+%s" > /tmp/neuron-docker-build-timestamp fi -docker build -t "${image_name}" -f Dockerfile.neuron . +docker build -t "${image_name}" -f docker/Dockerfile.neuron . # Setup cleanup remove_docker_container() { diff --git a/.buildkite/run-tpu-v1-test.sh b/.buildkite/run-tpu-v1-test.sh index 8616ea2b..4aac57cc 100755 --- a/.buildkite/run-tpu-v1-test.sh +++ b/.buildkite/run-tpu-v1-test.sh @@ -3,7 +3,7 @@ set -e # Build the docker image. -docker build -f Dockerfile.tpu -t vllm-tpu . +docker build -f docker/Dockerfile.tpu -t vllm-tpu . # Set up cleanup. remove_docker_container() { docker rm -f tpu-test || true; } diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh index 3a0e6bdb..f54010c4 100644 --- a/.buildkite/run-xpu-test.sh +++ b/.buildkite/run-xpu-test.sh @@ -8,7 +8,7 @@ image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}" container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)" # Try building the docker image -docker build -t ${image_name} -f Dockerfile.xpu . +docker build -t ${image_name} -f docker/Dockerfile.xpu . # Setup cleanup remove_docker_container() { diff --git a/.github/mergify.yml b/.github/mergify.yml index e071ece6..3097b994 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -19,7 +19,7 @@ pull_request_rules: - files~=\.buildkite/ - files~=^cmake/ - files=CMakeLists.txt - - files~=^Dockerfile + - files~=^docker/Dockerfile - files~=^requirements.*\.txt - files=setup.py actions: diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml index b199d086..7b1d9f69 100644 --- a/.github/workflows/lint-and-deploy.yaml +++ b/.github/workflows/lint-and-deploy.yaml @@ -50,7 +50,7 @@ jobs: uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 - name: Build the Docker image vllm cpu - run: docker buildx build -f Dockerfile.cpu -t vllm-cpu-env . + run: docker buildx build -f docker/Dockerfile.cpu -t vllm-cpu-env . - name: Configuration of docker images, network and namespace for the kind cluster run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index ab6185e9..d0436aa1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1 # # Note: the CUDA torch version is derived from pyproject.toml and various # requirements.txt files and should be kept consistent. 
The ROCm torch -# versions are derived from Dockerfile.rocm +# versions are derived from docker/Dockerfile.rocm # set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0") set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0") diff --git a/Dockerfile b/docker/Dockerfile similarity index 100% rename from Dockerfile rename to docker/Dockerfile diff --git a/Dockerfile.arm b/docker/Dockerfile.arm similarity index 100% rename from Dockerfile.arm rename to docker/Dockerfile.arm diff --git a/Dockerfile.cpu b/docker/Dockerfile.cpu similarity index 100% rename from Dockerfile.cpu rename to docker/Dockerfile.cpu diff --git a/Dockerfile.hpu b/docker/Dockerfile.hpu similarity index 100% rename from Dockerfile.hpu rename to docker/Dockerfile.hpu diff --git a/Dockerfile.neuron b/docker/Dockerfile.neuron similarity index 100% rename from Dockerfile.neuron rename to docker/Dockerfile.neuron diff --git a/Dockerfile.ppc64le b/docker/Dockerfile.ppc64le similarity index 100% rename from Dockerfile.ppc64le rename to docker/Dockerfile.ppc64le diff --git a/Dockerfile.rocm b/docker/Dockerfile.rocm similarity index 100% rename from Dockerfile.rocm rename to docker/Dockerfile.rocm diff --git a/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base similarity index 100% rename from Dockerfile.rocm_base rename to docker/Dockerfile.rocm_base diff --git a/Dockerfile.s390x b/docker/Dockerfile.s390x similarity index 100% rename from Dockerfile.s390x rename to docker/Dockerfile.s390x diff --git a/Dockerfile.tpu b/docker/Dockerfile.tpu similarity index 100% rename from Dockerfile.tpu rename to docker/Dockerfile.tpu diff --git a/Dockerfile.xpu b/docker/Dockerfile.xpu similarity index 100% rename from Dockerfile.xpu rename to docker/Dockerfile.xpu diff --git a/docs/source/contributing/dockerfile/dockerfile.md b/docs/source/contributing/dockerfile/dockerfile.md index 96674805..90b9a33c 100644 --- a/docs/source/contributing/dockerfile/dockerfile.md +++ b/docs/source/contributing/dockerfile/dockerfile.md @@ -1,6 +1,6 @@ # Dockerfile -We provide a <gh-file:Dockerfile> to construct the image for running an OpenAI compatible server with vLLM. +We provide a <gh-file:docker/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM. More information about deploying with Docker can be found [here](#deployment-docker). Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes: @@ -28,7 +28,7 @@ The edges of the build graph represent: > Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present): > > ```bash - > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename Dockerfile + > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename docker/Dockerfile > ``` > > or in case you want to run it directly with the docker image: @@ -43,7 +43,7 @@ The edges of the build graph represent: > --output png \ > --dpi 200 \ > --max-label-length 50 \ - > --filename Dockerfile \ + > --filename docker/Dockerfile \ > --legend > ``` > diff --git a/docs/source/contributing/overview.md b/docs/source/contributing/overview.md index 1e6f73dd..31c7059f 100644 --- a/docs/source/contributing/overview.md +++ b/docs/source/contributing/overview.md @@ -45,7 +45,7 @@ pytest tests/ ``` :::{tip} -Since the <gh-file:Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12. +Since the <gh-file:docker/Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.
Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment. ::: diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md index 65cb038d..1ccb04ac 100644 --- a/docs/source/deployment/docker.md +++ b/docs/source/deployment/docker.md @@ -61,11 +61,11 @@ RUN uv pip install --system git+https://github.com/huggingface/transformers.git ## Building vLLM's Docker Image from Source -You can build and run vLLM from source via the provided <gh-file:Dockerfile>. To build vLLM: +You can build and run vLLM from source via the provided <gh-file:docker/Dockerfile>. To build vLLM: ```console # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2 -DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai +DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --file docker/Dockerfile ``` :::{note} @@ -92,6 +92,7 @@ Keep an eye on memory usage with parallel jobs as it can be substantial (see exa # Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB) $ python3 use_existing_torch.py $ DOCKER_BUILDKIT=1 docker build . \ + --file docker/Dockerfile \ --target vllm-openai \ --platform "linux/arm64" \ -t vllm/vllm-gh200-openai:latest \ diff --git a/docs/source/deployment/nginx.md b/docs/source/deployment/nginx.md index 62816f51..bf404f10 100644 --- a/docs/source/deployment/nginx.md +++ b/docs/source/deployment/nginx.md @@ -69,14 +69,14 @@ server { ```console cd $vllm_root -docker build -f Dockerfile . --tag vllm +docker build -f docker/Dockerfile . --tag vllm ``` If you are behind proxy, you can pass the proxy settings to the docker build command as shown below: ```console cd $vllm_root -docker build -f Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy +docker build -f docker/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy ``` (nginxloadbalancer-nginx-docker-network)= diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md index e91ed6fb..e3046f35 100644 --- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md @@ -86,7 +86,7 @@ Currently, there are no pre-built Intel Gaudi images. ### Build image from source ```console -docker build -f Dockerfile.hpu -t vllm-hpu-env . +docker build -f docker/Dockerfile.hpu -t vllm-hpu-env . docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --rm vllm-hpu-env ``` diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md index 4c668a8e..b4bfb696 100644 --- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md @@ -132,7 +132,7 @@ Currently, there are no pre-built Neuron images. See for instructions on building the Docker image. -Make sure to use <gh-file:Dockerfile.neuron> in place of the default Dockerfile. +Make sure to use <gh-file:docker/Dockerfile.neuron> in place of the default Dockerfile.
## Extra information diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md index 6c7bbf60..beb803cf 100644 --- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md @@ -169,10 +169,10 @@ See for instructions on using the o ### Build image from source -You can use <gh-file:Dockerfile.tpu> to build a Docker image with TPU support. +You can use <gh-file:docker/Dockerfile.tpu> to build a Docker image with TPU support. ```console -docker build -f Dockerfile.tpu -t vllm-tpu . +docker build -f docker/Dockerfile.tpu -t vllm-tpu . ``` Run the Docker image with the following command: diff --git a/docs/source/getting_started/installation/cpu.md b/docs/source/getting_started/installation/cpu.md index 844b184a..e7e12bd6 100644 --- a/docs/source/getting_started/installation/cpu.md +++ b/docs/source/getting_started/installation/cpu.md @@ -177,7 +177,7 @@ Currently, there are no pre-built CPU wheels. ### Build image from source ```console -$ docker build -f Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai . +$ docker build -f docker/Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai . # Launching OpenAI server $ docker run --rm \ @@ -193,11 +193,11 @@ $ docker run --rm \ ``` ::::{tip} -For ARM or Apple silicon, use `Dockerfile.arm` +For ARM or Apple silicon, use `docker/Dockerfile.arm` :::: ::::{tip} -For IBM Z (s390x), use `Dockerfile.s390x` and in `docker run` use flag `--dtype float` +For IBM Z (s390x), use `docker/Dockerfile.s390x` and in `docker run` use flag `--dtype float` :::: ## Supported features diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md index cdd48769..eae7a235 100644 --- a/docs/source/getting_started/installation/gpu/rocm.inc.md +++ b/docs/source/getting_started/installation/gpu/rocm.inc.md @@ -123,7 +123,7 @@ Building the Docker image from source is the recommended way to use vLLM with RO #### (Optional) Build an image with ROCm software stack -Build a docker image from <gh-file:Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM. +Build a docker image from <gh-file:docker/Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM. **This step is optional as this rocm_base image is usually prebuilt and store at [Docker Hub](https://hub.docker.com/r/rocm/vllm-dev) under tag `rocm/vllm-dev:base` to speed up user experience.** If you choose to build this rocm_base image yourself, the steps are as follows. @@ -140,12 +140,12 @@ It is important that the user kicks off the docker build using buildkit. Either To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default: ```console -DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm_base -t rocm/vllm-dev:base . +DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm_base -t rocm/vllm-dev:base . ``` #### Build an image with vLLM -First, build a docker image from <gh-file:Dockerfile.rocm> and launch a docker container from the image. +First, build a docker image from <gh-file:docker/Dockerfile.rocm> and launch a docker container from the image. It is important that the user kicks off the docker build using buildkit. Either the user put `DOCKER_BUILDKIT=1` as environment variable when calling docker build command, or the user needs to setup buildkit in the docker daemon configuration /etc/docker/daemon.json as follows and restart the daemon: ```console @@ -156,10 +156,10 @@ It is important that the user kicks off the docker build using buildkit.
Either } ``` -<gh-file:Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches. +<gh-file:docker/Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches. It provides flexibility to customize the build of docker image using the following arguments: -- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:Dockerfile.rocm_base> +- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:docker/Dockerfile.rocm_base> - `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build - `BUILD_RPD`: Include RocmProfileData profiling tool in the image - `ARG_PYTORCH_ROCM_ARCH`: Allows to override the gfx architecture values from the base docker image @@ -169,13 +169,13 @@ Their values can be passed in when running `docker build` with `--build-arg` opt To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default: ```console -DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm -t vllm-rocm . +DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm -t vllm-rocm . ``` To build vllm on ROCm 6.3 for Radeon RX7900 series (gfx1100), you should pick the alternative base image: ```console -DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f Dockerfile.rocm -t vllm-rocm . +DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f docker/Dockerfile.rocm -t vllm-rocm . ``` To run the above docker image `vllm-rocm`, use the below command: diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md index 84a9b387..c41905f2 100644 --- a/docs/source/getting_started/installation/gpu/xpu.inc.md +++ b/docs/source/getting_started/installation/gpu/xpu.inc.md @@ -54,7 +54,7 @@ Currently, there are no pre-built XPU images. ### Build image from source ```console -$ docker build -f Dockerfile.xpu -t vllm-xpu-env --shm-size=4g . +$ docker build -f docker/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g . $ docker run -it \ --rm \ --network=host \ diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md index b5246c41..25189b00 100644 --- a/docs/source/getting_started/quickstart.md +++ b/docs/source/getting_started/quickstart.md @@ -208,5 +208,5 @@ Currently, vLLM supports multiple backends for efficient Attention computation a If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`. ```{attention} -There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see [Dockerfile](https://github.com/vllm-project/vllm/blob/main/Dockerfile) for instructions on how to install it. +There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:docker/Dockerfile> for instructions on how to install it.
``` diff --git a/vllm/config.py b/vllm/config.py index b06f1196..1dd93591 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -317,8 +317,8 @@ class ModelConfig: ) and backend == "FLASHINFER" and find_spec("flashinfer") is None: raise ValueError( "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer " - "module was not found." - "See https://github.com/vllm-project/vllm/blob/main/Dockerfile " + "module was not found. See " + "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile " # noqa: E501 "for instructions on how to install it.") # The tokenizer version is consistent with the model version by default.