diff --git a/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh index 32bd34c4..4d01a314 100644 --- a/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh +++ b/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh @@ -426,7 +426,7 @@ main() { pip install -U transformers - pip install -r requirements-dev.txt + pip install -r requirements/dev.txt which genai-perf # check storage diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh index 2ead1f51..f6dad818 100644 --- a/.buildkite/run-cpu-test.sh +++ b/.buildkite/run-cpu-test.sh @@ -35,7 +35,7 @@ function cpu_tests() { # Run basic model test docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c " set -e - pip install -r vllm/requirements-test.txt + pip install -r vllm/requirements/test.txt pytest -v -s tests/models/decoder_only/language -m cpu_model pytest -v -s tests/models/embedding/language -m cpu_model pytest -v -s tests/models/encoder_decoder/language -m cpu_model diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index f60aeaf9..2af76cb2 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -35,7 +35,7 @@ steps: fast_check: true no_gpu: True commands: - - pip install -r requirements-docs.txt + - pip install -r ../../requirements/docs.txt - SPHINXOPTS=\"-W\" make html # Check API reference (if it fails, you may have missing mock imports) - grep \"sig sig-object py\" build/html/api/inference_params.html diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e40ceaaa..fc6739eb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -50,7 +50,7 @@ jobs: # matrix: # os: ['ubuntu-20.04'] # python-version: ['3.9', '3.10', '3.11', '3.12'] - # pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt. + # pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements/cuda.txt. # cuda-version: ['11.8', '12.1'] # steps: diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh index 122e4e10..0f010832 100644 --- a/.github/workflows/scripts/build.sh +++ b/.github/workflows/scripts/build.sh @@ -9,7 +9,7 @@ PATH=${cuda_home}/bin:$PATH LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH # Install requirements -$python_executable -m pip install -r requirements-build.txt -r requirements-cuda.txt +$python_executable -m pip install -r requirements/build.txt -r requirements/cuda.txt # Limit the number of parallel jobs to avoid OOM export MAX_JOBS=1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 23a38d49..074ac9d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,8 +44,8 @@ repos: rev: 0.6.2 hooks: - id: pip-compile - args: [requirements-test.in, -o, requirements-test.txt] - files: ^requirements-test\.(in|txt)$ + args: [requirements/test.in, -o, requirements/test.txt] + files: ^requirements/test\.(in|txt)$ - repo: local hooks: - id: mypy-local diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 284196bc..2781ec22 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,4 +18,4 @@ formats: [] # Optionally declare the Python requirements required to build your docs python: install: - - requirements: docs/requirements-docs.txt + - requirements: requirements/docs.txt diff --git a/Dockerfile b/Dockerfile index ece22ed3..ff4a0839 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,10 +55,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --index-url https://download.pytorch.org/whl/nightly/cu126 "torch==2.7.0.dev20250121+cu126" "torchvision==0.22.0.dev20250121"; \ fi -COPY requirements-common.txt requirements-common.txt -COPY requirements-cuda.txt requirements-cuda.txt +COPY requirements/common.txt requirements/common.txt +COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements-cuda.txt + uv pip install -r requirements/cuda.txt # cuda arch list used by torch # can be useful for both `dev` and `test` @@ -76,14 +76,14 @@ FROM base AS build ARG TARGETPLATFORM # install build dependencies -COPY requirements-build.txt requirements-build.txt +COPY requirements/build.txt requirements/build.txt # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements-build.txt + uv pip install -r requirements/build.txt COPY . . ARG GIT_REPO_CHECK=0 @@ -151,11 +151,11 @@ FROM base as dev # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 -COPY requirements-lint.txt requirements-lint.txt -COPY requirements-test.txt requirements-test.txt -COPY requirements-dev.txt requirements-dev.txt +COPY requirements/lint.txt requirements/lint.txt +COPY requirements/test.txt requirements/test.txt +COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements-dev.txt + uv pip install -r requirements/dev.txt #################### DEV IMAGE #################### #################### vLLM installation IMAGE #################### @@ -230,9 +230,9 @@ COPY examples examples # some issues w.r.t. JIT compilation. Therefore we need to # install build dependencies for JIT compilation. # TODO: Remove this once FlashInfer AOT wheel is fixed -COPY requirements-build.txt requirements-build.txt +COPY requirements/build.txt requirements/build.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements-build.txt + uv pip install -r requirements/build.txt #################### vLLM installation IMAGE #################### @@ -249,7 +249,7 @@ ENV UV_HTTP_TIMEOUT=500 # install development dependencies (for testing) RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements-dev.txt + uv pip install -r requirements/dev.txt # install development dependencies (for testing) RUN --mount=type=cache,target=/root/.cache/uv \ diff --git a/Dockerfile.arm b/Dockerfile.arm index 093ee220..bad09368 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -26,18 +26,18 @@ WORKDIR /workspace ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=bind,src=requirements-build.txt,target=requirements-build.txt \ + --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \ pip install --upgrade pip && \ - pip install -r requirements-build.txt + pip install -r requirements/build.txt FROM cpu-test-arm AS build WORKDIR /workspace/vllm RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=bind,src=requirements-common.txt,target=requirements-common.txt \ - --mount=type=bind,src=requirements-cpu.txt,target=requirements-cpu.txt \ - pip install -v -r requirements-cpu.txt + --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \ + --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \ + pip install -v -r requirements/cpu.txt COPY . . ARG GIT_REPO_CHECK=0 diff --git a/Dockerfile.cpu b/Dockerfile.cpu index ebe226cf..08a4e188 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -29,18 +29,18 @@ WORKDIR /workspace ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=bind,src=requirements-build.txt,target=requirements-build.txt \ + --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \ pip install --upgrade pip && \ - pip install -r requirements-build.txt + pip install -r requirements/build.txt FROM cpu-test-1 AS build WORKDIR /workspace/vllm RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=bind,src=requirements-common.txt,target=requirements-common.txt \ - --mount=type=bind,src=requirements-cpu.txt,target=requirements-cpu.txt \ - pip install -v -r requirements-cpu.txt + --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \ + --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \ + pip install -v -r requirements/cpu.txt COPY . . ARG GIT_REPO_CHECK=0 diff --git a/Dockerfile.hpu b/Dockerfile.hpu index 66cf68c3..48211c88 100644 --- a/Dockerfile.hpu +++ b/Dockerfile.hpu @@ -4,7 +4,7 @@ COPY ./ /workspace/vllm WORKDIR /workspace/vllm -RUN pip install -v -r requirements-hpu.txt +RUN pip install -v -r requirements/hpu.txt ENV no_proxy=localhost,127.0.0.1 ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true diff --git a/Dockerfile.neuron b/Dockerfile.neuron index 27658d83..06764590 100644 --- a/Dockerfile.neuron +++ b/Dockerfile.neuron @@ -36,7 +36,7 @@ RUN --mount=type=bind,source=.git,target=.git \ RUN python3 -m pip install -U \ 'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \ - -r requirements-neuron.txt + -r requirements/neuron.txt ENV VLLM_TARGET_DEVICE neuron RUN --mount=type=bind,source=.git,target=.git \ diff --git a/Dockerfile.openvino b/Dockerfile.openvino index 32bcbfa9..445c70ab 100644 --- a/Dockerfile.openvino +++ b/Dockerfile.openvino @@ -16,7 +16,7 @@ RUN --mount=type=bind,source=.git,target=.git \ RUN python3 -m pip install -U pip # install build requirements -RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/requirements-build.txt +RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/requirements/build.txt # build vLLM with OpenVINO backend RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace diff --git a/Dockerfile.ppc64le b/Dockerfile.ppc64le index c4c1f3e3..c5ca20d7 100644 --- a/Dockerfile.ppc64le +++ b/Dockerfile.ppc64le @@ -6,7 +6,7 @@ ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/" RUN apt-get update -y && apt-get install -y git wget kmod curl vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential ffmpeg libsm6 libxext6 libgl1 libssl-dev -# Some packages in requirements-cpu are installed here +# Some packages in requirements/cpu are installed here # IBM provides optimized packages for ppc64le processors in the open-ce project for mamba # Currently these may not be available for venv or pip directly RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 rust && micromamba clean --all --yes @@ -21,7 +21,7 @@ RUN --mount=type=bind,source=.git,target=.git \ RUN --mount=type=cache,target=/root/.cache/pip \ RUSTFLAGS='-L /opt/conda/lib' pip install -v --prefer-binary --extra-index-url https://repo.fury.io/mgiessing \ 'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \ - -r requirements-cpu.txt \ + -r requirements/cpu.txt \ xformers uvloop==0.20.0 RUN --mount=type=bind,source=.git,target=.git \ diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 14c522af..02ccb8ef 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -38,7 +38,7 @@ FROM fetch_vllm AS build_vllm ARG USE_CYTHON # Build vLLM RUN cd vllm \ - && python3 -m pip install -r requirements-rocm.txt \ + && python3 -m pip install -r requirements/rocm.txt \ && python3 setup.py clean --all \ && if [ ${USE_CYTHON} -eq "1" ]; then python3 setup_cython.py build_ext --inplace; fi \ && python3 setup.py bdist_wheel --dist-dir=dist @@ -60,7 +60,7 @@ RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/* # Install vLLM RUN --mount=type=bind,from=export_vllm,src=/,target=/install \ cd /install \ - && pip install -U -r requirements-rocm.txt \ + && pip install -U -r requirements/rocm.txt \ && pip uninstall -y vllm \ && pip install *.whl @@ -99,7 +99,7 @@ RUN if [ ${BUILD_RPD} -eq "1" ]; then \ # Install vLLM RUN --mount=type=bind,from=export_vllm,src=/,target=/install \ cd /install \ - && pip install -U -r requirements-rocm.txt \ + && pip install -U -r requirements/rocm.txt \ && pip uninstall -y vllm \ && pip install *.whl diff --git a/Dockerfile.s390x b/Dockerfile.s390x index b499d4cb..5a84dc12 100644 --- a/Dockerfile.s390x +++ b/Dockerfile.s390x @@ -58,7 +58,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ cd ../../python && \ export PYARROW_PARALLEL=4 && \ export ARROW_BUILD_TYPE=release && \ - uv pip install -r requirements-build.txt && \ + uv pip install -r requirements/build.txt && \ python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel FROM python-install AS numa-build @@ -120,7 +120,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \ --mount=type=bind,from=pyarrow,source=/tmp/arrow/python/dist,target=/tmp/arrow-wheels \ --mount=type=bind,from=torch-vision,source=/tmp/vision/dist,target=/tmp/vision-wheels/ \ - sed -i '/^torch/d' requirements-build.txt && \ + sed -i '/^torch/d' requirements/build.txt && \ ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl | head -n 1) && \ VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl | head -n 1) && \ uv pip install -v \ @@ -128,8 +128,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \ $VISION_WHL_FILE \ --extra-index-url https://download.pytorch.org/whl/nightly/cpu \ --index-strategy unsafe-best-match \ - -r requirements-build.txt \ - -r requirements-cpu.txt + -r requirements/build.txt \ + -r requirements/cpu.txt # Build and install vllm RUN --mount=type=cache,target=/root/.cache/uv \ diff --git a/Dockerfile.tpu b/Dockerfile.tpu index e268b394..960dc8e9 100644 --- a/Dockerfile.tpu +++ b/Dockerfile.tpu @@ -19,7 +19,7 @@ ENV VLLM_TARGET_DEVICE="tpu" RUN --mount=type=cache,target=/root/.cache/pip \ --mount=type=bind,source=.git,target=.git \ python3 -m pip install \ - -r requirements-tpu.txt + -r requirements/tpu.txt RUN python3 setup.py develop # install development dependencies (for testing) diff --git a/Dockerfile.xpu b/Dockerfile.xpu index a374f20d..530809bc 100644 --- a/Dockerfile.xpu +++ b/Dockerfile.xpu @@ -25,12 +25,12 @@ RUN apt-get update -y && \ wget WORKDIR /workspace/vllm -COPY requirements-xpu.txt /workspace/vllm/requirements-xpu.txt -COPY requirements-common.txt /workspace/vllm/requirements-common.txt +COPY requirements/xpu.txt /workspace/vllm/requirements/xpu.txt +COPY requirements/common.txt /workspace/vllm/requirements/common.txt RUN --mount=type=cache,target=/root/.cache/pip \ pip install --no-cache-dir \ - -r requirements-xpu.txt + -r requirements/xpu.txt RUN git clone https://github.com/intel/pti-gpu && \ cd pti-gpu/sdk && \ diff --git a/MANIFEST.in b/MANIFEST.in index 82be639e..82fd22b8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,9 @@ include LICENSE -include requirements-common.txt -include requirements-cuda.txt -include requirements-rocm.txt -include requirements-neuron.txt -include requirements-cpu.txt +include requirements/common.txt +include requirements/cuda.txt +include requirements/rocm.txt +include requirements/neuron.txt +include requirements/cpu.txt include CMakeLists.txt recursive-include cmake * diff --git a/docs/README.md b/docs/README.md index 1a44c134..74e05ce0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,7 +4,7 @@ ```bash # Install dependencies. -pip install -r requirements-docs.txt +pip install -r ../requirements/docs.txt # Build the docs. make clean diff --git a/docs/source/contributing/overview.md b/docs/source/contributing/overview.md index 5f8f5525..a4141183 100644 --- a/docs/source/contributing/overview.md +++ b/docs/source/contributing/overview.md @@ -23,7 +23,7 @@ Check out the [building from source](#build-from-source) documentation for detai ## Testing ```bash -pip install -r requirements-dev.txt +pip install -r requirements/dev.txt # Linting, formatting and static type checking pre-commit install --hook-type pre-commit --hook-type commit-msg diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md index f3b0d6dc..7e52f604 100644 --- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md @@ -63,7 +63,7 @@ To build and install vLLM from source, run: ```console git clone https://github.com/vllm-project/vllm.git cd vllm -pip install -r requirements-hpu.txt +pip install -r requirements/hpu.txt python setup.py develop ``` @@ -73,7 +73,7 @@ Currently, the latest features and performance optimizations are developed in Ga git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork git checkout habana_main -pip install -r requirements-hpu.txt +pip install -r requirements/hpu.txt python setup.py develop ``` diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md index f149818a..4c668a8e 100644 --- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md @@ -116,7 +116,7 @@ Once neuronx-cc and transformers-neuronx packages are installed, we will be able ```console git clone https://github.com/vllm-project/vllm.git cd vllm -pip install -U -r requirements-neuron.txt +pip install -U -r requirements/neuron.txt VLLM_TARGET_DEVICE="neuron" pip install . ``` diff --git a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md index 4f25252d..5641c156 100644 --- a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md @@ -32,7 +32,7 @@ Second, clone vLLM and install prerequisites for the vLLM OpenVINO backend insta ```console git clone https://github.com/vllm-project/vllm.git cd vllm -pip install -r requirements-build.txt --extra-index-url https://download.pytorch.org/whl/cpu +pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu ``` Finally, install vLLM with OpenVINO backend: diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md index c0d50fea..6c7bbf60 100644 --- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md @@ -151,7 +151,7 @@ pip uninstall torch torch-xla -y Install build dependencies: ```bash -pip install -r requirements-tpu.txt +pip install -r requirements/tpu.txt sudo apt-get install libopenblas-base libopenmpi-dev libomp-dev ``` diff --git a/docs/source/getting_started/installation/cpu/apple.inc.md b/docs/source/getting_started/installation/cpu/apple.inc.md index 3bf1d47f..7bc9e85e 100644 --- a/docs/source/getting_started/installation/cpu/apple.inc.md +++ b/docs/source/getting_started/installation/cpu/apple.inc.md @@ -25,7 +25,7 @@ After installation of XCode and the Command Line Tools, which include Apple Clan ```console git clone https://github.com/vllm-project/vllm.git cd vllm -pip install -r requirements-cpu.txt +pip install -r requirements/cpu.txt pip install -e . ``` diff --git a/docs/source/getting_started/installation/cpu/build.inc.md b/docs/source/getting_started/installation/cpu/build.inc.md index 46329e9b..39d9dfbd 100644 --- a/docs/source/getting_started/installation/cpu/build.inc.md +++ b/docs/source/getting_started/installation/cpu/build.inc.md @@ -18,7 +18,7 @@ Third, install Python packages for vLLM CPU backend building: ```console pip install --upgrade pip pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy -pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu +pip install -v -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu ``` Finally, build and install vLLM CPU backend: diff --git a/docs/source/getting_started/installation/gpu/cuda.inc.md b/docs/source/getting_started/installation/gpu/cuda.inc.md index 2477c3e4..7e3b884c 100644 --- a/docs/source/getting_started/installation/gpu/cuda.inc.md +++ b/docs/source/getting_started/installation/gpu/cuda.inc.md @@ -148,7 +148,7 @@ To build vLLM using an existing PyTorch installation: git clone https://github.com/vllm-project/vllm.git cd vllm python use_existing_torch.py -pip install -r requirements-build.txt +pip install -r requirements/build.txt pip install -e . --no-build-isolation ``` diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md index 84e7f650..4381cef5 100644 --- a/docs/source/getting_started/installation/gpu/rocm.inc.md +++ b/docs/source/getting_started/installation/gpu/rocm.inc.md @@ -84,7 +84,7 @@ Currently, there are no pre-built ROCm wheels. # Install dependencies $ pip install --upgrade numba scipy huggingface-hub[cli,hf_transfer] setuptools_scm $ pip install "numpy<2" - $ pip install -r requirements-rocm.txt + $ pip install -r requirements/rocm.txt # Build vLLM for MI210/MI250/MI300. $ export PYTORCH_ROCM_ARCH="gfx90a;gfx942" diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md index ef02d9a0..9678c25b 100644 --- a/docs/source/getting_started/installation/gpu/xpu.inc.md +++ b/docs/source/getting_started/installation/gpu/xpu.inc.md @@ -25,7 +25,7 @@ Currently, there are no pre-built XPU wheels. ```console source /opt/intel/oneapi/setvars.sh pip install --upgrade pip -pip install -v -r requirements-xpu.txt +pip install -v -r requirements/xpu.txt ``` - Finally, build and install vLLM XPU backend: diff --git a/format.sh b/format.sh index 3e78bf98..fb503ec4 100755 --- a/format.sh +++ b/format.sh @@ -1,6 +1,6 @@ #!/bin/bash echo "vLLM linting system has been moved from format.sh to pre-commit hook." -echo "Please run 'pip install -r requirements-lint.txt', followed by" +echo "Please run 'pip install -r requirements/lint.txt', followed by" echo "'pre-commit install --hook-type pre-commit --hook-type commit-msg' to install the pre-commit hook." echo "Then linters will run automatically before each commit." diff --git a/pyproject.toml b/pyproject.toml index 8a127ebb..2eafffaa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -# Should be mirrored in requirements-build.txt +# Should be mirrored in requirements/build.txt requires = [ "cmake>=3.26", "ninja", diff --git a/requirements-build.txt b/requirements/build.txt similarity index 100% rename from requirements-build.txt rename to requirements/build.txt diff --git a/requirements-common.txt b/requirements/common.txt similarity index 100% rename from requirements-common.txt rename to requirements/common.txt diff --git a/requirements-cpu.txt b/requirements/cpu.txt similarity index 96% rename from requirements-cpu.txt rename to requirements/cpu.txt index 9491e27d..ba059d3f 100644 --- a/requirements-cpu.txt +++ b/requirements/cpu.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt # Dependencies for CPUs torch==2.5.1+cpu; platform_machine != "ppc64le" and platform_machine != "aarch64" and platform_system != "Darwin" and platform_machine != "s390x" diff --git a/requirements-cuda.txt b/requirements/cuda.txt similarity index 95% rename from requirements-cuda.txt rename to requirements/cuda.txt index b641ea7f..46bb1736 100644 --- a/requirements-cuda.txt +++ b/requirements/cuda.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding diff --git a/requirements-dev.txt b/requirements/dev.txt similarity index 67% rename from requirements-dev.txt rename to requirements/dev.txt index 421aa2e7..e75821eb 100644 --- a/requirements-dev.txt +++ b/requirements/dev.txt @@ -1,5 +1,5 @@ --r requirements-lint.txt --r requirements-test.txt +-r lint.txt +-r test.txt # Avoid adding requirements directly to this file. # Instead, modify the two files referenced above. diff --git a/docs/requirements-docs.txt b/requirements/docs.txt similarity index 100% rename from docs/requirements-docs.txt rename to requirements/docs.txt diff --git a/requirements-hpu.txt b/requirements/hpu.txt similarity index 88% rename from requirements-hpu.txt rename to requirements/hpu.txt index 63a5f8b1..a61d72d0 100644 --- a/requirements-hpu.txt +++ b/requirements/hpu.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt # Dependencies for HPU code ray diff --git a/requirements-lint.txt b/requirements/lint.txt similarity index 100% rename from requirements-lint.txt rename to requirements/lint.txt diff --git a/requirements-neuron.txt b/requirements/neuron.txt similarity index 77% rename from requirements-neuron.txt rename to requirements/neuron.txt index 09820c73..5f25bd05 100644 --- a/requirements-neuron.txt +++ b/requirements/neuron.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt # Dependencies for Neuron devices torch-neuronx >= 2.5.0 diff --git a/requirements-openvino.txt b/requirements/openvino.txt similarity index 94% rename from requirements-openvino.txt rename to requirements/openvino.txt index ac9d851d..04b8c3b0 100644 --- a/requirements-openvino.txt +++ b/requirements/openvino.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt torch == 2.5.1 # should be aligned with "common" vLLM torch version openvino >= 2024.4.0 # since 2024.4.0 both CPU and GPU support Paged Attention diff --git a/requirements-rocm-build.txt b/requirements/rocm-build.txt similarity index 89% rename from requirements-rocm-build.txt rename to requirements/rocm-build.txt index 00ae0340..4d4945b0 100644 --- a/requirements-rocm-build.txt +++ b/requirements/rocm-build.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt --extra-index-url https://download.pytorch.org/whl/rocm6.2 torch==2.5.1 diff --git a/requirements-rocm.txt b/requirements/rocm.txt similarity index 91% rename from requirements-rocm.txt rename to requirements/rocm.txt index 83f3e18a..345c84b0 100644 --- a/requirements-rocm.txt +++ b/requirements/rocm.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding diff --git a/requirements-test.in b/requirements/test.in similarity index 100% rename from requirements-test.in rename to requirements/test.in diff --git a/requirements-test.txt b/requirements/test.txt similarity index 90% rename from requirements-test.txt rename to requirements/test.txt index f5722c82..f1123207 100644 --- a/requirements-test.txt +++ b/requirements/test.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile requirements-test.in -o requirements-test.txt +# uv pip compile requirements/test.in -o requirements/test.txt absl-py==2.1.0 # via rouge-score accelerate==1.0.1 @@ -32,9 +32,9 @@ attrs==24.2.0 audioread==3.0.1 # via librosa awscli==1.35.23 - # via -r requirements-test.in + # via -r requirements/test.in bitsandbytes==0.45.0 - # via -r requirements-test.in + # via -r requirements/test.in black==24.10.0 # via datamodel-code-generator boto3==1.35.57 @@ -47,7 +47,7 @@ botocore==1.35.57 bounded-pool-executor==0.0.3 # via pqdm buildkite-test-collector==0.1.9 - # via -r requirements-test.in + # via -r requirements/test.in certifi==2024.8.30 # via # httpcore @@ -79,7 +79,7 @@ cupy-cuda12x==13.3.0 cycler==0.12.1 # via matplotlib datamodel-code-generator==0.26.3 - # via -r requirements-test.in + # via -r requirements/test.in dataproperty==1.0.1 # via # pytablewriter @@ -91,7 +91,7 @@ datasets==3.0.2 decorator==5.1.1 # via librosa decord==0.6.0 - # via -r requirements-test.in + # via -r requirements/test.in dill==0.3.8 # via # datasets @@ -104,7 +104,7 @@ docutils==0.16 # via awscli einops==0.8.0 # via - # -r requirements-test.in + # -r requirements/test.in # encodec # vector-quantize-pytorch # vocos @@ -145,7 +145,7 @@ fsspec==2024.9.0 # huggingface-hub # torch genai-perf==0.0.8 - # via -r requirements-test.in + # via -r requirements/test.in genson==1.3.0 # via datamodel-code-generator h11==0.14.0 @@ -155,7 +155,7 @@ hiredis==3.0.0 httpcore==1.0.6 # via httpx httpx==0.27.2 - # via -r requirements-test.in + # via -r requirements/test.in huggingface-hub==0.26.2 # via # accelerate @@ -187,7 +187,7 @@ jinja2==3.1.4 # datamodel-code-generator # torch jiwer==3.0.5 - # via -r requirements-test.in + # via -r requirements/test.in jmespath==1.0.1 # via # boto3 @@ -214,11 +214,11 @@ lazy-loader==0.4 libnacl==2.1.0 # via tensorizer librosa==0.10.2.post1 - # via -r requirements-test.in + # via -r requirements/test.in llvmlite==0.43.0 # via numba lm-eval==0.4.4 - # via -r requirements-test.in + # via -r requirements/test.in lxml==5.3.0 # via sacrebleu markdown-it-py==3.0.0 @@ -226,7 +226,7 @@ markdown-it-py==3.0.0 markupsafe==3.0.2 # via jinja2 matplotlib==3.9.2 - # via -r requirements-test.in + # via -r requirements/test.in mbstrdecoder==1.1.3 # via # dataproperty @@ -235,7 +235,7 @@ mbstrdecoder==1.1.3 mdurl==0.1.2 # via markdown-it-py mistral-common==1.5.1 - # via -r requirements-test.in + # via -r requirements/test.in more-itertools==10.5.0 # via lm-eval mpmath==1.3.0 @@ -264,7 +264,7 @@ numexpr==2.10.1 # via lm-eval numpy==1.26.4 # via - # -r requirements-test.in + # -r requirements/test.in # accelerate # bitsandbytes # contourpy @@ -366,7 +366,7 @@ patsy==1.0.1 # via statsmodels peft==0.13.2 # via - # -r requirements-test.in + # -r requirements/test.in # lm-eval pillow==10.4.0 # via @@ -388,7 +388,7 @@ pooch==1.8.2 portalocker==2.10.1 # via sacrebleu pqdm==0.2.0 - # via -r requirements-test.in + # via -r requirements/test.in propcache==0.2.0 # via yarl protobuf==5.28.3 @@ -426,7 +426,7 @@ pytablewriter==1.2.0 # via lm-eval pytest==8.3.3 # via - # -r requirements-test.in + # -r requirements/test.in # buildkite-test-collector # genai-perf # pytest-asyncio @@ -435,15 +435,15 @@ pytest==8.3.3 # pytest-rerunfailures # pytest-shard pytest-asyncio==0.24.0 - # via -r requirements-test.in + # via -r requirements/test.in pytest-forked==1.6.0 - # via -r requirements-test.in + # via -r requirements/test.in pytest-mock==3.14.0 # via genai-perf pytest-rerunfailures==14.0 - # via -r requirements-test.in + # via -r requirements/test.in pytest-shard==0.1.2 - # via -r requirements-test.in + # via -r requirements/test.in python-dateutil==2.9.0.post0 # via # botocore @@ -473,7 +473,7 @@ pyyaml==6.0.2 rapidfuzz==3.12.1 # via jiwer ray==2.43.0 - # via -r requirements-test.in + # via -r requirements/test.in redis==5.2.0 # via tensorizer referencing==0.35.1 @@ -512,9 +512,9 @@ rpds-py==0.20.1 rsa==4.7.2 # via awscli runai-model-streamer==0.11.0 - # via -r requirements-test.in + # via -r requirements/test.in runai-model-streamer-s3==0.11.0 - # via -r requirements-test.in + # via -r requirements/test.in s3transfer==0.10.3 # via # awscli @@ -540,7 +540,7 @@ scipy==1.13.1 # statsmodels # vocos sentence-transformers==3.2.1 - # via -r requirements-test.in + # via -r requirements/test.in sentencepiece==0.2.0 # via mistral-common setuptools==75.8.0 @@ -557,7 +557,7 @@ sniffio==1.3.1 # httpx soundfile==0.12.1 # via - # -r requirements-test.in + # -r requirements/test.in # librosa soxr==0.5.0.post1 # via librosa @@ -580,7 +580,7 @@ tenacity==9.0.0 # lm-eval # plotly tensorizer==2.9.0 - # via -r requirements-test.in + # via -r requirements/test.in threadpoolctl==3.5.0 # via scikit-learn tiktoken==0.7.0 @@ -588,12 +588,12 @@ tiktoken==0.7.0 # lm-eval # mistral-common timm==1.0.11 - # via -r requirements-test.in + # via -r requirements/test.in tokenizers==0.21.0 # via transformers torch==2.5.1 # via - # -r requirements-test.in + # -r requirements/test.in # accelerate # bitsandbytes # encodec @@ -609,7 +609,7 @@ torch==2.5.1 # vocos torchaudio==2.5.1 # via - # -r requirements-test.in + # -r requirements/test.in # encodec # vocos torchvision==0.20.1 @@ -630,19 +630,19 @@ tqdm-multiprocess==0.0.11 # via lm-eval transformers==4.48.2 # via - # -r requirements-test.in + # -r requirements/test.in # genai-perf # lm-eval # peft # sentence-transformers # transformers-stream-generator transformers-stream-generator==0.0.5 - # via -r requirements-test.in + # via -r requirements/test.in triton==3.1.0 # via torch tritonclient==2.51.0 # via - # -r requirements-test.in + # -r requirements/test.in # genai-perf typepy==1.3.2 # via @@ -668,9 +668,9 @@ urllib3==2.2.3 # responses # tritonclient vector-quantize-pytorch==1.21.2 - # via -r requirements-test.in + # via -r requirements/test.in vocos==0.1.0 - # via -r requirements-test.in + # via -r requirements/test.in word2number==1.1 # via lm-eval xxhash==3.5.0 diff --git a/requirements-tpu.txt b/requirements/tpu.txt similarity index 98% rename from requirements-tpu.txt rename to requirements/tpu.txt index 4bc6a9b8..e8e3b0af 100644 --- a/requirements-tpu.txt +++ b/requirements/tpu.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt # Dependencies for TPU cmake>=3.26 diff --git a/requirements-xpu.txt b/requirements/xpu.txt similarity index 95% rename from requirements-xpu.txt rename to requirements/xpu.txt index be5cb6a4..26520595 100644 --- a/requirements-xpu.txt +++ b/requirements/xpu.txt @@ -1,5 +1,5 @@ # Common dependencies --r requirements-common.txt +-r common.txt ray >= 2.9 cmake>=3.26 diff --git a/setup.py b/setup.py index 749f415b..d18fe53f 100755 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ def load_module_from_path(module_name, path): return module -ROOT_DIR = os.path.dirname(__file__) +ROOT_DIR = Path(__file__).parent logger = logging.getLogger(__name__) # cannot import envs directly because it depends on vllm, @@ -520,10 +520,6 @@ def get_nvcc_cuda_version() -> Version: return nvcc_cuda_version -def get_path(*filepath) -> str: - return os.path.join(ROOT_DIR, *filepath) - - def get_gaudi_sw_version(): """ Returns the driver version. @@ -591,9 +587,10 @@ def get_vllm_version() -> str: def get_requirements() -> list[str]: """Get Python package dependencies from requirements.txt.""" + requirements_dir = ROOT_DIR / "requirements" def _read_requirements(filename: str) -> list[str]: - with open(get_path(filename)) as f: + with open(requirements_dir / filename) as f: requirements = f.read().strip().split("\n") resolved_requirements = [] for line in requirements: @@ -606,9 +603,9 @@ def get_requirements() -> list[str]: return resolved_requirements if _no_device(): - requirements = _read_requirements("requirements-common.txt") + requirements = _read_requirements("common.txt") elif _is_cuda(): - requirements = _read_requirements("requirements-cuda.txt") + requirements = _read_requirements("cuda.txt") cuda_major, cuda_minor = torch.version.cuda.split(".") modified_requirements = [] for req in requirements: @@ -619,19 +616,19 @@ def get_requirements() -> list[str]: modified_requirements.append(req) requirements = modified_requirements elif _is_hip(): - requirements = _read_requirements("requirements-rocm.txt") + requirements = _read_requirements("rocm.txt") elif _is_neuron(): - requirements = _read_requirements("requirements-neuron.txt") + requirements = _read_requirements("neuron.txt") elif _is_hpu(): - requirements = _read_requirements("requirements-hpu.txt") + requirements = _read_requirements("hpu.txt") elif _is_openvino(): - requirements = _read_requirements("requirements-openvino.txt") + requirements = _read_requirements("openvino.txt") elif _is_tpu(): - requirements = _read_requirements("requirements-tpu.txt") + requirements = _read_requirements("tpu.txt") elif _is_cpu(): - requirements = _read_requirements("requirements-cpu.txt") + requirements = _read_requirements("cpu.txt") elif _is_xpu(): - requirements = _read_requirements("requirements-xpu.txt") + requirements = _read_requirements("xpu.txt") else: raise ValueError( "Unsupported platform, please use CUDA, ROCm, Neuron, HPU, " diff --git a/use_existing_torch.py b/use_existing_torch.py index a578328b..7d352c6c 100644 --- a/use_existing_torch.py +++ b/use_existing_torch.py @@ -2,7 +2,7 @@ import glob -requires_files = glob.glob('requirements*.txt') +requires_files = glob.glob('requirements/*.txt') requires_files += ["pyproject.toml"] for file in requires_files: print(f">>> cleaning {file}")