Update to torch==2.6.0 (#12721)

Signed-off-by: mgoin <michael@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: luka <luka@neuralmagic.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Michael Goin 2025-03-14 16:58:30 -04:00 committed by GitHub
parent 46f98893dd
commit 14f301b541
9 changed files with 43 additions and 23 deletions

CMakeLists.txt

@@ -46,8 +46,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.5.1")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.5.1")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0")
 #
 # Try to find python package with an executable that exactly matches
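
The comment block above ties TORCH_SUPPORTED_VERSION_* to the torch pins in the requirements files. Below is a minimal Python sketch of that consistency check, assuming only torch and packaging; the constant and the check itself are illustrative, not the build system's actual mechanism:

from packaging.version import Version

import torch

# Must agree with pyproject.toml and requirements/*.txt after this commit.
TORCH_SUPPORTED_VERSION_CUDA = "2.6.0"

# base_version drops local suffixes such as "+cu124" before comparing.
installed = Version(torch.__version__).base_version
if installed != TORCH_SUPPORTED_VERSION_CUDA:
    raise RuntimeError(
        f"vLLM was built for torch=={TORCH_SUPPORTED_VERSION_CUDA}, "
        f"but found {installed}")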

Dockerfile

@@ -222,7 +222,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl ; \
+        uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
     fi
 COPY examples examples
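
The flashinfer wheel name encodes the torch and CUDA pair it was built for (cu124torch2.6), so this bump has to ship together with the torch 2.6.0 pin. A hedged sanity check one could run inside the built image follows; treating flashinfer.__version__ as an assumption about the wheel's metadata:

import torch

print(torch.__version__)   # expected to start with "2.6" in this image
print(torch.version.cuda)  # expected "12.4" to match the cu124 wheel tag

import flashinfer  # a successful import already checks the torch ABI match

# __version__ is assumed to be exposed by this wheel; fall back gracefully.
print(getattr(flashinfer, "__version__", "unknown"))  # expected "0.2.1.post2"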

pyproject.toml

@@ -6,7 +6,7 @@ requires = [
     "packaging",
     "setuptools>=61",
     "setuptools-scm>=8.0",
-    "torch == 2.5.1",
+    "torch == 2.6.0",
     "wheel",
     "jinja2",
 ]

requirements/build.txt

@@ -4,6 +4,6 @@ ninja
 packaging
 setuptools>=61
 setuptools-scm>=8
-torch==2.5.1
+torch==2.6.0
 wheel
 jinja2

requirements/cuda.txt

@@ -4,9 +4,9 @@
 numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding

 # Dependencies for NVIDIA GPUs
-ray[cgraph] >= 2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch == 2.5.1
-torchaudio==2.5.1
+ray[cgraph]>=2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.6.0
+torchaudio==2.6.0
 # These must be updated alongside torch
-torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1
+torchvision==0.21.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers==0.0.29.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.6.0
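
As the comment says, torchvision and xformers must move in lockstep with torch; torch 2.6.0 pairs with torchvision 0.21.0, torchaudio 2.6.0, and xformers 0.0.29.post2. An illustrative runtime check of that alignment, with assert prefixes mirroring the pins above:

import torch
import torchaudio
import torchvision

# A mismatch here means the environment drifted from requirements/cuda.txt.
assert torch.__version__.startswith("2.6."), torch.__version__
assert torchaudio.__version__.startswith("2.6."), torchaudio.__version__
assert torchvision.__version__.startswith("0.21."), torchvision.__version__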

requirements/test.in

@@ -21,8 +21,9 @@ sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
 timm # required for internvl test
-torch==2.5.1
-torchaudio==2.5.1
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[opencv] >= 1.5.0 # required for pixtral test
@@ -30,7 +31,7 @@ datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.4 # required for model evaluation test
 transformers==4.48.2
 # quantization
-bitsandbytes>=0.45.0
+bitsandbytes>=0.45.3
 buildkite-test-collector==0.1.9
 genai_perf==0.0.8

requirements/test.txt

@@ -33,7 +33,7 @@ audioread==3.0.1
     # via librosa
 awscli==1.35.23
     # via -r requirements/test.in
-bitsandbytes==0.45.0
+bitsandbytes==0.45.3
     # via -r requirements/test.in
 black==24.10.0
     # via datamodel-code-generator
@@ -127,7 +127,6 @@ filelock==3.16.1
     #   ray
     #   torch
     #   transformers
-    #   triton
 fonttools==4.54.1
     # via matplotlib
 frozendict==2.4.6
@@ -320,6 +319,8 @@ nvidia-cusparse-cu12==12.3.1.170
     # via
     #   nvidia-cusolver-cu12
     #   torch
+nvidia-cusparselt-cu12==0.6.2
+    # via torch
 nvidia-nccl-cu12==2.21.5
     # via torch
 nvidia-nvjitlink-cu12==12.4.127
@@ -591,7 +592,7 @@ timm==1.0.11
     # via -r requirements/test.in
 tokenizers==0.21.0
     # via transformers
-torch==2.5.1
+torch==2.6.0
     # via
     #   -r requirements/test.in
     #   accelerate
@@ -607,13 +608,15 @@ torch==2.5.1
     #   torchvision
     #   vector-quantize-pytorch
     #   vocos
-torchaudio==2.5.1
+torchaudio==2.6.0
     # via
     #   -r requirements/test.in
     #   encodec
     #   vocos
-torchvision==0.20.1
-    # via timm
+torchvision==0.21.0
+    # via
+    #   -r requirements/test.in
+    #   timm
 tqdm==4.66.6
     # via
     #   datasets
@@ -638,7 +641,7 @@ transformers==4.48.2
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
-triton==3.1.0
+triton==3.2.0
     # via torch
 tritonclient==2.51.0
     # via
@@ -651,7 +654,6 @@ typepy==1.3.2
     #   tabledata
 typing-extensions==4.12.2
     # via
-    #   bitsandbytes
     #   huggingface-hub
     #   librosa
     #   mistral-common

tests/compile/backend.py

@@ -6,6 +6,7 @@ from typing import Callable, Union
 from torch import fx

 from vllm.compilation.inductor_pass import InductorPass
+from vllm.config import get_current_vllm_config


 class TestBackend:
@@ -17,13 +18,14 @@ class TestBackend:
     Inductor config can be modified directly by editing the inductor_config
     property. This can be helpful for adding passes like the
     'pre_grad_custom_pass' and the 'post_grad_custom_pre_pass'.
+    Inductor config is default-initialized from VllmConfig.CompilationConfig.
     """

     def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],
                                                               None]]):
         self.custom_passes = list(passes)
-        from torch._inductor import config
-        self.inductor_config = config.shallow_copy_dict()
+        compile_config = get_current_vllm_config().compilation_config
+        self.inductor_config = compile_config.inductor_compile_config
         self.inductor_config['force_disable_caches'] = True
         self.inductor_config['post_grad_custom_post_pass'] = self.post_pass
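
After this change TestBackend no longer copies torch._inductor's global config; it reads inductor_compile_config from whichever VllmConfig is current at construction time. A minimal usage sketch, assuming vllm.config exposes VllmConfig and a set_current_vllm_config context manager as elsewhere in this tree; the no-op pass is a placeholder:

from torch import fx

from tests.compile.backend import TestBackend  # the class edited above
from vllm.config import VllmConfig, set_current_vllm_config


def noop_pass(graph: fx.Graph) -> None:
    # Stands in for a real post-grad pass, e.g. a fusion pass under test.
    pass


with set_current_vllm_config(VllmConfig()):
    # TestBackend now reads compilation_config.inductor_compile_config
    # from the config made current above.
    backend = TestBackend(noop_pass)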

vllm/config.py

@@ -52,6 +52,8 @@ if TYPE_CHECKING:
 else:
     QuantizationConfig = None

+from packaging.version import Version
+
 logger = init_logger(__name__)

 # This value is chosen to have a balance between ITL and TTFT. Note it is
@@ -3126,6 +3128,19 @@ class CompilationConfig(BaseModel):
         count_all = self.custom_ops.count("all")
         assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"

+        # TODO(zou3519/luka): There are 2 issues with auto-functionalization V2:
+        # 1. A bug in PyTorch, fixed in 2.7:
+        #    https://github.com/pytorch/pytorch/issues/147924
+        # 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
+        #    work with V2. Addressing this will take extra engineering effort
+        #    and it is not yet a priority. RFC here:
+        #    https://github.com/vllm-project/vllm/issues/14703
+        if Version(torch.__version__) >= Version("2.6"):
+            KEY = 'enable_auto_functionalized_v2'
+            if KEY not in self.inductor_compile_config:
+                self.inductor_compile_config[KEY] = False
+
         if self.splitting_ops is None:
             if envs.VLLM_USE_V1:
                 # v1 must split the graph on attention ops
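
A standalone sketch of the gate added above: on torch >= 2.6, the config defaults enable_auto_functionalized_v2 to False unless the user set it explicitly, because the custom fusion passes still assume auto-functionalization V1. Only torch and packaging are used; the plain dict stands in for CompilationConfig.inductor_compile_config:

from packaging.version import Version

import torch

inductor_compile_config: dict = {}

# Version() handles local builds under PEP 440, so "2.6.0+cu124"
# still compares >= "2.6".
if Version(torch.__version__) >= Version("2.6"):
    # setdefault preserves an explicit user override, matching the
    # "if KEY not in self.inductor_compile_config" check above.
    inductor_compile_config.setdefault("enable_auto_functionalized_v2", False)

print(inductor_compile_config)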