Update to torch==2.6.0 (#12721)

Signed-off-by: mgoin <michael@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: luka <luka@neuralmagic.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Michael Goin 2025-03-14 16:58:30 -04:00 committed by GitHub
parent 46f98893dd
commit 14f301b541
9 changed files with 43 additions and 23 deletions

View File

@@ -46,8 +46,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.5.1")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.5.1")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0")
 #
 # Try to find python package with an executable that exactly matches

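The CMake pin above has to move in lockstep with the torch versions in the requirements files, as the comment in the hunk notes. A rough illustration of that coupling, as a hypothetical standalone check that is not part of this PR:

from importlib.metadata import version

from packaging.version import Version

# Mirrors the CMake pin above; keep in sync with the requirements files.
TORCH_SUPPORTED_VERSION_CUDA = Version("2.6.0")

installed = Version(version("torch"))  # e.g. "2.6.0+cu124"
if installed.base_version != str(TORCH_SUPPORTED_VERSION_CUDA):
    raise RuntimeError(
        f"torch {installed} is installed but {TORCH_SUPPORTED_VERSION_CUDA} is expected; "
        "update CMakeLists.txt and the requirements files together.")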
View File

@@ -222,7 +222,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl ; \
+        uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
     fi
 COPY examples examples

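The flashinfer wheel tag encodes the CUDA and torch versions it was built against (cu124torch2.6), so it has to be bumped together with the torch upgrade. A hypothetical post-install sanity check, not part of the Dockerfile, sketched under the assumption that the wheel installs the flashinfer module under the flashinfer-python distribution name:

from importlib.metadata import version

import torch
import flashinfer  # noqa: F401  # import fails if the wheel targets an incompatible torch ABI

print("torch:", torch.__version__, "CUDA:", torch.version.cuda)
print("flashinfer-python:", version("flashinfer-python"))
assert torch.__version__.startswith("2.6"), "the wheel above is built for torch 2.6"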
View File

@@ -6,7 +6,7 @@ requires = [
     "packaging",
     "setuptools>=61",
     "setuptools-scm>=8.0",
-    "torch == 2.5.1",
+    "torch == 2.6.0",
     "wheel",
     "jinja2",
 ]

View File

@@ -4,6 +4,6 @@ ninja
 packaging
 setuptools>=61
 setuptools-scm>=8
-torch==2.5.1
+torch==2.6.0
 wheel
 jinja2

View File

@@ -4,9 +4,9 @@
 numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
 # Dependencies for NVIDIA GPUs
-ray[cgraph] >= 2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch == 2.5.1
-torchaudio==2.5.1
+ray[cgraph]>=2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.6.0
+torchaudio==2.6.0
 # These must be updated alongside torch
-torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1
+torchvision==0.21.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers==0.0.29.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.6.0

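torchvision, torchaudio, and xformers only work with the specific torch release they were built against, which is why this file bumps them as a group. A minimal sketch, not part of the repo, of a consistency check over the pins above:

import platform
from importlib.metadata import version

from packaging.version import Version

# Expected versions taken from the requirements hunk above.
EXPECTED = {"torch": "2.6.0", "torchaudio": "2.6.0", "torchvision": "0.21.0"}

for pkg, expected in EXPECTED.items():
    installed = Version(version(pkg)).base_version
    if installed != expected:
        raise RuntimeError(f"{pkg}: found {installed}, expected {expected}")

# xformers is only pinned on Linux/x86_64, matching the environment marker above.
if platform.system() == "Linux" and platform.machine() == "x86_64":
    assert Version(version("xformers")) == Version("0.0.29.post2")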
View File

@@ -21,8 +21,9 @@ sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
 timm # required for internvl test
-torch==2.5.1
-torchaudio==2.5.1
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[opencv] >= 1.5.0 # required for pixtral test
@@ -30,7 +31,7 @@ datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.4 # required for model evaluation test
 transformers==4.48.2
 # quantization
-bitsandbytes>=0.45.0
+bitsandbytes>=0.45.3
 buildkite-test-collector==0.1.9
 genai_perf==0.0.8

View File

@@ -33,7 +33,7 @@ audioread==3.0.1
     # via librosa
 awscli==1.35.23
     # via -r requirements/test.in
-bitsandbytes==0.45.0
+bitsandbytes==0.45.3
     # via -r requirements/test.in
 black==24.10.0
     # via datamodel-code-generator
@@ -127,7 +127,6 @@ filelock==3.16.1
     #   ray
     #   torch
     #   transformers
-    #   triton
 fonttools==4.54.1
     # via matplotlib
 frozendict==2.4.6
@@ -320,6 +319,8 @@ nvidia-cusparse-cu12==12.3.1.170
     # via
     #   nvidia-cusolver-cu12
     #   torch
+nvidia-cusparselt-cu12==0.6.2
+    # via torch
 nvidia-nccl-cu12==2.21.5
     # via torch
 nvidia-nvjitlink-cu12==12.4.127
@@ -591,7 +592,7 @@ timm==1.0.11
     # via -r requirements/test.in
 tokenizers==0.21.0
     # via transformers
-torch==2.5.1
+torch==2.6.0
     # via
     #   -r requirements/test.in
     #   accelerate
@@ -607,13 +608,15 @@ torch==2.5.1
     #   torchvision
     #   vector-quantize-pytorch
     #   vocos
-torchaudio==2.5.1
+torchaudio==2.6.0
     # via
     #   -r requirements/test.in
     #   encodec
     #   vocos
-torchvision==0.20.1
-    # via timm
+torchvision==0.21.0
+    # via
+    #   -r requirements/test.in
+    #   timm
 tqdm==4.66.6
     # via
     #   datasets
@@ -638,7 +641,7 @@ transformers==4.48.2
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
-triton==3.1.0
+triton==3.2.0
     # via torch
 tritonclient==2.51.0
     # via
@@ -651,7 +654,6 @@ typepy==1.3.2
     #   tabledata
 typing-extensions==4.12.2
     # via
-    #   bitsandbytes
     #   huggingface-hub
     #   librosa
     #   mistral-common

View File

@@ -6,6 +6,7 @@ from typing import Callable, Union
 from torch import fx
 from vllm.compilation.inductor_pass import InductorPass
+from vllm.config import get_current_vllm_config
 class TestBackend:
@@ -17,13 +18,14 @@ class TestBackend:
     Inductor config can be modified directly by editing the inductor_config
     property. This can be helpful for adding passes like the
     'pre_grad_custom_pass' and the 'post_grad_custom_pre_pass'.
+    Inductor config is default-initialized from VllmConfig.CompilationConfig.
     """
     def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],
                                                               None]]):
         self.custom_passes = list(passes)
-        from torch._inductor import config
-        self.inductor_config = config.shallow_copy_dict()
+        compile_config = get_current_vllm_config().compilation_config
+        self.inductor_config = compile_config.inductor_compile_config
         self.inductor_config['force_disable_caches'] = True
         self.inductor_config['post_grad_custom_post_pass'] = self.post_pass

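With this change TestBackend seeds its Inductor overrides from the compilation config of the current vllm config instead of a copy of torch._inductor's global config. A hypothetical usage sketch (the compiled function and the no-op pass below are placeholders, not code from this PR):

import torch

from tests.compile.backend import TestBackend
from vllm.config import VllmConfig, set_current_vllm_config


def dummy_pass(graph: torch.fx.Graph) -> None:
    # Stand-in for a real post-grad fusion pass.
    pass


with set_current_vllm_config(VllmConfig()):
    # TestBackend now reads inductor_compile_config from the active vllm config.
    backend = TestBackend(dummy_pass)

    @torch.compile(backend=backend)
    def fn(x):
        return torch.relu(x) + 1.0

    fn(torch.randn(8))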
View File

@@ -52,6 +52,8 @@ if TYPE_CHECKING:
 else:
     QuantizationConfig = None
+from packaging.version import Version
 logger = init_logger(__name__)
 # This value is chosen to have a balance between ITL and TTFT. Note it is
@@ -3126,6 +3128,19 @@ class CompilationConfig(BaseModel):
         count_all = self.custom_ops.count("all")
         assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
+        # TODO(zou3519/luka): There are 2 issues with auto-functionalization V2:
+        # 1. A bug in PyTorch, fixed in 2.7:
+        #    https://github.com/pytorch/pytorch/issues/147924
+        # 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
+        #    work with V2. Addressing this will take extra engineering effort
+        #    and it is not yet a priority. RFC here:
+        #    https://github.com/vllm-project/vllm/issues/14703
+        if Version(torch.__version__) >= Version("2.6"):
+            KEY = 'enable_auto_functionalized_v2'
+            if KEY not in self.inductor_compile_config:
+                self.inductor_compile_config[KEY] = False
         if self.splitting_ops is None:
             if envs.VLLM_USE_V1:
                 # v1 must split the graph on attention ops
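For reference, entries in inductor_compile_config are applied as Inductor config overrides when vLLM compiles the graph. A rough standalone equivalent, not vLLM code, of what the new default amounts to on torch >= 2.6:

import torch


# Disable auto-functionalization V2 via Inductor options, mirroring the
# enable_auto_functionalized_v2 = False default set in the hunk above.
@torch.compile(backend="inductor",
               options={"enable_auto_functionalized_v2": False})
def fn(x):
    return torch.nn.functional.relu(x) + 1.0


fn(torch.randn(4))

The flag is only defaulted, not forced: a user who sets enable_auto_functionalized_v2 explicitly in inductor_compile_config keeps their value, which is what the `if KEY not in ...` guard in the diff preserves.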