Update to torch==2.6.0 (#12721)

Signed-off-by: mgoin <michael@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: luka <luka@neuralmagic.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Michael Goin 2025-03-14 16:58:30 -04:00 committed by GitHub
parent 46f98893dd
commit 14f301b541
9 changed files with 43 additions and 23 deletions

CMakeLists.txt

@@ -46,8 +46,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.5.1")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.5.1")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0")
 #
 # Try to find python package with an executable that exactly matches
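
The comment block above ties TORCH_SUPPORTED_VERSION_* to the torch pins in the requirements files. Below is a minimal Python sketch of that consistency check, assuming only torch and packaging; the constant and the check itself are illustrative, not the build system's actual mechanism:

from packaging.version import Version

import torch

# Must agree with pyproject.toml and requirements/*.txt after this commit.
TORCH_SUPPORTED_VERSION_CUDA = "2.6.0"

# base_version drops local suffixes such as "+cu124" before comparing.
installed = Version(torch.__version__).base_version
if installed != TORCH_SUPPORTED_VERSION_CUDA:
    raise RuntimeError(
        f"vLLM was built for torch=={TORCH_SUPPORTED_VERSION_CUDA}, "
        f"but found {installed}")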

Dockerfile

@@ -222,7 +222,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl ; \
+        uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
     fi
 COPY examples examples
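
The flashinfer wheel name encodes the torch and CUDA pair it was built for (cu124torch2.6), so this bump has to ship together with the torch 2.6.0 pin. A hedged sanity check one could run inside the built image follows; treating flashinfer.__version__ as an assumption about the wheel's metadata:

import torch

print(torch.__version__)   # expected to start with "2.6" in this image
print(torch.version.cuda)  # expected "12.4" to match the cu124 wheel tag

import flashinfer  # a successful import already checks the torch ABI match

# __version__ is assumed to be exposed by this wheel; fall back gracefully.
print(getattr(flashinfer, "__version__", "unknown"))  # expected "0.2.1.post2"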

pyproject.toml

@@ -6,7 +6,7 @@ requires = [
     "packaging",
     "setuptools>=61",
     "setuptools-scm>=8.0",
-    "torch == 2.5.1",
+    "torch == 2.6.0",
     "wheel",
     "jinja2",
 ]

requirements/build.txt

@@ -4,6 +4,6 @@ ninja
 packaging
 setuptools>=61
 setuptools-scm>=8
-torch==2.5.1
+torch==2.6.0
 wheel
 jinja2

requirements/cuda.txt

@@ -4,9 +4,9 @@
 numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding

 # Dependencies for NVIDIA GPUs
-ray[cgraph] >= 2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch == 2.5.1
-torchaudio==2.5.1
+ray[cgraph]>=2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.6.0
+torchaudio==2.6.0
 # These must be updated alongside torch
-torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1
+torchvision==0.21.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers==0.0.29.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.6.0
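
As the comment says, torchvision and xformers must move in lockstep with torch; torch 2.6.0 pairs with torchvision 0.21.0, torchaudio 2.6.0, and xformers 0.0.29.post2. An illustrative runtime check of that alignment, with assert prefixes mirroring the pins above:

import torch
import torchaudio
import torchvision

# A mismatch here means the environment drifted from requirements/cuda.txt.
assert torch.__version__.startswith("2.6."), torch.__version__
assert torchaudio.__version__.startswith("2.6."), torchaudio.__version__
assert torchvision.__version__.startswith("0.21."), torchvision.__version__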

requirements/test.in

@@ -21,8 +21,9 @@ sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
 timm # required for internvl test
-torch==2.5.1
-torchaudio==2.5.1
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[opencv] >= 1.5.0 # required for pixtral test
@@ -30,7 +31,7 @@ datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.4 # required for model evaluation test
 transformers==4.48.2
 # quantization
-bitsandbytes>=0.45.0
+bitsandbytes>=0.45.3
 buildkite-test-collector==0.1.9
 genai_perf==0.0.8

requirements/test.txt

@@ -33,7 +33,7 @@ audioread==3.0.1
     # via librosa
 awscli==1.35.23
     # via -r requirements/test.in
-bitsandbytes==0.45.0
+bitsandbytes==0.45.3
     # via -r requirements/test.in
 black==24.10.0
     # via datamodel-code-generator
@@ -127,7 +127,6 @@ filelock==3.16.1
     #   ray
     #   torch
     #   transformers
-    #   triton
 fonttools==4.54.1
     # via matplotlib
 frozendict==2.4.6
@@ -320,6 +319,8 @@ nvidia-cusparse-cu12==12.3.1.170
     # via
     #   nvidia-cusolver-cu12
     #   torch
+nvidia-cusparselt-cu12==0.6.2
+    # via torch
 nvidia-nccl-cu12==2.21.5
     # via torch
 nvidia-nvjitlink-cu12==12.4.127
@@ -591,7 +592,7 @@ timm==1.0.11
     # via -r requirements/test.in
 tokenizers==0.21.0
     # via transformers
-torch==2.5.1
+torch==2.6.0
     # via
     #   -r requirements/test.in
     #   accelerate
@@ -607,13 +608,15 @@ torch==2.5.1
     #   torchvision
     #   vector-quantize-pytorch
     #   vocos
-torchaudio==2.5.1
+torchaudio==2.6.0
     # via
     #   -r requirements/test.in
     #   encodec
     #   vocos
-torchvision==0.20.1
-    # via timm
+torchvision==0.21.0
+    # via
+    #   -r requirements/test.in
+    #   timm
 tqdm==4.66.6
     # via
     #   datasets
@@ -638,7 +641,7 @@ transformers==4.48.2
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
-triton==3.1.0
+triton==3.2.0
     # via torch
 tritonclient==2.51.0
     # via
@@ -651,7 +654,6 @@ typepy==1.3.2
     #   tabledata
 typing-extensions==4.12.2
     # via
-    #   bitsandbytes
     #   huggingface-hub
     #   librosa
     #   mistral-common

tests/compile/backend.py

@@ -6,6 +6,7 @@ from typing import Callable, Union
 from torch import fx

 from vllm.compilation.inductor_pass import InductorPass
+from vllm.config import get_current_vllm_config


 class TestBackend:
@@ -17,13 +18,14 @@ class TestBackend:
     Inductor config can be modified directly by editing the inductor_config
     property. This can be helpful for adding passes like the
     'pre_grad_custom_pass' and the 'post_grad_custom_pre_pass'.
+    Inductor config is default-initialized from VllmConfig.CompilationConfig.
     """

     def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],
                                                               None]]):
         self.custom_passes = list(passes)
-        from torch._inductor import config
-        self.inductor_config = config.shallow_copy_dict()
+        compile_config = get_current_vllm_config().compilation_config
+        self.inductor_config = compile_config.inductor_compile_config
         self.inductor_config['force_disable_caches'] = True
         self.inductor_config['post_grad_custom_post_pass'] = self.post_pass
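
After this change TestBackend no longer copies torch._inductor's global config; it reads inductor_compile_config from whichever VllmConfig is current at construction time. A minimal usage sketch, assuming vllm.config exposes VllmConfig and a set_current_vllm_config context manager as elsewhere in this tree; the no-op pass is a placeholder:

from torch import fx

from tests.compile.backend import TestBackend  # the class edited above
from vllm.config import VllmConfig, set_current_vllm_config


def noop_pass(graph: fx.Graph) -> None:
    # Stands in for a real post-grad pass, e.g. a fusion pass under test.
    pass


with set_current_vllm_config(VllmConfig()):
    # TestBackend now reads compilation_config.inductor_compile_config
    # from the config made current above.
    backend = TestBackend(noop_pass)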

vllm/config.py

@@ -52,6 +52,8 @@ if TYPE_CHECKING:
 else:
     QuantizationConfig = None

+from packaging.version import Version
+
 logger = init_logger(__name__)

 # This value is chosen to have a balance between ITL and TTFT. Note it is
@@ -3126,6 +3128,19 @@ class CompilationConfig(BaseModel):
         count_all = self.custom_ops.count("all")
         assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"

+        # TODO(zou3519/luka): There are 2 issues with auto-functionalization V2:
+        # 1. A bug in PyTorch, fixed in 2.7:
+        #    https://github.com/pytorch/pytorch/issues/147924
+        # 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
+        #    work with V2. Addressing this will take extra engineering effort
+        #    and it is not yet a priority. RFC here:
+        #    https://github.com/vllm-project/vllm/issues/14703
+        if Version(torch.__version__) >= Version("2.6"):
+            KEY = 'enable_auto_functionalized_v2'
+            if KEY not in self.inductor_compile_config:
+                self.inductor_compile_config[KEY] = False
+
         if self.splitting_ops is None:
             if envs.VLLM_USE_V1:
                 # v1 must split the graph on attention ops
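
A standalone sketch of the gate added above: on torch >= 2.6, the config defaults enable_auto_functionalized_v2 to False unless the user set it explicitly, because the custom fusion passes still assume auto-functionalization V1. Only torch and packaging are used; the plain dict stands in for CompilationConfig.inductor_compile_config:

from packaging.version import Version

import torch

inductor_compile_config: dict = {}

# Version() handles local builds under PEP 440, so "2.6.0+cu124"
# still compares >= "2.6".
if Version(torch.__version__) >= Version("2.6"):
    # setdefault preserves an explicit user override, matching the
    # "if KEY not in self.inductor_compile_config" check above.
    inductor_compile_config.setdefault("enable_auto_functionalized_v2", False)

print(inductor_compile_config)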