[ROCm][Bugfix] Bring back fallback to eager mode removed in #14917, but for ROCm only (#15413)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
Gregory Shtrasberg 2025-04-04 12:40:37 -04:00 committed by GitHub
parent 40a36ccfeb
commit a6d042df0a


@@ -29,7 +29,7 @@ from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
                                                      get_quantization_config)
 from vllm.model_executor.models import ModelRegistry
-from vllm.platforms import CpuArchEnum
+from vllm.platforms import CpuArchEnum, current_platform
 from vllm.sampling_params import GuidedDecodingParams
 from vllm.tracing import is_otel_available, otel_import_error_traceback
 from vllm.transformers_utils.config import (
@@ -684,6 +684,13 @@ class ModelConfig:
             self.max_seq_len_to_capture = self.max_model_len
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
+        ROCM_UNSUPPORTED_MODELS = ['mllama']
+        if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
+                and not self.enforce_eager and current_platform.is_rocm()):
+            logger.warning(
+                "CUDA graph is not supported for %s on ROCm yet, fallback "
+                "to the eager mode.", self.hf_config.model_type)
+            self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None:
         """