Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
This commit is contained in:
parent
40a36ccfeb
commit
a6d042df0a
@@ -29,7 +29,7 @@ from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
|
||||
get_quantization_config)
|
||||
from vllm.model_executor.models import ModelRegistry
|
||||
from vllm.platforms import CpuArchEnum
|
||||
from vllm.platforms import CpuArchEnum, current_platform
|
||||
from vllm.sampling_params import GuidedDecodingParams
|
||||
from vllm.tracing import is_otel_available, otel_import_error_traceback
|
||||
from vllm.transformers_utils.config import (
|
||||
@@ -684,6 +684,13 @@ class ModelConfig:
|
||||
self.max_seq_len_to_capture = self.max_model_len
|
||||
self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
|
||||
self.max_model_len)
|
||||
ROCM_UNSUPPORTED_MODELS = ['mllama']
|
||||
if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
|
||||
and not self.enforce_eager and current_platform.is_rocm()):
|
||||
logger.warning(
|
||||
"CUDA graph is not supported for %s on ROCm yet, fallback "
|
||||
"to the eager mode.", self.hf_config.model_type)
|
||||
self.enforce_eager = True
|
||||
|
||||
def _verify_bnb_config(self) -> None:
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user