From 382b6a4852b9afc9a740b02736688e20f7d58446 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Tue, 19 Nov 2024 16:54:58 +0800
Subject: [PATCH] [Misc] Avoid misleading warning messages (#10438)

Signed-off-by: Jee Jee Li
---
 vllm/model_executor/models/chatglm.py |  5 ++---
 vllm/model_executor/models/qwen.py    | 10 +++++-----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py
index 625e31bb..2ea592aa 100644
--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
@@ -575,8 +575,7 @@ class ChatGLMModel(nn.Module):
         return hidden_states


-class ChatGLMBaseModel(nn.Module, SupportsLoRA, SupportsPP,
-                       SupportsMultiModal):
+class ChatGLMBaseModel(nn.Module, SupportsLoRA, SupportsPP):

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -695,7 +694,7 @@ class ChatGLM(ChatGLMBaseModel):
     embedding_padding_modules = []


-class ChatGLMV(ChatGLMBaseModel):
+class ChatGLMV(ChatGLMBaseModel, SupportsMultiModal):
     packed_modules_mapping = {
         "query_key_value": ["query_key_value"],
         "dense_h_to_4h": ["dense_h_to_4h"],
diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py
index 3978c176..44ce6eda 100644
--- a/vllm/model_executor/models/qwen.py
+++ b/vllm/model_executor/models/qwen.py
@@ -870,7 +870,7 @@ def dummy_data_for_qwen(
     return DummyData(seq_data, mm_data)


-class QWenBaseModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
+class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -1024,7 +1024,7 @@ class QWenLLM(QWenBaseModel):
     embedding_padding_modules = []


-class QWenVL(QWenBaseModel):
+class QWenVL(QWenBaseModel, SupportsMultiModal):
     packed_modules_mapping = {
         "c_attn": ["c_attn"],
         "gate_up_proj": [
@@ -1062,7 +1062,7 @@ class QWenVL(QWenBaseModel):
 @MULTIMODAL_REGISTRY.register_max_image_tokens(MAX_QWEN_IMG_TOKENS)
 @INPUT_REGISTRY.register_dummy_data(dummy_data_for_qwen)
 @INPUT_REGISTRY.register_input_processor(input_processor_for_qwen)
-class QWenLMHeadModel(QWenBaseModel, SupportsLoRA):
+class QWenLMHeadModel(QWenBaseModel, SupportsMultiModal, SupportsLoRA):
     """
     QWenLMHeadModel is not only applicable to LLM but also to VL, which is
     not conducive to the current integration logic of LoRA in vLLM. Therefore, it
@@ -1083,7 +1083,7 @@ class QWenLMHeadModel(QWenBaseModel, SupportsLoRA):
         config = vllm_config.model_config.hf_config
         # Initialize VL
         if hasattr(config, "visual"):
-            return QWenVL(vllm_config=vllm_config)
+            return QWenVL(vllm_config=vllm_config, prefix=prefix)
         # Initialize LLM
         else:
-            return QWenLLM(vllm_config=vllm_config)
+            return QWenLLM(vllm_config=vllm_config, prefix=prefix)
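
Note (not part of the patch): the diff moves SupportsMultiModal off the ChatGLM/QWen
base classes and onto the vision-language subclasses, so the text-only variants no
longer advertise multimodal support, which is presumably the source of the misleading
warnings the subject line refers to. Below is a minimal, self-contained Python sketch
of that pattern, under assumptions: the toy SupportsMultiModal protocol stands in for
the real interface in vllm/model_executor/models/interfaces.py, and the hypothetical
has_visual flag stands in for the patch's hasattr(config, "visual") check.

from typing import Protocol, runtime_checkable

@runtime_checkable
class SupportsMultiModal(Protocol):
    # Marker protocol: models that accept multimodal (e.g. image) inputs.
    supports_multimodal: bool = True

class QWenBaseModel:
    # After the patch, the base class carries no multimodal marker.
    def __init__(self, *, prefix: str = "") -> None:
        self.prefix = prefix

class QWenLLM(QWenBaseModel):
    # Text-only variant: deliberately NOT SupportsMultiModal.
    pass

class QWenVL(QWenBaseModel, SupportsMultiModal):
    # Vision-language variant: the only concrete class declaring the marker.
    pass

class QWenLMHeadModel(QWenBaseModel, SupportsMultiModal):
    # Dispatches to the right variant at construction time, forwarding
    # `prefix` the way the last hunk of the patch does.
    def __new__(cls, *, has_visual: bool, prefix: str = ""):
        if has_visual:
            return QWenVL(prefix=prefix)
        return QWenLLM(prefix=prefix)

# The text-only model no longer looks multimodal to isinstance-style
# capability checks:
assert not isinstance(QWenLLM(), SupportsMultiModal)
assert isinstance(QWenVL(), SupportsMultiModal)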