[Misc] Optimize Qwen2-VL LoRA test (#11663)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
parent
365801fedd
commit
11d8a091c6
@ -7,7 +7,7 @@ from vllm.assets.image import ImageAsset
|
|||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
MODEL_PATH = "Qwen/Qwen2-VL-7B-Instruct"
|
MODEL_PATH = "Qwen/Qwen2-VL-2B-Instruct"
|
||||||
|
|
||||||
PROMPT_TEMPLATE = (
|
PROMPT_TEMPLATE = (
|
||||||
"<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
|
"<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
|
||||||
@ -49,10 +49,9 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
|
|||||||
# Print the outputs.
|
# Print the outputs.
|
||||||
generated_texts: List[str] = []
|
generated_texts: List[str] = []
|
||||||
for output in outputs:
|
for output in outputs:
|
||||||
prompt = output.prompt
|
|
||||||
generated_text = output.outputs[0].text.strip()
|
generated_text = output.outputs[0].text.strip()
|
||||||
generated_texts.append(generated_text)
|
generated_texts.append(generated_text)
|
||||||
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
print(f"Generated text: {generated_text!r}")
|
||||||
return generated_texts
|
return generated_texts
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,6 +52,7 @@ from vllm.model_executor.layers.quantization.gptq_marlin import (
|
|||||||
GPTQMarlinConfig)
|
GPTQMarlinConfig)
|
||||||
from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
|
from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
||||||
|
from vllm.model_executor.models.module_mapping import MultiModelKeys
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
from vllm.multimodal.inputs import (ImageItem, ModalityData,
|
from vllm.multimodal.inputs import (ImageItem, ModalityData,
|
||||||
MultiModalFieldConfig, MultiModalKwargs,
|
MultiModalFieldConfig, MultiModalKwargs,
|
||||||
@ -926,15 +927,23 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
|
|||||||
}
|
}
|
||||||
|
|
||||||
# LoRA specific attributes
|
# LoRA specific attributes
|
||||||
# TODO Support LoRA for the visual encoder in the future.
|
|
||||||
supported_lora_modules = [
|
supported_lora_modules = [
|
||||||
"qkv_proj",
|
"qkv_proj",
|
||||||
"o_proj",
|
"o_proj",
|
||||||
"gate_up_proj",
|
"gate_up_proj",
|
||||||
"down_proj",
|
"down_proj",
|
||||||
|
# vision tower
|
||||||
|
"qkv",
|
||||||
|
"attn.proj", # Distinguish patch_embed.proj
|
||||||
|
"fc1",
|
||||||
|
"fc2",
|
||||||
|
# projector
|
||||||
|
"mlp.0",
|
||||||
|
"mlp.2"
|
||||||
]
|
]
|
||||||
embedding_modules = {}
|
embedding_modules = {}
|
||||||
embedding_padding_modules = []
|
embedding_padding_modules = []
|
||||||
|
|
||||||
# To ensure correct weight loading and mapping.
|
# To ensure correct weight loading and mapping.
|
||||||
hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={
|
hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={
|
||||||
"lm_head.": "language_model.lm_head.",
|
"lm_head.": "language_model.lm_head.",
|
||||||
@ -1231,3 +1240,12 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
|
|||||||
|
|
||||||
loader = AutoWeightsLoader(self)
|
loader = AutoWeightsLoader(self)
|
||||||
return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
|
return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
|
||||||
|
|
||||||
|
def get_mm_mapping(self) -> MultiModelKeys:
    """
    Get the module prefix in multimodal models.

    Returns:
        A ``MultiModelKeys`` built from three module-name prefixes: the
        language model, the connector (projector) and the vision tower.
    """
    return MultiModelKeys.from_string_field(
        language_model="language_model",
        # The projector layers ("mlp.0" / "mlp.2" in supported_lora_modules)
        # are expected to live in the merger sub-module, so the merger is
        # the connector -- NOTE(review): confirm against the merger class.
        connector="visual.merger.",
        # The vision tower is the visual encoder as a whole; the previous
        # mapping had these two prefixes swapped.
        tower_model="visual.")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user