[Model] Molmo vLLM Integration (#9016)
Co-authored-by: sanghol <sanghol@allenai.org>
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
Co-authored-by: Roger Wang <ywang@roblox.com>
parent 16b24e7dcd
commit dfe43a2071
@@ -399,6 +399,12 @@ Text Generation
     - :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
     -
     -
+  * - :code:`MolmoForCausalLM`
+    - Molmo
+    - Image
+    - :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
+    -
+    - ✅︎
   * - :code:`NVLM_D_Model`
     - NVLM-D 1.0
     - Image\ :sup:`E+`
@@ -300,6 +300,23 @@ def run_mllama(question: str, modality: str):
     return llm, prompt, stop_token_ids


+# Molmo
+def run_molmo(question, modality):
+    assert modality == "image"
+
+    model_name = "allenai/Molmo-7B-D-0924"
+
+    llm = LLM(
+        model=model_name,
+        trust_remote_code=True,
+        dtype="bfloat16",
+    )
+
+    prompt = question
+    stop_token_ids = None
+    return llm, prompt, stop_token_ids
+
+
 # GLM-4v
 def run_glm4v(question: str, modality: str):
     assert modality == "image"
@@ -331,6 +348,7 @@ model_example_map = {
     "qwen_vl": run_qwen_vl,
     "qwen2_vl": run_qwen2_vl,
     "mllama": run_mllama,
+    "molmo": run_molmo,
     "glm4v": run_glm4v,
 }

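A hedged usage sketch for the new run_molmo entry, mirroring how this example script drives its model_example_map functions; the image URL and sampling settings below are illustrative assumptions, not part of the diff.

# Hypothetical driver for run_molmo(); assumes vLLM's multi-modal generate()
# API used elsewhere in this example script. The image URL is a placeholder.
import requests
from PIL import Image
from vllm import SamplingParams

llm, prompt, stop_token_ids = run_molmo("Describe this image.", "image")

image = Image.open(
    requests.get("https://example.com/sample.jpg", stream=True).raw
).convert("RGB")

outputs = llm.generate(
    {"prompt": prompt, "multi_modal_data": {"image": image}},
    sampling_params=SamplingParams(temperature=0.2,
                                   max_tokens=64,
                                   stop_token_ids=stop_token_ids),
)
print(outputs[0].outputs[0].text)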
@@ -163,6 +163,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
                 return "<|image|>"
             if model_type == "qwen2_vl":
                 return "<|vision_start|><|image_pad|><|vision_end|>"
+            if model_type == "molmo":
+                return ""

             raise TypeError(f"Unknown model type: {model_type}")
         elif modality == "audio":
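A hedged illustration of what the empty Molmo placeholder implies (the helper below is hypothetical, not vLLM code): prepending the per-model image placeholder leaves a Molmo prompt unchanged, while a Qwen2-VL prompt gains its vision tokens.

# Hypothetical helper, not vLLM code: demonstrates the effect of the
# placeholder strings handled in the hunk above.
def build_prompt(model_type: str, text: str) -> str:
    placeholders = {
        "qwen2_vl": "<|vision_start|><|image_pad|><|vision_end|>",
        "molmo": "",  # Molmo's processor adds image tokens itself (assumption)
    }
    return placeholders[model_type] + text

assert build_prompt("molmo", "Describe the image.") == "Describe the image."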
@@ -20,4 +20,4 @@ __all__ = [
     "supports_multimodal",
     "SupportsPP",
     "supports_pp",
-]
+]
vllm/model_executor/models/molmo.py (new file, 1290 lines)
File diff suppressed because it is too large
@@ -1167,8 +1167,7 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
                         continue
                     param = params_dict[name]
                 except KeyError:
-                    print(params_dict.keys())
-                    raise
+                    raise ValueError(f"Unexpected weight: {name}") from None

                 weight_loader = getattr(param, "weight_loader",
                                         default_weight_loader)
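A minimal standalone illustration of the error-handling pattern adopted above: the debug print and bare re-raise give way to a descriptive ValueError, with from None suppressing the chained KeyError traceback. The dictionary contents and weight name here are made up.

# Standalone sketch of the "raise ... from None" pattern; params_dict contents
# and the weight name are invented for illustration.
params_dict = {"model.layers.0.self_attn.qkv_proj.weight": object()}
name = "unexpected.weight"
try:
    param = params_dict[name]
except KeyError:
    raise ValueError(f"Unexpected weight: {name}") from None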
@@ -104,6 +104,7 @@ _MULTIMODAL_MODELS = {
     "LlavaNextVideoForConditionalGeneration": ("llava_next_video", "LlavaNextVideoForConditionalGeneration"),  # noqa: E501
     "LlavaOnevisionForConditionalGeneration": ("llava_onevision", "LlavaOnevisionForConditionalGeneration"),  # noqa: E501
     "MiniCPMV": ("minicpmv", "MiniCPMV"),
+    "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
     "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
     "PaliGemmaForConditionalGeneration": ("paligemma", "PaliGemmaForConditionalGeneration"),  # noqa: E501
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
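A hedged sketch of what a registry tuple encodes (the resolver below is illustrative, not vLLM's actual loading path): the first element names a module under vllm/model_executor/models and the second the class exported from it.

# Illustrative resolver, not vLLM's real implementation: turns an entry such as
# "MolmoForCausalLM": ("molmo", "MolmoForCausalLM") into the class
# vllm.model_executor.models.molmo.MolmoForCausalLM.
from importlib import import_module

def resolve_arch(arch: str, registry: dict) -> type:
    module_name, class_name = registry[arch]
    module = import_module(f"vllm.model_executor.models.{module_name}")
    return getattr(module, class_name)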