From e31045f95ca0f7262b156cd7e3e34100cbf1f4d1 Mon Sep 17 00:00:00 2001
From: Lucia Fang <116399278+luccafong@users.noreply.github.com>
Date: Thu, 17 Apr 2025 22:51:30 -0700
Subject: [PATCH] [Bugfix] fix pp for llama4 (#16746)

Signed-off-by: Lu Fang
---
 vllm/model_executor/models/mllama4.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
index 0966f546..69e3ea8b 100644
--- a/vllm/model_executor/models/mllama4.py
+++ b/vllm/model_executor/models/mllama4.py
@@ -672,9 +672,9 @@ class Llama4ForConditionalGeneration(nn.Module, SupportsMultiModal,
             self.config,
             None,
             prefix=maybe_prefix(prefix, "multi_modal_projector"))
-
         self.language_model = _initialize_model(
-            vllm_config=vllm_config.with_hf_config(config.text_config),
+            vllm_config=vllm_config.with_hf_config(config.text_config,
+                                                   ["LlamaForCausalLM"]),
             prefix=maybe_prefix(prefix, "language_model"),
             model_class=Llama4ForCausalLM,
         )
@@ -824,7 +824,7 @@ class Llama4ForConditionalGeneration(nn.Module, SupportsMultiModal,
         # language_model is an Llama4ForCausalLM instance. We load it's
         # using llama4's load_weights routine.
         language_model_weights, other_weights = self.separate_weights(
-            weights, prefix="language_model.model.")
+            weights, prefix="language_model.")
         loader = AutoWeightsLoader(self)
         loaded_language_model_params = loader.load_weights(
             language_model_weights)