diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index fd3dbe79..fa8ce60e 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -340,6 +340,10 @@ class GemmaForCausalLM(nn.Module): weight_loader(param, loaded_weight, shard_id) break else: + # lm_head is not used in vllm as it is tied with embed_token. + # To prevent errors, skip loading lm_head.weight. + if "lm_head.weight" in name: + continue # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue