diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index ef19c41e..72fe21df 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -184,6 +184,10 @@ class LlamaDecoderLayer(nn.Module):
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
         sliding_window = getattr(config, "sliding_window", None)
+        # Support abacusai/Smaug-72B-v0.1 with attention_bias
+        # Support internlm/internlm-7b with bias
+        attention_bias = getattr(config, "attention_bias", False) or getattr(
+            config, "bias", False)
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
@@ -193,7 +197,7 @@ class LlamaDecoderLayer(nn.Module):
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
             linear_method=linear_method,
-            bias=getattr(config, "bias", False),
+            bias=attention_bias,
             sliding_window=sliding_window,
         )
         self.mlp = LlamaMLP(
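
For context, a minimal sketch (not part of the diff) of how the `getattr` fallback chain resolves for the two config styles named in the added comments. The `SimpleNamespace` objects are hypothetical stand-ins for Hugging Face `PretrainedConfig` instances:

```python
from types import SimpleNamespace

# Hypothetical stand-in configs, one per style the patch supports:
smaug_cfg = SimpleNamespace(attention_bias=True)  # abacusai/Smaug-72B-v0.1 style
internlm_cfg = SimpleNamespace(bias=True)         # internlm/internlm-7b style
plain_cfg = SimpleNamespace()                     # vanilla Llama: neither attribute set

for cfg in (smaug_cfg, internlm_cfg, plain_cfg):
    # Same expression as the patch: prefer attention_bias, fall back to bias,
    # default to False when the config defines neither.
    attention_bias = getattr(cfg, "attention_bias", False) or getattr(
        cfg, "bias", False)
    print(attention_bias)  # True, True, False
```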