Fix loading and GQA handling for AquilaChat2-* models (#1339)

Author: Lu Wang
Date:   2023-10-13 11:51:29 -07:00 (committed by GitHub)
Commit: de89472897
Parent: e7c8555d06
3 changed files with 10 additions and 2 deletions
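
The change has two parts: the "AquilaForCausalLM" architecture name declared by AquilaChat2-*
checkpoints is mapped to the existing Aquila implementation, and a num_key_value_heads setting
(grouped-query attention) is threaded through the Aquila config, attention module, and
weight-sharding code instead of assuming the key/value head count equals num_attention_heads.

A minimal sketch of what the fix enables; the checkpoint name BAAI/AquilaChat2-7B and the
trust_remote_code flag are illustrative assumptions, not taken from this diff:

    from vllm import LLM, SamplingParams

    # Before this commit the "AquilaForCausalLM" architecture string was missing from
    # _MODEL_REGISTRY, so AquilaChat2 checkpoints could not be resolved to a model class.
    llm = LLM(model="BAAI/AquilaChat2-7B", trust_remote_code=True)
    params = SamplingParams(temperature=0.8, max_tokens=64)
    outputs = llm.generate(["What is grouped-query attention?"], params)
    print(outputs[0].outputs[0].text)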


@@ -14,6 +14,7 @@ from vllm.model_executor.weight_utils import (get_quant_config,
 # TODO(woosuk): Lazy-load the model classes.
 _MODEL_REGISTRY = {
     "AquilaModel": AquilaForCausalLM,
+    "AquilaForCausalLM": AquilaForCausalLM,  # AquilaChat2
     "BaiChuanForCausalLM": BaiChuanForCausalLM,  # baichuan-7b
     "BaichuanForCausalLM": BaichuanForCausalLM,  # baichuan-13b
     "BloomForCausalLM": BloomForCausalLM,


@@ -147,6 +147,7 @@ class AquilaAttention(nn.Module):
             rotary_dim=self.head_dim,
             base=self.rope_theta,
             max_position=self.max_position_embeddings,
+            num_kv_heads=self.num_kv_heads,
         )

     def forward(
@@ -177,7 +178,7 @@ class AquilaDecoderLayer(nn.Module):
         self.self_attn = AquilaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
-            num_kv_heads=config.num_attention_heads,
+            num_kv_heads=config.num_key_value_heads,
             rope_theta=rope_theta,
             max_position_embeddings=max_position_embeddings,
         )
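
The two hunks above thread the key/value head count through AquilaAttention, which matters because
AquilaChat2 checkpoints can use grouped-query attention: fewer key/value heads than query heads,
with each group of query heads sharing one KV head. A standalone shape sketch, independent of
vLLM's PagedAttention kernels; all sizes are illustrative:

    import torch

    num_heads, num_kv_heads, head_dim = 32, 8, 128
    q = torch.randn(1, num_heads, head_dim)     # one query vector per attention head
    k = torch.randn(1, num_kv_heads, head_dim)  # only num_kv_heads entries in the KV cache

    # Each group of num_heads // num_kv_heads = 4 query heads attends to the same KV head.
    k_expanded = k.repeat_interleave(num_heads // num_kv_heads, dim=1)
    scores = (q * k_expanded).sum(-1) / head_dim ** 0.5
    print(scores.shape)  # torch.Size([1, 32])
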
@@ -308,7 +309,7 @@ class AquilaForCausalLM(nn.Module):
         q_proj_shard_size = (self.config.hidden_size // tp_size)
         kv_proj_shard_size = (self.config.hidden_size //
                               self.config.num_attention_heads *
-                              self.config.num_attention_heads // tp_size)
+                              self.config.num_key_value_heads // tp_size)
         attention_weight_specs = [
             # (weight_name, shard_size, offset)
             ("q_proj", q_proj_shard_size, 0),


@@ -33,6 +33,7 @@ class AquilaConfig(PretrainedConfig):
         intermediate_size=11008,
         num_hidden_layers=32,
         num_attention_heads=32,
+        num_key_value_heads=None,
         hidden_act="silu",
         max_position_embeddings=2048,
         initializer_range=0.006,
@@ -49,6 +50,11 @@ class AquilaConfig(PretrainedConfig):
         self.hidden_size = hidden_size
         self.intermediate_size = intermediate_size
         self.num_hidden_layers = num_hidden_layers
+        # for backward compatibility
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
         self.num_attention_heads = num_attention_heads
         self.hidden_act = hidden_act
         self.initializer_range = initializer_range
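
With the config change, older Aquila configs that never set num_key_value_heads keep behaving as
standard multi-head attention, while AquilaChat2 configs can declare fewer KV heads. A small
sketch; the import path is assumed from vLLM's source layout at the time, and the 8-head value is
illustrative:

    from vllm.transformers_utils.configs.aquila import AquilaConfig

    mha_config = AquilaConfig()                       # falls back to num_attention_heads (32)
    gqa_config = AquilaConfig(num_key_value_heads=8)  # grouped-query attention
    assert mha_config.num_key_value_heads == mha_config.num_attention_heads
    assert gqa_config.num_key_value_heads == 8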