Fix the issue for AquilaChat2-* models (#1339)
This commit is contained in:
parent  e7c8555d06
commit  de89472897
@@ -14,6 +14,7 @@ from vllm.model_executor.weight_utils import (get_quant_config,
 # TODO(woosuk): Lazy-load the model classes.
 _MODEL_REGISTRY = {
     "AquilaModel": AquilaForCausalLM,
+    "AquilaForCausalLM": AquilaForCausalLM,  # AquilaChat2
     "BaiChuanForCausalLM": BaiChuanForCausalLM,  # baichuan-7b
     "BaichuanForCausalLM": BaichuanForCausalLM,  # baichuan-13b
     "BloomForCausalLM": BloomForCausalLM,
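For context, the registry is keyed by the `architectures` entry of the Hugging Face config; AquilaChat2 checkpoints declare `AquilaForCausalLM` rather than `AquilaModel`, hence the new alias. A minimal sketch of the kind of lookup involved (the helper name, stand-in registry, and exact logic are illustrative, not vLLM's actual loader code):

```python
from transformers import PretrainedConfig

# Stand-in for the registry shown above; real model classes omitted for brevity.
_MODEL_REGISTRY = {
    "AquilaModel": "AquilaForCausalLM (class)",
    "AquilaForCausalLM": "AquilaForCausalLM (class)",  # AquilaChat2
}


def get_model_cls(hf_config: PretrainedConfig):
    """Illustrative lookup: map the HF `architectures` field to a registered class."""
    architectures = getattr(hf_config, "architectures", []) or []
    for arch in architectures:
        if arch in _MODEL_REGISTRY:
            return _MODEL_REGISTRY[arch]
    raise ValueError(f"Unsupported architectures {architectures}; "
                     f"supported: {list(_MODEL_REGISTRY)}")
```

Without the alias, an AquilaChat2 checkpoint fails this lookup even though its weights follow the Aquila layout.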
@@ -147,6 +147,7 @@ class AquilaAttention(nn.Module):
             rotary_dim=self.head_dim,
             base=self.rope_theta,
             max_position=self.max_position_embeddings,
+            num_kv_heads=self.num_kv_heads,
         )

     def forward(
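The new `num_kv_heads` argument tells the attention layer how many key/value heads the checkpoint actually provides, which can be fewer than the query heads (grouped-query attention). A self-contained plain-PyTorch sketch of that mechanism, not vLLM's paged attention kernel; all names here are illustrative:

```python
import torch


def grouped_query_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                            num_heads: int, num_kv_heads: int) -> torch.Tensor:
    """q: [seq, num_heads, head_dim]; k, v: [seq, num_kv_heads, head_dim]."""
    assert num_heads % num_kv_heads == 0
    group = num_heads // num_kv_heads
    # Each KV head is shared by `group` consecutive query heads.
    k = k.repeat_interleave(group, dim=1)  # -> [seq, num_heads, head_dim]
    v = v.repeat_interleave(group, dim=1)
    scores = torch.einsum("qhd,khd->hqk", q, k) / (q.shape[-1] ** 0.5)
    probs = scores.softmax(dim=-1)
    return torch.einsum("hqk,khd->qhd", probs, v)


# Toy shapes: 8 query heads sharing 2 KV heads.
q = torch.randn(5, 8, 16)
k = torch.randn(5, 2, 16)
v = torch.randn(5, 2, 16)
print(grouped_query_attention(q, k, v, num_heads=8, num_kv_heads=2).shape)  # [5, 8, 16]
```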
@@ -177,7 +178,7 @@ class AquilaDecoderLayer(nn.Module):
         self.self_attn = AquilaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
-            num_kv_heads=config.num_attention_heads,
+            num_kv_heads=config.num_key_value_heads,
             rope_theta=rope_theta,
             max_position_embeddings=max_position_embeddings,
         )
@@ -308,7 +309,7 @@ class AquilaForCausalLM(nn.Module):
         q_proj_shard_size = (self.config.hidden_size // tp_size)
         kv_proj_shard_size = (self.config.hidden_size //
                               self.config.num_attention_heads *
-                              self.config.num_attention_heads // tp_size)
+                              self.config.num_key_value_heads // tp_size)
         attention_weight_specs = [
             # (weight_name, shard_size, offset)
             ("q_proj", q_proj_shard_size, 0),
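This hunk shards the fused QKV weights across tensor-parallel ranks; the k_proj/v_proj shard size has to follow the number of KV heads, not the number of query heads. A worked version of the same arithmetic with illustrative values (not taken from any specific Aquila checkpoint; `tp_size` is the tensor-parallel degree):

```python
# Illustrative dimensions only.
hidden_size = 4096
num_attention_heads = 32
num_key_value_heads = 8
tp_size = 2  # tensor-parallel world size

head_dim = hidden_size // num_attention_heads                     # 128
q_proj_shard_size = hidden_size // tp_size                        # 2048 rows per rank
kv_proj_shard_size = (hidden_size // num_attention_heads *
                      num_key_value_heads // tp_size)             # 128 * 8 // 2 = 512

# The pre-fix expression used num_attention_heads in the last factor,
# giving 128 * 32 // 2 = 2048 and slicing k_proj/v_proj at the wrong offsets.
print(q_proj_shard_size, kv_proj_shard_size)
```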
@@ -33,6 +33,7 @@ class AquilaConfig(PretrainedConfig):
         intermediate_size=11008,
         num_hidden_layers=32,
         num_attention_heads=32,
+        num_key_value_heads=None,
         hidden_act="silu",
         max_position_embeddings=2048,
         initializer_range=0.006,
@@ -49,6 +50,11 @@ class AquilaConfig(PretrainedConfig):
         self.hidden_size = hidden_size
         self.intermediate_size = intermediate_size
         self.num_hidden_layers = num_hidden_layers
+        # for backward compatibility
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
         self.num_attention_heads = num_attention_heads
         self.hidden_act = hidden_act
         self.initializer_range = initializer_range
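Older Aquila configs do not carry `num_key_value_heads`, so the `None` default falls back to `num_attention_heads` and plain multi-head checkpoints keep loading as before, while AquilaChat2-style configs can set a smaller value. A minimal usage sketch, assuming `AquilaConfig` as patched above is in scope:

```python
# Older-style config: no num_key_value_heads given.
mha_cfg = AquilaConfig(num_attention_heads=32)
# AquilaChat2-style config: fewer KV heads than query heads.
gqa_cfg = AquilaConfig(num_attention_heads=32, num_key_value_heads=8)

assert mha_cfg.num_key_value_heads == 32  # None falls back to num_attention_heads
assert gqa_cfg.num_key_value_heads == 8
```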