Sync Hugging Face modifications of the Qwen MoE model (#4774)

eigenLiu 2024-05-18 00:43:19 +08:00 committed by GitHub
parent 33e0823de5
commit 48d5985a08


@@ -283,8 +283,9 @@ class Qwen2MoeDecoderLayer(nn.Module):
             cache_config=cache_config,
             quant_config=quant_config,
         )
-        if (config.num_experts is not None
-            and (layer_idx + 1) % config.decoder_sparse_step == 0):
+        if (layer_idx not in config.mlp_only_layers) and (
+                config.num_experts > 0 and
+                (layer_idx + 1) % config.decoder_sparse_step == 0):
             self.mlp = Qwen2MoeSparseMoeBlock(config=config,
                                               quant_config=quant_config)
         else:
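
For reference, a minimal standalone sketch of the new layer-selection rule. The field names num_experts, decoder_sparse_step, and mlp_only_layers come from the Hugging Face Qwen2MoeConfig; the concrete values below are hypothetical, chosen only to illustrate the condition:

    # Sketch of the condition introduced above: a decoder layer gets a sparse
    # MoE block only if it is not listed in mlp_only_layers, the model has
    # experts, and it falls on the configured sparse step.
    num_hidden_layers = 24      # hypothetical values for illustration
    num_experts = 60
    decoder_sparse_step = 1
    mlp_only_layers = [0, 1]    # force the first two layers to stay dense

    for layer_idx in range(num_hidden_layers):
        use_moe = (layer_idx not in mlp_only_layers
                   and num_experts > 0
                   and (layer_idx + 1) % decoder_sparse_step == 0)
        print(layer_idx, "sparse MoE block" if use_moe else "dense MLP")
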
@@ -439,6 +440,9 @@ class Qwen2MoeForCausalLM(nn.Module):
                 if (("mlp.experts." in name or "mlp.shared_expert." in name)
                         and name not in params_dict):
                     continue
+                if name not in params_dict:
+                    continue
                 param = params_dict[name]
                 weight_loader = param.weight_loader
                 weight_loader(param, loaded_weight, shard_id)
@@ -451,6 +455,9 @@ class Qwen2MoeForCausalLM(nn.Module):
                 if (("mlp.experts." in name or "mlp.shared_expert." in name)
                         and name not in params_dict):
                     continue
+                if name not in params_dict:
+                    continue
                 param = params_dict[name]
                 weight_loader = getattr(param, "weight_loader",
                                         default_weight_loader)
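
The two identical guards added to load_weights skip checkpoint tensors that have no matching entry in params_dict instead of raising a KeyError. A minimal sketch of that pattern, using hypothetical parameter names and a plain copy in place of vLLM's default_weight_loader:

    import torch

    # params_dict mirrors dict(model.named_parameters()); checkpoint_weights
    # stands in for the (name, tensor) iterable handed to load_weights.
    params_dict = {
        "model.layers.0.mlp.gate.weight": torch.nn.Parameter(torch.empty(60, 16)),
    }
    checkpoint_weights = [
        ("model.layers.0.mlp.gate.weight", torch.zeros(60, 16)),
        ("model.layers.0.mlp.experts.7.up_proj.weight", torch.zeros(32, 16)),
    ]

    for name, loaded_weight in checkpoint_weights:
        if name not in params_dict:
            continue  # tensor has no matching parameter in this model instance
        param = params_dict[name]
        # Fall back to a plain copy when the parameter defines no custom loader.
        weight_loader = getattr(param, "weight_loader",
                                lambda p, w: p.data.copy_(w))
        weight_loader(param, loaded_weight)

One plausible reason for the guard, consistent with the expert-skipping check just above it, is that under expert or tensor parallelism some checkpoint tensors legitimately have no local parameter on a given worker.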