[Misc] Add Qwen2MoeForCausalLM moe tuning support (#14276)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Jee Jee Li 2025-03-05 23:11:29 +08:00 committed by GitHub
parent e17e4488bd
commit 7bab4bb048

@@ -509,6 +509,11 @@ def main(args: argparse.Namespace):
         intermediate_size = config.moe_intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
         block_quant_shape = config.quantization_config['weight_block_size']
+    elif config.architectures[0] == "Qwen2MoeForCausalLM":
+        E = config.num_experts
+        topk = config.num_experts_per_tok
+        intermediate_size = config.moe_intermediate_size
+        shard_intermediate_size = 2 * intermediate_size // args.tp_size
     else:
         # Default: Mixtral.
         E = config.num_local_experts
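
For reference, a minimal sketch of what the new branch computes: it reads the Qwen2 MoE shape parameters from the Hugging Face config and derives the sharded intermediate size used for tuning. The checkpoint name and the standalone script below are illustrative assumptions, not part of this commit; only the config field names mirror the diff.

# Sketch (not from this commit): reproduce the values the tuning script
# derives for a Qwen2MoeForCausalLM config. The model name is an example.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("Qwen/Qwen1.5-MoE-A2.7B")
tp_size = 2  # example tensor-parallel degree

E = config.num_experts                     # number of routed experts
topk = config.num_experts_per_tok          # experts activated per token
intermediate_size = config.moe_intermediate_size
# Gate and up projections are fused (factor of 2), then split across TP ranks.
shard_intermediate_size = 2 * intermediate_size // tp_size

print(E, topk, intermediate_size, shard_intermediate_size)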