[Bugfix/CI] Fixup benchmark_moe.py (#12562)

Fixes `is_marlin` not being passed into `get_default_config`

Also allows `--tensor-parallel-size` in addition to `-tp` and `--tp-size`

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>

@@ -343,9 +343,13 @@ class BenchmarkWorker:
         op_config = get_moe_configs(num_experts, shard_intermediate_size // 2,
                                     dtype_str)
         if op_config is None:
-            config = get_default_config(num_tokens, num_experts,
-                                        shard_intermediate_size, hidden_size,
-                                        topk, dtype_str)
+            config = get_default_config(num_tokens,
+                                        num_experts,
+                                        shard_intermediate_size,
+                                        hidden_size,
+                                        topk,
+                                        dtype_str,
+                                        is_marlin=False)
         else:
             config = op_config[min(op_config.keys(),
                                    key=lambda x: abs(x - num_tokens))]
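
Why the old call failed: `get_default_config` evidently grew an `is_marlin` parameter, so the six-argument call no longer matched the signature. A minimal, hedged sketch of the mismatch (the stub below is illustrative, not vLLM's actual implementation):

```python
# Sketch only: assumes get_default_config now requires is_marlin.
# Parameter names mirror the call site above; the body is a placeholder.
def get_default_config(num_tokens, num_experts, shard_intermediate_size,
                       hidden_size, topk, dtype_str, is_marlin):
    # The real function picks a Triton launch config; elided here.
    return {"BLOCK_SIZE_M": 16 if is_marlin else 64}

# The old six-argument call would raise:
#   TypeError: get_default_config() missing 1 required positional
#   argument: 'is_marlin'
# The fixed call from the hunk above pins the flag explicitly, since the
# benchmark tunes the non-Marlin kernels:
config = get_default_config(num_tokens=4096, num_experts=8,
                            shard_intermediate_size=14336, hidden_size=4096,
                            topk=2, dtype_str="fp8_w8a8", is_marlin=False)
print(config)  # {'BLOCK_SIZE_M': 64}
```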
@@ -536,7 +540,11 @@ if __name__ == "__main__":
     parser.add_argument("--model",
                         type=str,
                         default="mistralai/Mixtral-8x7B-Instruct-v0.1")
-    parser.add_argument("--tp-size", "-tp", type=int, default=2)
+    parser.add_argument("--tp-size",
+                        "-tp",
+                        "--tensor-parallel-size",
+                        type=int,
+                        default=2)
     parser.add_argument("--dtype",
                         type=str,
                         choices=["auto", "fp8_w8a8", "int8_w8a16"],
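
The alias works because `argparse` lets a single argument register several option strings; the attribute name is derived from the first long option, so `--tp-size`, `-tp`, and `--tensor-parallel-size` all write to `args.tp_size`. A self-contained sketch:

```python
import argparse

parser = argparse.ArgumentParser()
# All three option strings map to args.tp_size (dest comes from the
# first long option string, with dashes converted to underscores).
parser.add_argument("--tp-size",
                    "-tp",
                    "--tensor-parallel-size",
                    type=int,
                    default=2)

print(parser.parse_args([]).tp_size)                               # 2
print(parser.parse_args(["-tp", "4"]).tp_size)                     # 4
print(parser.parse_args(["--tensor-parallel-size", "8"]).tp_size)  # 8
```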