Enforce that TP > 1 is not supported for Mamba2 if Quantization is Enabled. (#14617)

Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com>
Yu Chin Fabian Lim 2025-03-21 08:44:37 +08:00 committed by GitHub
parent 2b22290ce0
commit 06dd08256f


@@ -251,6 +251,9 @@ class MambaMixer2(CustomOp):
             "then num_groups must equal 1."
         )
+        assert self.tp_size == 1 or quant_config is None, \
+            "Tensor parallel currently not supported for quantized models."
+
         self.ssm_state_size = ssm_state_size
         self.activation = activation
@@ -331,6 +334,8 @@ class MambaMixer2(CustomOp):
                 ], self.tp_size, tp_rank)
             })
+        if quant_config is None:
+            # - quant layers do not have a weight loader
             delattr(self.in_proj.weight, "weight_loader")
             set_weight_attrs(
                 self.in_proj.weight,
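
For reference, a minimal, self-contained sketch of the behavior the added assertion enforces. The _SimplifiedMixer class below is a hypothetical stand-in used only for illustration (it is not vLLM's MambaMixer2), and the quant_config dicts are placeholders for a real quantization config:

    # Sketch of the new guard: quantized Mamba2 layers must run with TP == 1.
    class _SimplifiedMixer:
        def __init__(self, tp_size: int, quant_config=None):
            # Mirrors the assertion added in this commit.
            assert tp_size == 1 or quant_config is None, \
                "Tensor parallel currently not supported for quantized models."
            self.tp_size = tp_size
            self.quant_config = quant_config

    _SimplifiedMixer(tp_size=1, quant_config={"method": "fp8"})  # ok: TP == 1
    _SimplifiedMixer(tp_size=4, quant_config=None)               # ok: no quantization
    try:
        _SimplifiedMixer(tp_size=4, quant_config={"method": "fp8"})
    except AssertionError as err:
        print(err)  # Tensor parallel currently not supported for quantized models.

The check fails fast at layer construction rather than letting a quantized Mamba2 layer be sharded across tensor-parallel ranks, which the commit states is not currently supported.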