[Bugfix] Fix tensor parallel for qwen2 classification model (#10297)

Signed-off-by: Isotr0py <2037008807@qq.com>
2024-11-14 10:54:59 +08:00 · 2024-11-14 10:54:59 +08:00 · 15bb8330aa
commit 15bb8330aa
parent ac49b59d8b
2 changed files with 9 additions and 4 deletions
--- a/tests/models/embedding/language/test_cls_models.py
+++ b/tests/models/embedding/language/test_cls_models.py
@@ -21,14 +21,14 @@ def test_classification_models(
    model: str,
    dtype: str,
 ) -> None:
    with vllm_runner(model, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.classify(example_prompts)
    with hf_runner(model,
                   dtype=dtype,
                   auto_cls=AutoModelForSequenceClassification) as hf_model:
        hf_outputs = hf_model.classify(example_prompts)
    with vllm_runner(model, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.classify(example_prompts)
    print(hf_outputs, vllm_outputs)
    # check logits difference
--- a/vllm/model_executor/models/qwen2_cls.py
+++ b/vllm/model_executor/models/qwen2_cls.py
@@ -69,9 +69,14 @@ class Qwen2ForSequenceClassification(nn.Module):
        self.model = Qwen2Model(vllm_config=vllm_config,
                                prefix=maybe_prefix(prefix, "model"))
        # hidden_states from Qwen2Model has been reduced,
        # the input of score layer is not parallelized.
        self.score = RowParallelLinear(config.hidden_size,
                                       config.num_labels,
-                                       quant_config=quant_config)
+                                       quant_config=quant_config,
                                       input_is_parallel=False,
                                       bias=False,
                                       prefix=maybe_prefix(prefix, "score"))
        self._pooler = Pooler.from_config_with_defaults(
            pooler_config,
            pooling_type=PoolingType.LAST,