From c38eba304674fdf9da4d881e46f103440e22a153 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Wed, 10 Jul 2024 15:04:07 +0200 Subject: [PATCH] [Bugfix] MLPSpeculator: Use ParallelLMHead in tie_weights=False case. (#6303) Signed-off-by: Thomas Parnell --- vllm/model_executor/models/mlp_speculator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/mlp_speculator.py b/vllm/model_executor/models/mlp_speculator.py index 97f7ec74..d3aec06a 100644 --- a/vllm/model_executor/models/mlp_speculator.py +++ b/vllm/model_executor/models/mlp_speculator.py @@ -110,7 +110,7 @@ class MLPSpeculator(nn.Module): ]) self.head = nn.ModuleList([ - nn.Linear(self.inner_dim, self.vocab_size, bias=False) + ParallelLMHead(self.vocab_size, self.inner_dim, bias=False) for _ in range(self.max_speculative_tokens) ]) self.ln = nn.ModuleList([