[ROCm][Bugfix][FP8] Make fp8 quant respect fused modules mapping (#16031)
Signed-off-by: mgoin <michael@neuralmagic.com>
commit 21802c4b6d
parent 652907b354
@@ -116,7 +116,9 @@ class Fp8Config(QuantizationConfig):
         from vllm.attention.layer import Attention  # Avoid circular import
 
         if isinstance(layer, LinearBase):
-            if is_layer_skipped(prefix, self.ignored_layers):
+            if is_layer_skipped(prefix=prefix,
+                                ignored_layers=self.ignored_layers,
+                                fused_mapping=self.packed_modules_mapping):
                 return UnquantizedLinearMethod()
             return Fp8LinearMethod(self)
         elif isinstance(layer, FusedMoE):
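The change passes the config's packed_modules_mapping into the skip check, so that a fused module such as qkv_proj can be matched against the per-shard names (q_proj, k_proj, v_proj) that FP8 checkpoints typically list in their ignored layers. Below is a minimal, self-contained sketch of the kind of matching this enables; the helper name layer_is_skipped and its details are illustrative assumptions, not the exact vLLM implementation of is_layer_skipped.

    from types import MappingProxyType
    from typing import List, Mapping

    def layer_is_skipped(
            prefix: str,
            ignored_layers: List[str],
            fused_mapping: Mapping[str, List[str]] = MappingProxyType({})
    ) -> bool:
        # e.g. prefix = "model.layers.0.self_attn.qkv_proj"
        proj_name = prefix.split(".")[-1]
        if proj_name in fused_mapping:
            # Expand the fused module into the per-shard names that the
            # checkpoint (and therefore ignored_layers) uses.
            shard_prefixes = [
                prefix.replace(proj_name, shard)
                for shard in fused_mapping[proj_name]
            ]
            shard_skipped = [p in ignored_layers for p in shard_prefixes]
            # All shards of a fused module must agree on quantization.
            if any(shard_skipped) and not all(shard_skipped):
                raise ValueError(
                    f"Only some shards of {prefix} are quantized; all shards "
                    "of a fused module must share the same quantization.")
            return all(shard_skipped)
        return prefix in ignored_layers

    # Example: a checkpoint that leaves the attention projections unquantized.
    ignored = [
        "model.layers.0.self_attn.q_proj",
        "model.layers.0.self_attn.k_proj",
        "model.layers.0.self_attn.v_proj",
    ]
    fused = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
    print(layer_is_skipped("model.layers.0.self_attn.qkv_proj", ignored, fused))  # True

Without the fused mapping, the fused prefix ("...qkv_proj") would be compared directly against the unfused checkpoint names and never match, so such layers would be handed to Fp8LinearMethod instead of UnquantizedLinearMethod.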