[ROCm][Bugfix][FP8] Make fp8 quant respect fused modules mapping (#16031)

Signed-off-by: mgoin <michael@neuralmagic.com>
commit 21802c4b6d (parent 652907b354)
Author: Michael Goin <michael@neuralmagic.com>
Date: 2025-04-07 19:28:14 -06:00 (committed by GitHub)


@@ -116,7 +116,9 @@ class Fp8Config(QuantizationConfig):
         from vllm.attention.layer import Attention  # Avoid circular import
         if isinstance(layer, LinearBase):
-            if is_layer_skipped(prefix, self.ignored_layers):
+            if is_layer_skipped(prefix=prefix,
+                                ignored_layers=self.ignored_layers,
+                                fused_mapping=self.packed_modules_mapping):
                 return UnquantizedLinearMethod()
             return Fp8LinearMethod(self)
         elif isinstance(layer, FusedMoE):
             return Fp8MoEMethod(self)
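
Context for the fix: checkpoints typically list ignored (unquantized) layers by their unfused names (e.g. q_proj, k_proj, v_proj), while vLLM fuses those projections into modules such as qkv_proj, so a plain prefix lookup misses them and the fused layer gets quantized anyway. Below is a minimal sketch of a fused-aware skip check; the signature and shard-consistency assertion are modeled on vLLM's is_layer_skipped helper but are illustrative, not the exact implementation.

    # Illustrative sketch of a fused-aware skip check; names and behavior are
    # assumptions modeled on vLLM's is_layer_skipped, not the exact code.
    from typing import Optional


    def is_layer_skipped(
        prefix: str,                # e.g. "model.layers.0.self_attn.qkv_proj"
        ignored_layers: list[str],  # unfused names from the checkpoint config
        fused_mapping: Optional[dict[str, list[str]]] = None,
    ) -> bool:
        # fused_mapping mirrors packed_modules_mapping,
        # e.g. {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
        fused_mapping = fused_mapping or {}
        proj_name = prefix.split(".")[-1]
        if proj_name in fused_mapping:
            # Map the fused module back to its unfused shard prefixes and
            # require the checkpoint to treat all shards consistently.
            shard_prefixes = [
                prefix.replace(proj_name, shard)
                for shard in fused_mapping[proj_name]
            ]
            skipped = [p in ignored_layers for p in shard_prefixes]
            assert all(skipped) or not any(skipped), (
                f"Some but not all shards of {prefix} are ignored; all "
                "shards of a fused layer must share one quantization scheme."
            )
            return all(skipped)
        return prefix in ignored_layers


    # Example: the checkpoint ignores the unfused q/k/v projections, so the
    # fused qkv_proj prefix is now skipped (kept unquantized) as well.
    ignored = [
        f"model.layers.0.self_attn.{s}"
        for s in ("q_proj", "k_proj", "v_proj")
    ]
    mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
    assert is_layer_skipped("model.layers.0.self_attn.qkv_proj",
                            ignored, mapping)

With a check along these lines, get_quant_method returns UnquantizedLinearMethod() for a fused module whenever every one of its unfused shards is listed in ignored_layers, instead of wrapping it in Fp8LinearMethod.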