[ROCm][Bugfix][FP8] Make fp8 quant respect fused modules mapping (#16031)
Signed-off-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
parent 652907b354
commit 21802c4b6d
@@ -116,7 +116,9 @@ class Fp8Config(QuantizationConfig):
         from vllm.attention.layer import Attention  # Avoid circular import
 
         if isinstance(layer, LinearBase):
-            if is_layer_skipped(prefix, self.ignored_layers):
+            if is_layer_skipped(prefix=prefix,
+                                ignored_layers=self.ignored_layers,
+                                fused_mapping=self.packed_modules_mapping):
                 return UnquantizedLinearMethod()
             return Fp8LinearMethod(self)
         elif isinstance(layer, FusedMoE):
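For context on why the fused-modules mapping matters here: checkpoints typically list ignored (unquantized) layers by their unfused names (e.g. q_proj, k_proj, v_proj), while vLLM fuses them into a single module (e.g. qkv_proj), so a plain prefix lookup never matches. Below is a minimal sketch of that matching logic under those assumptions; the helper name is_layer_skipped_sketch and the example mapping and prefixes are illustrative, not the actual vLLM implementation.

```python
# Minimal sketch (not the real vLLM code) of how a fused-modules mapping lets
# a skip check match unfused ignored-layer names against a fused layer prefix.
from typing import Dict, List


def is_layer_skipped_sketch(prefix: str,
                            ignored_layers: List[str],
                            fused_mapping: Dict[str, List[str]]) -> bool:
    """Return True if the layer at `prefix` should stay unquantized.

    If `prefix` ends with a fused module name (e.g. "qkv_proj"), the layer is
    skipped only when all of its constituent shards (e.g. "q_proj", "k_proj",
    "v_proj") appear in `ignored_layers`; otherwise a direct prefix lookup is
    used.
    """
    proj_name = prefix.split(".")[-1]
    if proj_name in fused_mapping:
        shard_prefixes = [
            prefix.replace(proj_name, shard)
            for shard in fused_mapping[proj_name]
        ]
        is_skipped = [p in ignored_layers for p in shard_prefixes]
        # A partially-ignored fused layer is ambiguous; require consistency.
        assert all(is_skipped) or not any(is_skipped), (
            f"All shards of {prefix} must share the same quantization "
            "decision")
        return all(is_skipped)
    return prefix in ignored_layers


# Hypothetical usage mirroring a checkpoint that ignores the attention
# projections of layer 0 by their unfused names.
packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
ignored = [
    "model.layers.0.self_attn.q_proj",
    "model.layers.0.self_attn.k_proj",
    "model.layers.0.self_attn.v_proj",
]
print(is_layer_skipped_sketch("model.layers.0.self_attn.qkv_proj",
                              ignored, packed_modules_mapping))  # True
```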