[CI] Fix failing FP8 cpu offload test (#13170)

Signed-off-by: mgoin <mgoin64@gmail.com>
Author: Michael Goin (committed by GitHub)
Date: 2025-02-12 14:16:06 -05:00
Commit: 14b7899d10
Parent: 09972e716c

@@ -14,13 +14,13 @@ from ..utils import compare_two_settings
                     reason="fp8 is not supported on this GPU type.")
 def test_cpu_offload_fp8():
     # Test quantization of an unquantized checkpoint
-    compare_two_settings("meta-llama/Meta-Llama-3-8B-Instruct",
+    compare_two_settings("meta-llama/Llama-3.2-1B-Instruct",
                          ["--quantization", "fp8"],
-                         ["--quantization", "fp8", "--cpu-offload-gb", "2"],
+                         ["--quantization", "fp8", "--cpu-offload-gb", "1"],
                          max_wait_seconds=480)
     # Test loading a quantized checkpoint
-    compare_two_settings("neuralmagic/Meta-Llama-3-8B-Instruct-FP8", [],
-                         ["--cpu-offload-gb", "2"],
+    compare_two_settings("neuralmagic/Qwen2-1.5B-Instruct-FP8", [],
+                         ["--cpu-offload-gb", "1"],
                          max_wait_seconds=480)