[CI] Fix failing FP8 cpu offload test (#13170)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
09972e716c
commit
14b7899d10
@@ -14,13 +14,13 @@ from ..utils import compare_two_settings
|
|||||||
reason="fp8 is not supported on this GPU type.")
|
reason="fp8 is not supported on this GPU type.")
|
||||||
def test_cpu_offload_fp8():
|
def test_cpu_offload_fp8():
|
||||||
# Test quantization of an unquantized checkpoint
|
# Test quantization of an unquantized checkpoint
|
||||||
compare_two_settings("meta-llama/Meta-Llama-3-8B-Instruct",
|
compare_two_settings("meta-llama/Llama-3.2-1B-Instruct",
|
||||||
["--quantization", "fp8"],
|
["--quantization", "fp8"],
|
||||||
["--quantization", "fp8", "--cpu-offload-gb", "2"],
|
["--quantization", "fp8", "--cpu-offload-gb", "1"],
|
||||||
max_wait_seconds=480)
|
max_wait_seconds=480)
|
||||||
# Test loading a quantized checkpoint
|
# Test loading a quantized checkpoint
|
||||||
compare_two_settings("neuralmagic/Meta-Llama-3-8B-Instruct-FP8", [],
|
compare_two_settings("neuralmagic/Qwen2-1.5B-Instruct-FP8", [],
|
||||||
["--cpu-offload-gb", "2"],
|
["--cpu-offload-gb", "1"],
|
||||||
max_wait_seconds=480)
|
max_wait_seconds=480)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user