[CI] Fix failing FP8 cpu offload test (#13170)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
09972e716c
commit
14b7899d10
@ -1,5 +1,5 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
# Expanded quantized model tests for CPU offloading
|
# Expanded quantized model tests for CPU offloading
|
||||||
# Base tests: tests/basic_correctness/test_cpu_offload.py
|
# Base tests: tests/basic_correctness/test_cpu_offload.py
|
||||||
|
|
||||||
@ -14,13 +14,13 @@ from ..utils import compare_two_settings
|
|||||||
reason="fp8 is not supported on this GPU type.")
|
reason="fp8 is not supported on this GPU type.")
|
||||||
def test_cpu_offload_fp8():
|
def test_cpu_offload_fp8():
|
||||||
# Test quantization of an unquantized checkpoint
|
# Test quantization of an unquantized checkpoint
|
||||||
compare_two_settings("meta-llama/Meta-Llama-3-8B-Instruct",
|
compare_two_settings("meta-llama/Llama-3.2-1B-Instruct",
|
||||||
["--quantization", "fp8"],
|
["--quantization", "fp8"],
|
||||||
["--quantization", "fp8", "--cpu-offload-gb", "2"],
|
["--quantization", "fp8", "--cpu-offload-gb", "1"],
|
||||||
max_wait_seconds=480)
|
max_wait_seconds=480)
|
||||||
# Test loading a quantized checkpoint
|
# Test loading a quantized checkpoint
|
||||||
compare_two_settings("neuralmagic/Meta-Llama-3-8B-Instruct-FP8", [],
|
compare_two_settings("neuralmagic/Qwen2-1.5B-Instruct-FP8", [],
|
||||||
["--cpu-offload-gb", "2"],
|
["--cpu-offload-gb", "1"],
|
||||||
max_wait_seconds=480)
|
max_wait_seconds=480)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user