vllm/tests/basic_correctness/test_cpu_offload.py

7 lines
176 B
Python

from ..utils import compare_two_settings
def test_cpu_offload():
    """Run ``compare_two_settings`` for Llama-2-7b with two argument lists.

    The first list is empty (no extra arguments); the second passes
    ``--cpu-offload-gb 4``. What is compared between the two runs, and how
    a mismatch is reported, is decided by ``compare_two_settings`` itself.
    NOTE(review): presumably it asserts that both settings produce matching
    results -- confirm against the helper in ``..utils``.
    """
    model = "meta-llama/Llama-2-7b-hf"
    baseline_args = []
    offload_args = ["--cpu-offload-gb", "4"]
    compare_two_settings(model, baseline_args, offload_args)