Signed-off-by: lvfei.lv <lvfei.lv@alibaba-inc.com>
parent 9cdde47289
commit 24f6b9a713
@@ -47,12 +47,10 @@ def test_filter_subtensors():

 @pytest.fixture(scope="module")
 def llama_3p2_1b_files():
-    with TemporaryDirectory() as cache_dir:
-        input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
-                                      cache_dir=cache_dir,
-                                      ignore_patterns=["*.bin*", "original/*"])
+    input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
+                                  ignore_patterns=["*.bin*", "original/*"])

-        yield input_dir
+    yield input_dir


 def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
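Note: without the cache_dir argument, snapshot_download falls back to the default Hugging Face cache, so the Llama-3.2-1B-Instruct files persist across test sessions instead of being re-downloaded into a throwaway TemporaryDirectory. A minimal sketch of the resulting fixture in isolation; the imports shown are assumptions about what the test module already pulls in:

import pytest
from huggingface_hub import snapshot_download

@pytest.fixture(scope="module")
def llama_3p2_1b_files():
    # No cache_dir: huggingface_hub uses its default cache, so repeated
    # runs reuse the previously downloaded checkpoint files.
    input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
                                  ignore_patterns=["*.bin*", "original/*"])
    yield input_dir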
@@ -64,9 +62,9 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):

     # Copy metadata files to output directory
     for file in os.listdir(input_dir):
-        if not any(
-                file.endswith(ext) and not os.path.isdir(file)
-                for ext in weights_patterns):
+        if os.path.isdir(os.path.join(input_dir, file)):
+            continue
+        if not any(file.endswith(ext) for ext in weights_patterns):
             shutil.copy(f"{input_dir}/{file}", output_dir)


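Note: the old predicate called os.path.isdir(file) on the bare filename, which is resolved against the current working directory rather than input_dir, so subdirectories inside input_dir were not reliably skipped and could reach shutil.copy, which fails on a directory. The rewrite checks the full path first and only then filters by extension. A standalone sketch of the same copy logic, with a hypothetical default for weights_patterns:

import os
import shutil

def copy_metadata_files(input_dir, output_dir,
                        weights_patterns=(".safetensors", ".bin")):
    # Skip subdirectories, then copy every file that is not a weights shard
    # (config, tokenizer, etc.) into output_dir.
    for file in os.listdir(input_dir):
        if os.path.isdir(os.path.join(input_dir, file)):
            continue
        if not any(file.endswith(ext) for ext in weights_patterns):
            shutil.copy(os.path.join(input_dir, file), output_dir)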
@@ -81,7 +79,8 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
 @pytest.mark.parametrize("enable_lora", [False, True])
 @pytest.mark.parametrize("tp_size", [1, 2])
 def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
-                              llama_3p2_1b_files):
+                              llama_3p2_1b_files,
+                              monkeypatch: pytest.MonkeyPatch):
     if num_gpus_available < tp_size:
         pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")

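Note: monkeypatch is pytest's built-in fixture (typed as pytest.MonkeyPatch); requesting it in the test signature gives the test an environment-variable override that pytest undoes automatically at teardown, so the VLLM_USE_V1 setting in the next hunk cannot leak into other tests. A minimal illustration with a hypothetical test:

import os
import pytest

def test_env_override(monkeypatch: pytest.MonkeyPatch):
    # setenv takes effect for this test only and is restored afterwards.
    monkeypatch.setenv("VLLM_USE_V1", "0")
    assert os.environ["VLLM_USE_V1"] == "0"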
@@ -89,6 +88,8 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
     gpu_memory_utilization = 0.8
     input_dir = llama_3p2_1b_files
     ctx = mp.get_context("spawn")
+    # The interface in v1 engine has changed, run in v1 engine will hang.
+    monkeypatch.setenv("VLLM_USE_V1", "0")

     # Run in separate processes for memory & CUDA isolation
     with TemporaryDirectory() as output_dir:
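Note: because monkeypatch.setenv mutates os.environ in the parent process, the workers later spawned from ctx inherit VLLM_USE_V1=0 as well, which keeps the whole test on the v0 engine path. A small sketch of that inheritance; the names here are illustrative, not taken from the test:

import multiprocessing as mp
import os

def _child(queue):
    # A spawned child starts with a copy of the parent's environment.
    queue.put(os.environ.get("VLLM_USE_V1"))

if __name__ == "__main__":
    os.environ["VLLM_USE_V1"] = "0"
    ctx = mp.get_context("spawn")
    queue = ctx.Queue()
    proc = ctx.Process(target=_child, args=(queue, ))
    proc.start()
    assert queue.get() == "0"
    proc.join()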