# vllm/tests/kv_transfer/test_module.py
# SPDX-License-Identifier: Apache-2.0
import os
import subprocess
import sys

import pytest
import torch
def run_python_script(script_name, timeout):
    """Run *script_name* twice in parallel (RANK=0 and RANK=1) and fail the
    current pytest test if either copy exits non-zero, times out, or errors.

    Args:
        script_name: File name of a script inside the ``kv_transfer``
            directory (the directory prefix is added here).
        timeout: Seconds to wait for each process to finish.
    """
    script_name = f'kv_transfer/{script_name}'
    processes = []
    try:
        # Launch one copy of the script per rank. Merge RANK into a copy of
        # the parent environment instead of passing env={"RANK": ...} alone,
        # which would wipe PATH/PYTHONPATH/CUDA settings from the children.
        for rank in ("0", "1"):
            processes.append(
                subprocess.Popen(
                    [sys.executable, script_name],
                    env={**os.environ, "RANK": rank},
                    stdout=sys.stdout,  # Forward child stdout to our stdout
                    stderr=sys.stderr,  # Forward child stderr to our stderr
                ))
        # Wait for both processes to complete, each with the given timeout,
        # and check their exit status.
        for rank, process in enumerate(processes):
            process.wait(timeout=timeout)
            if process.returncode != 0:
                pytest.fail(
                    f"Test {script_name} failed for RANK={rank}, "
                    f"{process.returncode}")
    except subprocess.TimeoutExpired:
        pytest.fail(f"Test {script_name} timed out")
    except Exception as e:
        pytest.fail(f"Test {script_name} failed with error: {str(e)}")
    finally:
        # Always reap the children so neither a timeout nor an unexpected
        # error (nor a pytest.fail) leaks running subprocesses.
        for process in processes:
            if process.poll() is None:
                process.terminate()
# Define the test cases using pytest's parametrize
@pytest.mark.parametrize(
    "script_name,timeout",
    [
        # (script inside kv_transfer/, per-process timeout in seconds)
        ("test_lookup_buffer.py", 60),
        ("test_send_recv.py", 120)
    ])
def test_run_python_script(script_name, timeout):
    """Launch *script_name* as a two-rank distributed run if enough GPUs
    are present; otherwise skip."""
    available_gpus = torch.cuda.device_count()
    # These scripts need one GPU per rank, so two GPUs total.
    if available_gpus < 2:
        pytest.skip(
            f"Skipping test {script_name} because <2 GPUs are available")
    run_python_script(script_name, timeout)