[Misc] [CI/Build] Speed up block manager CPU-only unit tests ~10x by opting-out of GPU cleanup (#3783)
commit eb69d68804 (parent 7d4e1b85e7)
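The mechanism behind the opt-out is pytest's per-directory conftest.py fixture resolution: a fixture defined in a subdirectory's conftest.py overrides a same-named fixture from a parent conftest.py, so a directory of CPU-only tests can turn the autouse GPU cleanup into a no-op. A minimal sketch of that pattern, mirroring the diff below (the tests/cpu_only/ directory name and the print placeholder are illustrative, not part of this commit):

# tests/conftest.py -- parent: cleanup runs after every test by default.
import pytest


@pytest.fixture()
def should_do_global_cleanup_after_test() -> bool:
    return True


@pytest.fixture(autouse=True)
def cleanup_fixture(should_do_global_cleanup_after_test: bool):
    yield
    if should_do_global_cleanup_after_test:
        print("expensive GPU/torch cleanup would run here")


# tests/cpu_only/conftest.py -- child: overriding the flag fixture makes the
# autouse teardown a no-op for every test in this directory.
import pytest


@pytest.fixture()
def should_do_global_cleanup_after_test() -> bool:
    return False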
@@ -55,9 +55,19 @@ def cleanup():
     torch.cuda.empty_cache()
 
 
+@pytest.fixture()
+def should_do_global_cleanup_after_test() -> bool:
+    """Allow subdirectories to skip global cleanup by overriding this fixture.
+    This can provide a ~10x speedup for non-GPU unit tests since they don't need
+    to initialize torch.
+    """
+    return True
+
+
 @pytest.fixture(autouse=True)
-def cleanup_fixture():
+def cleanup_fixture(should_do_global_cleanup_after_test: bool):
     yield
-    cleanup()
+    if should_do_global_cleanup_after_test:
+        cleanup()
 
 
tests/core/block/conftest.py (new file, 12 lines)
@@ -0,0 +1,12 @@
+import pytest
+
+
+@pytest.fixture()
+def should_do_global_cleanup_after_test() -> bool:
+    """Disable the global cleanup fixture for tests in this directory. This
+    provides a ~10x speedup for unit tests that don't load a model to GPU.
+
+    This requires that tests in this directory clean up after themselves if they
+    use the GPU.
+    """
+    return False
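Because the global teardown is now skipped for tests/core/block, any test in that directory that does allocate GPU state has to release it itself, as the docstring above notes. A hedged sketch of what that could look like (the test module name and body are hypothetical, not part of this commit; only the tests.conftest.cleanup import comes from the repo):

# tests/core/block/test_gpu_example.py -- hypothetical
import pytest
import torch

from tests.conftest import cleanup  # shared helper from the root conftest


@pytest.mark.skipif(not torch.cuda.is_available(), reason="needs a GPU")
def test_allocates_gpu_memory():
    try:
        x = torch.ones(1024, device="cuda")
        assert x.sum().item() == 1024
    finally:
        # This directory opts out of the autouse cleanup, so tear down
        # GPU state explicitly here.
        cleanup()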
@@ -1,25 +1,10 @@
-import contextlib
-import gc
-
 import pytest
-import ray
-import torch
 
+from tests.conftest import cleanup
 from vllm import LLM
-from vllm.model_executor.parallel_utils.parallel_state import (
-    destroy_model_parallel)
 from vllm.model_executor.utils import set_random_seed
 
 
-def cleanup():
-    destroy_model_parallel()
-    with contextlib.suppress(AssertionError):
-        torch.distributed.destroy_process_group()
-    gc.collect()
-    torch.cuda.empty_cache()
-    ray.shutdown()
-
-
 @pytest.fixture
 def baseline_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs,
                            baseline_llm_kwargs, seed):
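This hunk drops the duplicated local cleanup() in favor of the shared helper in tests.conftest; the body of baseline_llm_generator continues past this excerpt. As a rough illustration of how such a generator fixture typically pairs with cleanup() during teardown (the body below is an assumption for illustration, not the file's actual code):

@pytest.fixture
def baseline_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs,
                           baseline_llm_kwargs, seed):
    kwargs = {**common_llm_kwargs, **per_test_common_llm_kwargs,
              **baseline_llm_kwargs}

    def generator():
        llm = LLM(**kwargs)
        set_random_seed(seed)
        try:
            yield llm
        finally:
            # With the duplicate helper removed, teardown uses the shared
            # cleanup() imported from tests.conftest above.
            del llm
            cleanup()

    return generator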