2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
2024-07-30 14:48:50 -04:00
|
|
|
import functools
|
2024-07-26 23:02:25 -04:00
|
|
|
import gc
|
2024-08-13 09:20:20 +08:00
|
|
|
from typing import Callable, TypeVar
|
2024-07-26 23:02:25 -04:00
|
|
|
|
|
|
|
import pytest
|
|
|
|
import torch
|
2024-08-13 09:20:20 +08:00
|
|
|
from typing_extensions import ParamSpec
|
2024-07-26 23:02:25 -04:00
|
|
|
|
2024-10-18 14:30:55 -07:00
|
|
|
from vllm.distributed import cleanup_dist_env_and_memory
|
2024-07-26 23:02:25 -04:00
|
|
|
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
|
|
|
|
|
|
|
|
|
2025-03-15 01:02:20 -04:00
|
|
|
@pytest.fixture(scope="function", autouse=True)
def use_v0_only(monkeypatch):
    """Set ``VLLM_USE_V1=0`` in the environment for every test.

    Tensorizer has only been tested on V0 so far, so the variable is set
    automatically (autouse) through pytest's ``monkeypatch`` fixture.
    """
    monkeypatch.setenv(name='VLLM_USE_V1', value='0')
|
|
|
|
|
|
|
|
|
2024-07-30 14:48:50 -04:00
|
|
|
@pytest.fixture(autouse=True)
def cleanup() -> None:
    """Call ``cleanup_dist_env_and_memory`` with Ray shutdown enabled.

    The fixture is autouse and has no ``yield``, so the call happens during
    fixture setup, i.e. before the body of each test runs.
    """
    cleanup_dist_env_and_memory(shutdown_ray=True)
|
2024-07-26 23:02:25 -04:00
|
|
|
|
|
|
|
|
2024-08-13 09:20:20 +08:00
|
|
|
# Typing helpers used to annotate decorators in this module so that the
# decorated callable keeps its parameter specification and return type.
_P = ParamSpec("_P")  # parameters of the wrapped callable
_R = TypeVar("_R")  # return type of the wrapped callable
|
2024-07-26 23:02:25 -04:00
|
|
|
|
2024-08-13 09:20:20 +08:00
|
|
|
|
|
|
|
def retry_until_skip(n: int):
    """Build a decorator that retries a callable on ``AssertionError``.

    The decorated callable is invoked up to ``n`` times. After each attempt
    that raises ``AssertionError`` the garbage collector is run and
    ``torch.cuda.empty_cache()`` is called before the next attempt. If the
    last attempt also raises ``AssertionError``, ``pytest.skip`` is called
    instead of letting the assertion fail. Any other exception type
    propagates immediately without a retry.

    Args:
        n: Maximum number of attempts. Must be at least 1.

    Returns:
        A decorator that wraps a callable while preserving its signature
        and metadata (via ``functools.wraps``).

    Raises:
        ValueError: If ``n`` is smaller than 1. Without this check the retry
            loop would never run, the wrapped callable would never be
            called, and the wrapper would fail with a misleading
            ``AssertionError``.
    """
    # Fail fast at decoration time rather than on the first test invocation.
    if n < 1:
        raise ValueError(
            f"retry_until_skip requires n >= 1 attempts, got {n!r}.")

    def decorator_retry(func: Callable[_P, _R]) -> Callable[_P, _R]:

        @functools.wraps(func)
        def wrapper_retry(*args: _P.args, **kwargs: _P.kwargs) -> _R:
            for i in range(n):
                try:
                    return func(*args, **kwargs)
                except AssertionError:
                    # Free as much memory as possible before the next try.
                    gc.collect()
                    torch.cuda.empty_cache()
                    if i == n - 1:
                        # Out of attempts: skip instead of failing.
                        pytest.skip(f"Skipping test after {n} attempts.")

            # The loop either returns or skips on its last iteration; this
            # statement only exists to make that explicit.
            raise AssertionError("Code should not be reached")

        return wrapper_retry

    return decorator_retry
|
2024-07-26 23:02:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def tensorizer_config():
    """Provide a ``TensorizerConfig`` whose ``tensorizer_uri`` is ``"vllm"``.

    A new config object is constructed each time the fixture is set up.
    """
    return TensorizerConfig(tensorizer_uri="vllm")
|