"""Tests for HF_HUB_OFFLINE mode"""
import importlib
import sys
import weakref

import pytest

from vllm import LLM
from vllm.distributed import cleanup_dist_env_and_memory

MODEL_NAME = "facebook/opt-125m"


@pytest.fixture(scope="module")
def llm():
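    """Module-scoped LLM fixture; loads MODEL_NAME so its files are in the HF cache."""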
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(model=MODEL_NAME,
              max_num_batched_tokens=4096,
              tensor_parallel_size=1,
              gpu_memory_utilization=0.10,
              enforce_eager=True)

    with llm.deprecate_legacy_api():
        yield weakref.proxy(llm)

        del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_offline_mode(llm: LLM, monkeypatch):
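    """Construct an LLM with HF_HUB_OFFLINE=1 set, relying on cached model files."""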
    # we use the llm fixture to ensure the model files are in-cache
    del llm

    # Set HF to offline mode and ensure we can still construct an LLM
    try:
        monkeypatch.setenv("HF_HUB_OFFLINE", "1")
        # Need to re-import huggingface_hub and friends to set up offline mode
        _re_import_modules()
        # Cached model files should be used in offline mode
        LLM(model=MODEL_NAME,
            max_num_batched_tokens=4096,
            tensor_parallel_size=1,
            gpu_memory_utilization=0.20,
            enforce_eager=True)
    finally:
        # Reset the environment after the test
        # NB: Assuming tests are run in online mode
        monkeypatch.delenv("HF_HUB_OFFLINE")
        _re_import_modules()


def _re_import_modules():
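    """Reload huggingface_hub and transformers so they pick up HF_HUB_OFFLINE.

    The offline flag is read at module import time, so changing the environment
    variable alone is not enough; the already-imported modules must be reloaded.
    """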
    hf_hub_module_names = [
        k for k in sys.modules if k.startswith("huggingface_hub")
    ]
    transformers_module_names = [
        k for k in sys.modules if k.startswith("transformers")
        and not k.startswith("transformers_modules")
    ]

    reload_exception = None
    for module_name in hf_hub_module_names + transformers_module_names:
        try:
            importlib.reload(sys.modules[module_name])
        except Exception as e:
            reload_exception = e
            # Try to continue clean-up so that other tests are less likely to
            # be affected

    # Error this test if reloading a module failed
    if reload_exception is not None:
        raise reload_exception