# SPDX-License-Identifier: Apache-2.0
"""Tests for HF_HUB_OFFLINE mode"""

import importlib
import sys

import pytest
import urllib3

from vllm import LLM
from vllm.distributed import cleanup_dist_env_and_memory

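# Small context and batch limits keep each engine cheap to construct;
# the test only builds each LLM, it never runs generation.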
MODEL_CONFIGS = [
    {
        "model": "facebook/opt-125m",
        "enforce_eager": True,
        "gpu_memory_utilization": 0.20,
        "max_model_len": 64,
        "max_num_batched_tokens": 64,
        "max_num_seqs": 64,
        "tensor_parallel_size": 1,
    },
    {
        "model": "mistralai/Mistral-7B-Instruct-v0.1",
        "enforce_eager": True,
        "gpu_memory_utilization": 0.95,
        "max_model_len": 64,
        "max_num_batched_tokens": 64,
        "max_num_seqs": 64,
        "tensor_parallel_size": 1,
        "tokenizer_mode": "mistral",
    },
    {
        "model": "sentence-transformers/all-MiniLM-L12-v2",
        "enforce_eager": True,
        "gpu_memory_utilization": 0.20,
        "max_model_len": 64,
        "max_num_batched_tokens": 64,
        "max_num_seqs": 64,
        "tensor_parallel_size": 1,
    },
]
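

# The fixture is module-scoped so the slow online caching pass runs only
# once, however many tests in this module depend on it.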
@pytest.fixture(scope="module")
def cache_models():
    # Cache model files first
    for model_config in MODEL_CONFIGS:
        LLM(**model_config)
        cleanup_dist_env_and_memory()

    yield


@pytest.mark.skip_global_cleanup
@pytest.mark.usefixtures("cache_models")
def test_offline_mode(monkeypatch: pytest.MonkeyPatch):
    # Set HF to offline mode and ensure we can still construct an LLM
    with monkeypatch.context() as m:
        try:
            m.setenv("HF_HUB_OFFLINE", "1")
            m.setenv("VLLM_NO_USAGE_STATS", "1")

            def disable_connect(*args, **kwargs):
                raise RuntimeError("No http calls allowed")

            m.setattr(
                urllib3.connection.HTTPConnection,
                "connect",
                disable_connect,
            )
            m.setattr(
                urllib3.connection.HTTPSConnection,
                "connect",
                disable_connect,
            )
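            # Patching at the urllib3 connection level blocks every HTTP(S)
            # request, including those made via requests (which is built on
            # urllib3), so any accidental network access fails loudly.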

            # huggingface_hub and friends read HF_HUB_OFFLINE when they
            # are imported, so they must be re-imported for the flag to
            # take effect
            _re_import_modules()
            # Cached model files should be used in offline mode
            for model_config in MODEL_CONFIGS:
                LLM(**model_config)
        finally:
            # Reset the environment after the test
            # NB: Assuming tests are run in online mode
            _re_import_modules()


def _re_import_modules():
    hf_hub_module_names = [
        k for k in sys.modules if k.startswith("huggingface_hub")
    ]
    # Skip "transformers_modules": it holds dynamically generated
    # remote-code modules that cannot be reloaded like regular packages
    transformers_module_names = [
        k for k in sys.modules if k.startswith("transformers")
        and not k.startswith("transformers_modules")
    ]

    reload_exception = None
    for module_name in hf_hub_module_names + transformers_module_names:
        try:
            importlib.reload(sys.modules[module_name])
        except Exception as e:
            reload_exception = e
            # Try to continue cleaning up so that other tests are less
            # likely to be affected

    # Error this test if reloading a module failed
    if reload_exception is not None:
        raise reload_exception