[Bugfix]: Fix Tensorizer test failures (#6835)
This commit is contained in:
parent
55712941e5
commit
969d032265
@ -220,7 +220,6 @@ steps:
|
|||||||
|
|
||||||
- label: Tensorizer Test
|
- label: Tensorizer Test
|
||||||
#mirror_hardwares: [amd]
|
#mirror_hardwares: [amd]
|
||||||
soft_fail: true
|
|
||||||
fast_check: true
|
fast_check: true
|
||||||
commands:
|
commands:
|
||||||
- apt-get install -y curl libsodium23
|
- apt-get install -y curl libsodium23
|
||||||
|
45
tests/tensorizer_loader/conftest.py
Normal file
45
tests/tensorizer_loader/conftest.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# isort: skip_file
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
import gc
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import ray
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from vllm.distributed import (destroy_distributed_environment,
|
||||||
|
destroy_model_parallel)
|
||||||
|
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup():
    """Tear down distributed/test state so the next test starts clean.

    Ordering is deliberate: model-parallel groups are destroyed before the
    distributed environment they live in, then the torch process group (if
    one was ever created), and only afterwards are caches dropped and Ray
    shut down.
    """
    for teardown_step in (destroy_model_parallel,
                          destroy_distributed_environment):
        teardown_step()
    try:
        # Raises AssertionError when no process group was initialized;
        # that is expected for tests that never went distributed.
        torch.distributed.destroy_process_group()
    except AssertionError:
        pass
    gc.collect()
    torch.cuda.empty_cache()
    ray.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool:
    """Whether the autouse cleanup should run after each test.

    Subdirectories can override this fixture to opt out of global cleanup,
    which can give a ~10x speedup for non-GPU unit tests since they never
    have to initialize torch.
    """
    do_cleanup = True
    return do_cleanup
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def cleanup_fixture(should_do_global_cleanup_after_test: bool):
    """Autouse teardown hook: run global cleanup after each test unless
    the test's directory opted out via should_do_global_cleanup_after_test."""
    # Everything after the yield executes during test teardown.
    yield
    if not should_do_global_cleanup_after_test:
        return
    cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def tensorizer_config():
    """Provide a default TensorizerConfig (uri "vllm") to every test."""
    return TensorizerConfig(tensorizer_uri="vllm")
|
@ -40,7 +40,6 @@ model_ref = "facebook/opt-125m"
|
|||||||
tensorize_model_for_testing_script = os.path.join(
|
tensorize_model_for_testing_script = os.path.join(
|
||||||
os.path.dirname(__file__), "tensorize_vllm_model_for_testing.py")
|
os.path.dirname(__file__), "tensorize_vllm_model_for_testing.py")
|
||||||
|
|
||||||
|
|
||||||
def is_curl_installed():
|
def is_curl_installed():
|
||||||
try:
|
try:
|
||||||
subprocess.check_call(['curl', '--version'])
|
subprocess.check_call(['curl', '--version'])
|
||||||
@ -63,10 +62,6 @@ def write_keyfile(keyfile_path: str):
|
|||||||
with open(keyfile_path, 'wb') as f:
|
with open(keyfile_path, 'wb') as f:
|
||||||
f.write(encryption_params.key)
|
f.write(encryption_params.key)
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
|
||||||
def tensorizer_config():
|
|
||||||
config = TensorizerConfig(tensorizer_uri="vllm")
|
|
||||||
return config
|
|
||||||
|
|
||||||
|
|
||||||
@patch('vllm.model_executor.model_loader.tensorizer.TensorizerAgent')
|
@patch('vllm.model_executor.model_loader.tensorizer.TensorizerAgent')
|
||||||
@ -105,6 +100,7 @@ def test_can_deserialize_s3(vllm_runner):
|
|||||||
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
|
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
|
||||||
def test_deserialized_encrypted_vllm_model_has_same_outputs(
|
def test_deserialized_encrypted_vllm_model_has_same_outputs(
|
||||||
vllm_runner, tmp_path):
|
vllm_runner, tmp_path):
|
||||||
|
cleanup()
|
||||||
with vllm_runner(model_ref) as vllm_model:
|
with vllm_runner(model_ref) as vllm_model:
|
||||||
model_path = tmp_path / (model_ref + ".tensors")
|
model_path = tmp_path / (model_ref + ".tensors")
|
||||||
key_path = tmp_path / (model_ref + ".key")
|
key_path = tmp_path / (model_ref + ".key")
|
||||||
@ -316,6 +312,7 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
|
|||||||
|
|
||||||
|
|
||||||
def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
|
def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
|
||||||
|
cleanup()
|
||||||
model_ref = "facebook/opt-125m"
|
model_ref = "facebook/opt-125m"
|
||||||
model_path = tmp_path / (model_ref + ".tensors")
|
model_path = tmp_path / (model_ref + ".tensors")
|
||||||
config = TensorizerConfig(tensorizer_uri=str(model_path))
|
config = TensorizerConfig(tensorizer_uri=str(model_path))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user