[CI/Build] Further clean up LoRA tests (#15920)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent cdb57015a7
commit 4203926f10
@@ -289,7 +289,7 @@ steps:
   source_file_dependencies:
   - vllm/lora
   - tests/lora
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
+  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
   parallelism: 4

 - label: PyTorch Fullgraph Smoke Test # 9min
@@ -602,8 +602,6 @@ steps:
   # requires multi-GPU testing for validation.
   - pytest -v -s -x lora/test_chatglm3_tp.py
   - pytest -v -s -x lora/test_llama_tp.py
-  - pytest -v -s -x lora/test_minicpmv_tp.py
-  - pytest -v -s -x lora/test_transfomers_model.py


 - label: Weight Loading Multiple GPU Test # 33min
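For context on the pipeline change above: the --shard-id/--num-shards flags come from pytest-shard, and parallelism: 4 makes Buildkite launch four parallel jobs (BUILDKITE_PARALLEL_JOB / BUILDKITE_PARALLEL_JOB_COUNT are the per-job index and count), each running one shard of the LoRA suite. Dropping the two --ignore entries folds the MiniCPM-V and transformers-model tests back into those shards instead of the multi-GPU section. A rough sketch of the sharding idea, illustrative only and not pytest-shard's exact selection logic:

    # Illustrative sketch: deterministic round-robin split of collected tests
    # across shards. pytest-shard decides per collected item; details may differ.
    def select_shard(test_ids: list[str], shard_id: int, num_shards: int) -> list[str]:
        ordered = sorted(test_ids)
        return [t for i, t in enumerate(ordered) if i % num_shards == shard_id]

    # Reproducing one CI shard locally would look roughly like:
    #   pytest -v -s lora --shard-id=0 --num-shards=4 \
    #       --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py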
@@ -2,7 +2,6 @@

 import tempfile
 from collections import OrderedDict
-from typing import TypedDict
 from unittest.mock import MagicMock, patch

 import pytest
@@ -26,28 +25,6 @@ from vllm.model_executor.models.interfaces import SupportsLoRA
 from vllm.platforms import current_platform


-class ContextIDInfo(TypedDict):
-    lora_id: int
-    context_length: str
-
-
-class ContextInfo(TypedDict):
-    lora: str
-    context_length: str
-
-
-LONG_LORA_INFOS: list[ContextIDInfo] = [{
-    "lora_id": 1,
-    "context_length": "16k",
-}, {
-    "lora_id": 2,
-    "context_length": "16k",
-}, {
-    "lora_id": 3,
-    "context_length": "32k",
-}]
-
-
 @pytest.fixture()
 def should_do_global_cleanup_after_test(request) -> bool:
     """Allow subdirectories to skip global cleanup by overriding this fixture.
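The fixture kept above is the hook its docstring describes: a subdirectory can opt out of the global cleanup by shadowing the fixture in its own conftest.py. A minimal sketch of such an override (hypothetical file, not part of this commit):

    # Hypothetical tests/lora/<subdir>/conftest.py overriding the parent fixture.
    import pytest

    @pytest.fixture()
    def should_do_global_cleanup_after_test() -> bool:
        # Returning False skips the global cleanup normally run between tests here.
        return False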
@@ -59,7 +59,7 @@ DEVICES = ([
 # prefill stage(True) or decode stage(False)
 STAGES = [True, False]

-NUM_RANDOM_SEEDS = 10
+NUM_RANDOM_SEEDS = 6

 VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS = 128

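NUM_RANDOM_SEEDS bounds the per-test seed sweep in this module, so lowering it from 10 to 6 trims runtime roughly proportionally without touching the separate vocab-parallel-embedding sweep. The consumption pattern is roughly the following (a sketch; the actual loops in the tests may differ):

    # Sketch of how a seed-count constant like NUM_RANDOM_SEEDS is typically consumed.
    import torch

    NUM_RANDOM_SEEDS = 6

    def run_with_random_seeds(check_fn) -> None:
        for seed in range(NUM_RANDOM_SEEDS):
            torch.manual_seed(seed)  # fewer seeds => proportionally less CI time
            check_fn()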
@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
         enable_chunked_prefill=True,
     )
     generate_and_test(llm, sql_lora_files)
-
-
-@multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
-def test_llama_lora_tp4_fully_sharded_enable_bias(sql_lora_files):
-
-    llm = vllm.LLM(
-        MODEL_PATH,
-        enable_lora=True,
-        max_num_seqs=16,
-        max_loras=4,
-        tensor_parallel_size=4,
-        fully_sharded_loras=True,
-        enable_lora_bias=True,
-        enable_chunked_prefill=True,
-    )
-    generate_and_test(llm, sql_lora_files)
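The deleted test above differed from the surviving fully-sharded test only in enable_lora_bias=True. If that coverage were ever wanted again, one option (hypothetical, not part of this commit) would be to fold it into the surviving test via parametrization rather than a second 4-GPU run; MODEL_PATH, generate_and_test, and the decorators are the existing helpers from this test file:

    import pytest
    import vllm

    @pytest.mark.parametrize("enable_lora_bias", [False, True])
    @multi_gpu_test(num_gpus=4)
    @create_new_process_for_each_test()
    def test_llama_lora_tp4_fully_sharded(sql_lora_files, enable_lora_bias):
        llm = vllm.LLM(
            MODEL_PATH,
            enable_lora=True,
            max_num_seqs=16,
            max_loras=4,
            tensor_parallel_size=4,
            fully_sharded_loras=True,
            enable_lora_bias=enable_lora_bias,
            enable_chunked_prefill=True,
        )
        generate_and_test(llm, sql_lora_files)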
@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
@pytest.mark.xfail(
    current_platform.is_rocm(),
    reason="MiniCPM-V dependency xformers incompatible with ROCm")
@create_new_process_for_each_test()
def test_minicpmv_lora(minicpmv_lora_files):
    llm = vllm.LLM(
        MODEL_PATH,
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: Apache-2.0

import pytest

import vllm
from vllm.lora.request import LoRARequest
from vllm.platforms import current_platform

from ..utils import create_new_process_for_each_test, multi_gpu_test
@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
    return generated_texts


@create_new_process_for_each_test()
def test_ilama_lora(ilama_lora_files):
    llm = vllm.LLM(MODEL_PATH,
                   max_model_len=1024,
@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files):
         assert output2[i] == EXPECTED_LORA_OUTPUT[i]


+@pytest.mark.skipif(current_platform.is_cuda_alike(),
+                    reason="Skipping to avoid redundant model tests")
 @multi_gpu_test(num_gpus=4)
 @create_new_process_for_each_test()
 def test_ilama_lora_tp4(ilama_lora_files):
@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files):
         assert output2[i] == EXPECTED_LORA_OUTPUT[i]


+@pytest.mark.skipif(current_platform.is_cuda_alike(),
+                    reason="Skipping to avoid redundant model tests")
 @multi_gpu_test(num_gpus=4)
 @create_new_process_for_each_test()
 def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files):
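The two hunks above gate the 4-GPU ilama tests with current_platform.is_cuda_alike(), so they are skipped on CUDA-like platforms (where equivalent TP coverage already runs) and still execute elsewhere. A condensed sketch of the decorator stack, using names from this file and tests/utils; the real tests pass more engine arguments:

    import pytest
    import vllm
    from vllm.platforms import current_platform
    from ..utils import create_new_process_for_each_test, multi_gpu_test

    @pytest.mark.skipif(current_platform.is_cuda_alike(),
                        reason="Skipping to avoid redundant model tests")
    @multi_gpu_test(num_gpus=4)          # ask the CI runner for 4 GPUs
    @create_new_process_for_each_test()  # isolate the engine in a fresh process
    def test_ilama_lora_tp4_sketch(ilama_lora_files):
        llm = vllm.LLM(MODEL_PATH, enable_lora=True, tensor_parallel_size=4)
        do_sample(llm, ilama_lora_files, lora_id=1)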