[Bugfix] Further clean up LoRA test (#14422)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li 2025-03-07 18:30:55 +08:00 committed by GitHub
parent 70da0c0748
commit 12c29a881f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 2 additions and 48 deletions

View File

@@ -185,11 +185,6 @@ def mixtral_lora_files():
return snapshot_download(repo_id="SangBinCho/mixtral-lora")
@pytest.fixture(scope="session")
def mixtral_lora_files_all_target_modules():
    """Session-scoped local path of a Mixtral LoRA adapter that covers
    all supported target modules (downloaded once per test session)."""
    adapter_path = snapshot_download(repo_id="dyang415/mixtral-lora-v0")
    return adapter_path
@pytest.fixture(scope="session")
def gemma_lora_files():
    """Session-scoped local path of a Gemma-7B test LoRA adapter
    (downloaded once per test session)."""
    adapter_path = snapshot_download(repo_id="wskwon/gemma-7b-test-lora")
    return adapter_path

View File

@@ -69,45 +69,3 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
prompts=prompts) == expected_lora_output
assert do_sample(llm, mixtral_lora_files, lora_id=2,
prompts=prompts) == expected_lora_output
@pytest.mark.parametrize("tp_size", [4])
@pytest.mark.parametrize("fully_shard", [True, False])
def test_mixtral_lora_all_target_modules(mixtral_lora_files_all_target_modules,
                                         tp_size, fully_shard):
    """This LoRA model has all supported Mixtral target modules.

    Runs the adapter under tensor parallelism (with and without fully
    sharded LoRA layers) and checks that generation matches the
    expected answers for three multiple-choice prompts.
    """
    # Skip rather than fail on machines without enough GPUs.
    if torch.cuda.device_count() < tp_size:
        pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
    # Multiple-choice prompts whose completions are compared verbatim
    # against expected_lora_output below.
    prompts = [
        "Question: What happens if you touch the eyes of a blind man?\nChoices:\nB: The man's sight will be restored.\nA: Nothing happens if you touch the eyes of a blind man.\nC: The man will get his eyesight back.\nD: The man will no longer be blind.\nAnswer:",  # noqa: E501
        "Question: Which will increase the energy of motion of water molecules?\nChoices:\nA: add heat\nB: add ice\nC: remove some of the water\nD: place the water in the freezer\nAnswer:",  # noqa: E501
        "Since Craig threw aluminum cans in the trash and Benjamin recycled, _ was environmentally irresponsible.\nChoices:\n1: Craig\n2: Benjamin\nAnswer:",  # noqa: E501
    ]
    llm = vllm.LLM(
        MODEL_PATH,
        enable_lora=True,
        max_num_seqs=16,
        max_loras=4,
        distributed_executor_backend="ray",
        tensor_parallel_size=tp_size,
        # Exercised in both modes via the fully_shard parametrization.
        fully_sharded_loras=fully_shard,
        # NOTE(review): rank 32 presumably matches the adapter's rank —
        # verify against the downloaded LoRA config.
        max_lora_rank=32,
    )
    expected_lora_output = [
        "A: Nothing happens if you touch the eyes of a blind man.",
        "A: add heat",
        "1: Craig",
    ]
    # The same adapter files are registered under two different LoRA ids;
    # both must yield identical expected outputs.
    assert do_sample(llm,
                     mixtral_lora_files_all_target_modules,
                     lora_id=1,
                     prompts=prompts) == expected_lora_output
    assert do_sample(llm,
                     mixtral_lora_files_all_target_modules,
                     lora_id=2,
                     prompts=prompts) == expected_lora_output

View File

@@ -178,7 +178,8 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
model):
if num_gpus_available < 2:
pytest.skip(f"Not enough GPUs for tensor parallelism {2}")
if model.quantization == "GPTQ":
pytest.skip("GPTQ lora outputs are just incredibly unstable")
llm_tp1 = vllm.LLM(
model=model.model_path,
enable_lora=True,