[Bugfix] Further clean up LoRA test (#14422)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
commit 12c29a881f
parent 70da0c0748
@@ -185,11 +185,6 @@ def mixtral_lora_files():
     return snapshot_download(repo_id="SangBinCho/mixtral-lora")
 
 
-@pytest.fixture(scope="session")
-def mixtral_lora_files_all_target_modules():
-    return snapshot_download(repo_id="dyang415/mixtral-lora-v0")
-
-
 @pytest.fixture(scope="session")
 def gemma_lora_files():
     return snapshot_download(repo_id="wskwon/gemma-7b-test-lora")
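For context, the fixtures above are ordinary session-scoped pytest fixtures that download a LoRA adapter once per test session via huggingface_hub and hand the local path to any test that names them. A minimal sketch of that pattern follows; it is illustrative only and not part of this commit, and the test name is hypothetical.

    # Minimal sketch (not from the diff): how a session-scoped
    # snapshot_download fixture is consumed. "test_example_uses_fixture"
    # is a hypothetical name used only for illustration.
    import os

    import pytest
    from huggingface_hub import snapshot_download

    @pytest.fixture(scope="session")
    def gemma_lora_files():
        # Downloads the adapter repo (or reuses the local HF cache) and
        # returns the local directory path.
        return snapshot_download(repo_id="wskwon/gemma-7b-test-lora")

    def test_example_uses_fixture(gemma_lora_files):
        # pytest injects the path returned by the fixture.
        assert os.path.isdir(gemma_lora_files)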
@@ -69,45 +69,3 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
                      prompts=prompts) == expected_lora_output
     assert do_sample(llm, mixtral_lora_files, lora_id=2,
                      prompts=prompts) == expected_lora_output
-
-
-@pytest.mark.parametrize("tp_size", [4])
-@pytest.mark.parametrize("fully_shard", [True, False])
-def test_mixtral_lora_all_target_modules(mixtral_lora_files_all_target_modules,
-                                          tp_size, fully_shard):
-    """This LoRA model has all supported Mixtral target modules"""
-
-    if torch.cuda.device_count() < tp_size:
-        pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
-
-    prompts = [
-        "Question: What happens if you touch the eyes of a blind man?\nChoices:\nB: The man's sight will be restored.\nA: Nothing happens if you touch the eyes of a blind man.\nC: The man will get his eyesight back.\nD: The man will no longer be blind.\nAnswer:",  # noqa: E501
-        "Question: Which will increase the energy of motion of water molecules?\nChoices:\nA: add heat\nB: add ice\nC: remove some of the water\nD: place the water in the freezer\nAnswer:",  # noqa: E501
-        "Since Craig threw aluminum cans in the trash and Benjamin recycled, _ was environmentally irresponsible.\nChoices:\n1: Craig\n2: Benjamin\nAnswer:",  # noqa: E501
-    ]
-
-    llm = vllm.LLM(
-        MODEL_PATH,
-        enable_lora=True,
-        max_num_seqs=16,
-        max_loras=4,
-        distributed_executor_backend="ray",
-        tensor_parallel_size=tp_size,
-        fully_sharded_loras=fully_shard,
-        max_lora_rank=32,
-    )
-
-    expected_lora_output = [
-        "A: Nothing happens if you touch the eyes of a blind man.",
-        "A: add heat",
-        "1: Craig",
-    ]
-
-    assert do_sample(llm,
-                     mixtral_lora_files_all_target_modules,
-                     lora_id=1,
-                     prompts=prompts) == expected_lora_output
-    assert do_sample(llm,
-                     mixtral_lora_files_all_target_modules,
-                     lora_id=2,
-                     prompts=prompts) == expected_lora_output
@@ -178,7 +178,8 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
                                  model):
     if num_gpus_available < 2:
         pytest.skip(f"Not enough GPUs for tensor parallelism {2}")
-
+    if model.quantization == "GPTQ":
+        pytest.skip("GPTQ lora outputs are just incredibly unstable")
     llm_tp1 = vllm.LLM(
         model=model.model_path,
         enable_lora=True,
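The added skip sits at the top of the tensor-parallel equality test. As a rough, hedged sketch of the pattern the test name suggests (the rest of the body is not shown in this hunk, so the helper do_sample, the cleanup steps, and the exact arguments below are assumptions, not the actual test):

    # Hedged sketch of a TP-equality check: build the same model at
    # tensor_parallel_size 1 and 2 and require identical LoRA outputs.
    # Everything beyond the hunk above is assumed for illustration.
    import vllm

    def tp_equality_sketch(model_path, lora_files, prompts, do_sample):
        llm_tp1 = vllm.LLM(model=model_path, enable_lora=True,
                           max_num_seqs=16, max_loras=4)
        output_tp1 = do_sample(llm_tp1, lora_files, lora_id=1, prompts=prompts)
        # In practice the first engine would be torn down here before the
        # tensor-parallel engine is created, to free GPU memory.

        llm_tp2 = vllm.LLM(model=model_path, enable_lora=True,
                           max_num_seqs=16, max_loras=4,
                           tensor_parallel_size=2)
        output_tp2 = do_sample(llm_tp2, lora_files, lora_id=1, prompts=prompts)

        # Outputs must not depend on the tensor-parallel degree.
        assert output_tp1 == output_tp2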