[Bugfix] Further clean up LoRA test (#14422)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 70da0c0748
commit 12c29a881f
@@ -185,11 +185,6 @@ def mixtral_lora_files():
     return snapshot_download(repo_id="SangBinCho/mixtral-lora")
 
 
-@pytest.fixture(scope="session")
-def mixtral_lora_files_all_target_modules():
-    return snapshot_download(repo_id="dyang415/mixtral-lora-v0")
-
-
 @pytest.fixture(scope="session")
 def gemma_lora_files():
     return snapshot_download(repo_id="wskwon/gemma-7b-test-lora")
@@ -69,45 +69,3 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
                      prompts=prompts) == expected_lora_output
     assert do_sample(llm, mixtral_lora_files, lora_id=2,
                      prompts=prompts) == expected_lora_output
-
-
-@pytest.mark.parametrize("tp_size", [4])
-@pytest.mark.parametrize("fully_shard", [True, False])
-def test_mixtral_lora_all_target_modules(mixtral_lora_files_all_target_modules,
-                                         tp_size, fully_shard):
-    """This LoRA model has all supported Mixtral target modules"""
-
-    if torch.cuda.device_count() < tp_size:
-        pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
-
-    prompts = [
-        "Question: What happens if you touch the eyes of a blind man?\nChoices:\nB: The man's sight will be restored.\nA: Nothing happens if you touch the eyes of a blind man.\nC: The man will get his eyesight back.\nD: The man will no longer be blind.\nAnswer:",  # noqa: E501
-        "Question: Which will increase the energy of motion of water molecules?\nChoices:\nA: add heat\nB: add ice\nC: remove some of the water\nD: place the water in the freezer\nAnswer:",  # noqa: E501
-        "Since Craig threw aluminum cans in the trash and Benjamin recycled, _ was environmentally irresponsible.\nChoices:\n1: Craig\n2: Benjamin\nAnswer:",  # noqa: E501
-    ]
-
-    llm = vllm.LLM(
-        MODEL_PATH,
-        enable_lora=True,
-        max_num_seqs=16,
-        max_loras=4,
-        distributed_executor_backend="ray",
-        tensor_parallel_size=tp_size,
-        fully_sharded_loras=fully_shard,
-        max_lora_rank=32,
-    )
-
-    expected_lora_output = [
-        "A: Nothing happens if you touch the eyes of a blind man.",
-        "A: add heat",
-        "1: Craig",
-    ]
-
-    assert do_sample(llm,
-                     mixtral_lora_files_all_target_modules,
-                     lora_id=1,
-                     prompts=prompts) == expected_lora_output
-    assert do_sample(llm,
-                     mixtral_lora_files_all_target_modules,
-                     lora_id=2,
-                     prompts=prompts) == expected_lora_output
@@ -178,7 +178,8 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
                                  model):
     if num_gpus_available < 2:
         pytest.skip(f"Not enough GPUs for tensor parallelism {2}")
-
+    if model.quantization == "GPTQ":
+        pytest.skip("GPTQ lora outputs are just incredibly unstable")
     llm_tp1 = vllm.LLM(
         model=model.model_path,
         enable_lora=True,