🐛 Fixup more test failures from memory profiling (#9563)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
parent
575dcebe9a
commit
ef7faad1b8
@ -0,0 +1,11 @@
|
|||||||
|
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
|
||||||
|
model_name: "neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8"
|
||||||
|
tasks:
|
||||||
|
- name: "gsm8k"
|
||||||
|
metrics:
|
||||||
|
- name: "exact_match,strict-match"
|
||||||
|
value: 0.356
|
||||||
|
- name: "exact_match,flexible-extract"
|
||||||
|
value: 0.358
|
||||||
|
limit: 1000
|
||||||
|
num_fewshot: 5
|
@ -1,6 +1,6 @@
|
|||||||
Meta-Llama-3-8B-Instruct.yaml
|
Meta-Llama-3-8B-Instruct.yaml
|
||||||
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
|
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
|
||||||
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
|
Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
|
||||||
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
|
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
|
||||||
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
|
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
|
||||||
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
|
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
|
||||||
|
@ -61,6 +61,7 @@ def test_minicpmv_lora(minicpmv_lora_files):
|
|||||||
max_loras=4,
|
max_loras=4,
|
||||||
max_lora_rank=64,
|
max_lora_rank=64,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
|
gpu_memory_utilization=0.97,  # This model is pretty big for CI GPUs
|
||||||
)
|
)
|
||||||
|
|
||||||
output1 = do_sample(llm, minicpmv_lora_files, lora_id=1)
|
output1 = do_sample(llm, minicpmv_lora_files, lora_id=1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user