[SpecDecoding] Update MLPSpeculator CI tests to use smaller model (#6714)

This commit is contained in:
Nick Hill 2024-07-24 00:34:22 -07:00 committed by GitHub
parent 5e8ca973eb
commit c882a7f5b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -24,14 +24,14 @@ import pytest
from .conftest import run_greedy_equality_correctness_test
# main model
MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
MAIN_MODEL = "JackFram/llama-160m"
# speculative model
SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
SPEC_MODEL = "ibm-fms/llama-160m-accelerator"
# max. number of speculative tokens: this corresponds to
# n_predict in the config.json of the speculator model.
MAX_SPEC_TOKENS = 5
MAX_SPEC_TOKENS = 3
# precision
PRECISION = "float32"