[SpecDecoding] Update MLPSpeculator CI tests to use smaller model (#6714)
This commit is contained in:
parent
5e8ca973eb
commit
c882a7f5b3
@@ -24,14 +24,14 @@ import pytest

 from .conftest import run_greedy_equality_correctness_test

 # main model
-MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
+MAIN_MODEL = "JackFram/llama-160m"

 # speculative model
-SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
+SPEC_MODEL = "ibm-fms/llama-160m-accelerator"

 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.
-MAX_SPEC_TOKENS = 5
+MAX_SPEC_TOKENS = 3

 # precision
 PRECISION = "float32"
|
Loading…
x
Reference in New Issue
Block a user