[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec decode CI tests. (#8975)
commit bce324487a
parent 1425a1bcf9
@@ -207,8 +207,6 @@ steps:
   - vllm/spec_decode
   - tests/spec_decode
   commands:
-    # See https://github.com/vllm-project/vllm/issues/5152
-    - export VLLM_ATTENTION_BACKEND=XFORMERS
     - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
     - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py
 
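The two removed lines are the whole of the CI change: the step no longer pins the attention backend to xFormers through the VLLM_ATTENTION_BACKEND environment variable (the workaround referenced in issue #5152), so the spec decode tests pick up vLLM's default backend, which per the commit title is flash attention. Below is a minimal sketch of the before/after behavior, assuming only that vLLM reads this variable at startup; "XFORMERS" and "FLASH_ATTN" are real values for it, but the helper itself is illustrative, not vLLM code:

    import os

    def pin_attention_backend(force_xformers: bool) -> None:
        # Illustrative helper, not part of vLLM or this commit.
        if force_xformers:
            # Pre-#8975 CI: the removed export pinned the backend.
            os.environ["VLLM_ATTENTION_BACKEND"] = "XFORMERS"
        else:
            # Post-#8975 CI: leave the variable unset and let vLLM
            # auto-select its default (flash attention on supported GPUs).
            os.environ.pop("VLLM_ATTENTION_BACKEND", None)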
@@ -673,7 +673,10 @@ def test_use_draft_model_runner_advance_step():
     worker.model_runner._gpu_advance_step.side_effect = ValueError(
         exception_secret)
 
-    seq_group_metadata_list, _, _ = create_batch(batch_size, k)
+    seq_group_metadata_list, _, _ = create_batch(batch_size,
+                                                 k,
+                                                 block_size=block_size,
+                                                 num_gpu_blocks=num_gpu_blocks)
 
     # Fallback (should not call) when num_steps=1.
     execute_model_req = ExecuteModelRequest(
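The test change widens the create_batch call so the generated sequence groups are built with an explicit block_size and num_gpu_blocks, presumably to keep their block tables consistent with the worker's KV cache now that the tests run under the flash attention backend. A sketch of the updated call in context; the values and the import path are assumptions for illustration (the hunk does not show where block_size and num_gpu_blocks are defined):

    # Assumed import path: the vLLM spec decode tests keep this helper
    # in tests/spec_decode/utils.
    from tests.spec_decode.utils import create_batch

    batch_size, k = 2, 3                 # illustrative sizes, not from the diff
    block_size = 32                      # illustrative KV-cache block size
    num_gpu_blocks = 2048 // block_size  # illustrative cache capacity

    seq_group_metadata_list, _, _ = create_batch(batch_size,
                                                 k,
                                                 block_size=block_size,
                                                 num_gpu_blocks=num_gpu_blocks)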