[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec decode CI tests. (#8975)

2024-09-30 17:51:40 -07:00 · 2024-09-30 17:51:40 -07:00 · bce324487a
commit bce324487a
parent 1425a1bcf9
2 changed files with 4 additions and 3 deletions
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -207,8 +207,6 @@ steps:
  - vllm/spec_decode
  - tests/spec_decode
  commands:
-    # See https://github.com/vllm-project/vllm/issues/5152
-    - export VLLM_ATTENTION_BACKEND=XFORMERS
    - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
    - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py

--- a/tests/spec_decode/test_multi_step_worker.py
+++ b/tests/spec_decode/test_multi_step_worker.py
@@ -673,7 +673,10 @@ def test_use_draft_model_runner_advance_step():
    worker.model_runner._gpu_advance_step.side_effect = ValueError(
        exception_secret)

-    seq_group_metadata_list, _, _ = create_batch(batch_size, k)
+    seq_group_metadata_list, _, _ = create_batch(batch_size,
+                                                 k,
+                                                 block_size=block_size,
+                                                 num_gpu_blocks=num_gpu_blocks)

    # Fallback (should not call) when num_steps=1.
    execute_model_req = ExecuteModelRequest(