From 00b69c2d274023c47edc69454573ac656b300828 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 19 Feb 2025 03:37:26 +0000 Subject: [PATCH] [Misc] Remove dangling references to `--use-v2-block-manager` (#13492) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/nightly-benchmarks/tests/serving-tests.json | 3 +-- docs/source/features/spec_decode.md | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests.json b/.buildkite/nightly-benchmarks/tests/serving-tests.json index facb0eac..415171e2 100644 --- a/.buildkite/nightly-benchmarks/tests/serving-tests.json +++ b/.buildkite/nightly-benchmarks/tests/serving-tests.json @@ -66,8 +66,7 @@ "swap_space": 16, "speculative_model": "turboderp/Qwama-0.5B-Instruct", "num_speculative_tokens": 4, - "speculative_draft_tensor_parallel_size": 1, - "use_v2_block_manager": "" + "speculative_draft_tensor_parallel_size": 1 }, "client_parameters": { "model": "meta-llama/Meta-Llama-3.1-70B-Instruct", diff --git a/docs/source/features/spec_decode.md b/docs/source/features/spec_decode.md index d2255eff..cc8d6fce 100644 --- a/docs/source/features/spec_decode.md +++ b/docs/source/features/spec_decode.md @@ -45,7 +45,7 @@ To perform the same with an online mode launch the server: ```bash python -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 --model facebook/opt-6.7b \ - --seed 42 -tp 1 --speculative_model facebook/opt-125m --use-v2-block-manager \ + --seed 42 -tp 1 --speculative_model facebook/opt-125m \ --num_speculative_tokens 5 --gpu_memory_utilization 0.8 ```