[Misc] Remove dangling references to --use-v2-block-manager (#13492)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
4c82229898
commit
00b69c2d27
@@ -66,8 +66,7 @@
|
|||||||
"swap_space": 16,
|
"swap_space": 16,
|
||||||
"speculative_model": "turboderp/Qwama-0.5B-Instruct",
|
"speculative_model": "turboderp/Qwama-0.5B-Instruct",
|
||||||
"num_speculative_tokens": 4,
|
"num_speculative_tokens": 4,
|
||||||
"speculative_draft_tensor_parallel_size": 1,
|
"speculative_draft_tensor_parallel_size": 1
|
||||||
"use_v2_block_manager": ""
|
|
||||||
},
|
},
|
||||||
"client_parameters": {
|
"client_parameters": {
|
||||||
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
||||||
|
@@ -45,7 +45,7 @@ To perform the same with an online mode launch the server:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 --model facebook/opt-6.7b \
|
python -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 --model facebook/opt-6.7b \
|
||||||
--seed 42 -tp 1 --speculative_model facebook/opt-125m --use-v2-block-manager \
|
--seed 42 -tp 1 --speculative_model facebook/opt-125m \
|
||||||
--num_speculative_tokens 5 --gpu_memory_utilization 0.8
|
--num_speculative_tokens 5 --gpu_memory_utilization 0.8
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user