[Doc] Improve help examples for --compilation-config
(#16729)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
e37073efd7
commit
c16fb5dae8
@@ -134,6 +134,6 @@ The cudagraphs are captured and managed by the compiler backend, and replayed wh
|
||||
|
||||
By default, vLLM tries to determine a set of sizes for which to capture cudagraphs. You can override this set with the `cudagraph_capture_sizes` config option:
|
||||
|
||||
`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation_config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"`
|
||||
`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation-config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"`
|
||||
|
||||
vLLM will then capture cudagraphs only for the specified sizes. This is useful when you need fine-grained control over cudagraph capture.
|
||||
|
@@ -53,12 +53,20 @@ def test_compilation_config():
|
||||
assert args.compilation_config.level == 3
|
||||
|
||||
# set to string form of a dict
|
||||
args = parser.parse_args(["--compilation-config", "{'level': 3}"])
|
||||
assert args.compilation_config.level == 3
|
||||
args = parser.parse_args([
|
||||
"--compilation-config",
|
||||
"{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
|
||||
])
|
||||
assert (args.compilation_config.level == 3 and
|
||||
args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
|
||||
|
||||
# set to string form of a dict
|
||||
args = parser.parse_args(["--compilation-config={'level': 3}"])
|
||||
assert args.compilation_config.level == 3
|
||||
args = parser.parse_args([
|
||||
"--compilation-config="
|
||||
"{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
|
||||
])
|
||||
assert (args.compilation_config.level == 3 and
|
||||
args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
|
||||
|
||||
|
||||
def test_prefix_cache_default():
|
||||
|
@@ -939,10 +939,11 @@ class EngineArgs:
|
||||
'testing only. level 3 is the recommended level '
|
||||
'for production.\n'
|
||||
'To specify the full compilation config, '
|
||||
'use a JSON string.\n'
|
||||
'use a JSON string, e.g. ``{"level": 3, '
|
||||
'"cudagraph_capture_sizes": [1, 2, 4, 8]}``\n'
|
||||
'Following the convention of traditional '
|
||||
'compilers, using -O without space is also '
|
||||
'supported. -O3 is equivalent to -O 3.')
|
||||
'compilers, using ``-O`` without space is also '
|
||||
'supported. ``-O3`` is equivalent to ``-O 3``.')
|
||||
|
||||
parser.add_argument('--kv-transfer-config',
|
||||
type=KVTransferConfig.from_cli,
|
||||
|
Loading…
x
Reference in New Issue
Block a user