[Doc] Improve help examples for --compilation-config (#16729)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung 2025-04-18 12:22:34 +08:00 committed by GitHub
parent e37073efd7
commit c16fb5dae8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 17 additions and 8 deletions

View File

@ -134,6 +134,6 @@ The cudagraphs are captured and managed by the compiler backend, and replayed wh
By default, vLLM will try to determine a set of sizes to capture cudagraph. You can also override it using the config `cudagraph_capture_sizes`:
`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation_config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"` `VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation-config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"`
Then it will only capture cudagraph for the specified sizes. It can be useful to have fine-grained control over the cudagraph capture.

View File

@ -53,12 +53,20 @@ def test_compilation_config():
assert args.compilation_config.level == 3 assert args.compilation_config.level == 3
# set to string form of a dict # set to string form of a dict
args = parser.parse_args(["--compilation-config", "{'level': 3}"]) args = parser.parse_args([
assert args.compilation_config.level == 3 "--compilation-config",
"{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
])
assert (args.compilation_config.level == 3 and
args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
# set to string form of a dict # set to string form of a dict
args = parser.parse_args(["--compilation-config={'level': 3}"]) args = parser.parse_args([
assert args.compilation_config.level == 3 "--compilation-config="
"{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
])
assert (args.compilation_config.level == 3 and
args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
def test_prefix_cache_default(): def test_prefix_cache_default():

View File

@ -939,10 +939,11 @@ class EngineArgs:
'testing only. level 3 is the recommended level ' 'testing only. level 3 is the recommended level '
'for production.\n' 'for production.\n'
'To specify the full compilation config, ' 'To specify the full compilation config, '
'use a JSON string.\n' 'use a JSON string, e.g. ``{"level": 3, '
'"cudagraph_capture_sizes": [1, 2, 4, 8]}``\n'
'Following the convention of traditional ' 'Following the convention of traditional '
'compilers, using -O without space is also ' 'compilers, using ``-O`` without space is also '
'supported. -O3 is equivalent to -O 3.') 'supported. ``-O3`` is equivalent to ``-O 3``.')
parser.add_argument('--kv-transfer-config', parser.add_argument('--kv-transfer-config',
type=KVTransferConfig.from_cli, type=KVTransferConfig.from_cli,