[Doc] Improve help examples for --compilation-config (#16729)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent e37073efd7
commit c16fb5dae8
@ -134,6 +134,6 @@ The cudagraphs are captured and managed by the compiler backend, and replayed wh
|
|||||||
|
|
||||||
By default, vLLM will try to determine a set of sizes to capture cudagraph. You can also override it using the config `cudagraph_capture_sizes`:
|
By default, vLLM will try to determine a set of sizes to capture cudagraph. You can also override it using the config `cudagraph_capture_sizes`:
|
||||||
|
|
||||||
`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation_config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"`
|
`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation-config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"`
|
||||||
|
|
||||||
Then it will only capture cudagraph for the specified sizes. It can be useful to have fine-grained control over the cudagraph capture.
|
Then it will only capture cudagraph for the specified sizes. It can be useful to have fine-grained control over the cudagraph capture.
|
||||||
|
@ -53,12 +53,20 @@ def test_compilation_config():
|
|||||||
assert args.compilation_config.level == 3
|
assert args.compilation_config.level == 3
|
||||||
|
|
||||||
# set to string form of a dict
|
# set to string form of a dict
|
||||||
args = parser.parse_args(["--compilation-config", "{'level': 3}"])
|
args = parser.parse_args([
|
||||||
assert args.compilation_config.level == 3
|
"--compilation-config",
|
||||||
|
"{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
|
||||||
|
])
|
||||||
|
assert (args.compilation_config.level == 3 and
|
||||||
|
args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
|
||||||
|
|
||||||
# set to string form of a dict
|
# set to string form of a dict
|
||||||
args = parser.parse_args(["--compilation-config={'level': 3}"])
|
args = parser.parse_args([
|
||||||
assert args.compilation_config.level == 3
|
"--compilation-config="
|
||||||
|
"{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
|
||||||
|
])
|
||||||
|
assert (args.compilation_config.level == 3 and
|
||||||
|
args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
|
||||||
|
|
||||||
|
|
||||||
def test_prefix_cache_default():
|
def test_prefix_cache_default():
|
||||||
|
@ -939,10 +939,11 @@ class EngineArgs:
|
|||||||
'testing only. level 3 is the recommended level '
|
'testing only. level 3 is the recommended level '
|
||||||
'for production.\n'
|
'for production.\n'
|
||||||
'To specify the full compilation config, '
|
'To specify the full compilation config, '
|
||||||
'use a JSON string.\n'
|
'use a JSON string, e.g. ``{"level": 3, '
|
||||||
|
'"cudagraph_capture_sizes": [1, 2, 4, 8]}``\n'
|
||||||
'Following the convention of traditional '
|
'Following the convention of traditional '
|
||||||
'compilers, using -O without space is also '
|
'compilers, using ``-O`` without space is also '
|
||||||
'supported. -O3 is equivalent to -O 3.')
|
'supported. ``-O3`` is equivalent to ``-O 3``.')
|
||||||
|
|
||||||
parser.add_argument('--kv-transfer-config',
|
parser.add_argument('--kv-transfer-config',
|
||||||
type=KVTransferConfig.from_cli,
|
type=KVTransferConfig.from_cli,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user