[Doc]: Update OpenAI-Compatible Server documents (#12082)

This commit is contained in:
maang-h 2025-01-16 00:07:45 +08:00 committed by GitHub
parent de0526f668
commit 57e729e874
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 38 deletions

View File

@ -238,7 +238,7 @@ class EngineArgs:
choices=get_args(TaskOption), choices=get_args(TaskOption),
help='The task to use the model for. Each vLLM instance only ' help='The task to use the model for. Each vLLM instance only '
'supports one task, even if the same model can be used for ' 'supports one task, even if the same model can be used for '
'multiple tasks. When the model only supports one task, "auto" ' 'multiple tasks. When the model only supports one task, ``"auto"`` '
'can be used to select it; otherwise, you must specify explicitly ' 'can be used to select it; otherwise, you must specify explicitly '
'which task to use.') 'which task to use.')
parser.add_argument( parser.add_argument(
@ -250,7 +250,7 @@ class EngineArgs:
parser.add_argument( parser.add_argument(
'--skip-tokenizer-init', '--skip-tokenizer-init',
action='store_true', action='store_true',
help='Skip initialization of tokenizer and detokenizer') help='Skip initialization of tokenizer and detokenizer.')
parser.add_argument( parser.add_argument(
'--revision', '--revision',
type=nullable_str, type=nullable_str,
@ -401,7 +401,7 @@ class EngineArgs:
parser.add_argument( parser.add_argument(
'--worker-use-ray', '--worker-use-ray',
action='store_true', action='store_true',
help='Deprecated, use --distributed-executor-backend=ray.') help='Deprecated, use ``--distributed-executor-backend=ray``.')
parser.add_argument('--pipeline-parallel-size', parser.add_argument('--pipeline-parallel-size',
'-pp', '-pp',
type=int, type=int,
@ -430,7 +430,7 @@ class EngineArgs:
choices=[8, 16, 32, 64, 128], choices=[8, 16, 32, 64, 128],
help='Token block size for contiguous chunks of ' help='Token block size for contiguous chunks of '
'tokens. This is ignored on neuron devices and ' 'tokens. This is ignored on neuron devices and '
'set to max-model-len. On CUDA devices, ' 'set to ``--max-model-len``. On CUDA devices, '
'only block sizes up to 32 are supported. ' 'only block sizes up to 32 are supported. '
'On HPU devices, block size defaults to 128.') 'On HPU devices, block size defaults to 128.')
@ -439,12 +439,12 @@ class EngineArgs:
action=argparse.BooleanOptionalAction, action=argparse.BooleanOptionalAction,
default=EngineArgs.enable_prefix_caching, default=EngineArgs.enable_prefix_caching,
help="Enables automatic prefix caching. " help="Enables automatic prefix caching. "
"Use --no-enable-prefix-caching to disable explicitly.", "Use ``--no-enable-prefix-caching`` to disable explicitly.",
) )
parser.add_argument('--disable-sliding-window', parser.add_argument('--disable-sliding-window',
action='store_true', action='store_true',
help='Disables sliding window, ' help='Disables sliding window, '
'capping to sliding window size') 'capping to sliding window size.')
parser.add_argument('--use-v2-block-manager', parser.add_argument('--use-v2-block-manager',
action='store_true', action='store_true',
default=True, default=True,
@ -861,7 +861,7 @@ class EngineArgs:
"of the provided names. The model name in the model " "of the provided names. The model name in the model "
"field of a response will be the first name in this " "field of a response will be the first name in this "
"list. If not specified, the model name will be the " "list. If not specified, the model name will be the "
"same as the `--model` argument. Note that this name(s) " "same as the ``--model`` argument. Note that this name(s) "
"will also be used in `model_name` tag content of " "will also be used in `model_name` tag content of "
"prometheus metrics, if multiple names provided, metrics " "prometheus metrics, if multiple names provided, metrics "
"tag will take the first one.") "tag will take the first one.")
@ -881,7 +881,7 @@ class EngineArgs:
default=None, default=None,
help="Valid choices are " + help="Valid choices are " +
",".join(ALLOWED_DETAILED_TRACE_MODULES) + ",".join(ALLOWED_DETAILED_TRACE_MODULES) +
". It makes sense to set this only if --otlp-traces-endpoint is" ". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
" set. If set, it will collect detailed traces for the specified " " set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and or blocking " "modules. This involves use of possibly costly and or blocking "
"operations and hence might have a performance impact.") "operations and hence might have a performance impact.")

View File

@ -79,29 +79,29 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument("--host", parser.add_argument("--host",
type=nullable_str, type=nullable_str,
default=None, default=None,
help="host name") help="Host name.")
parser.add_argument("--port", type=int, default=8000, help="port number") parser.add_argument("--port", type=int, default=8000, help="Port number.")
parser.add_argument( parser.add_argument(
"--uvicorn-log-level", "--uvicorn-log-level",
type=str, type=str,
default="info", default="info",
choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'], choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
help="log level for uvicorn") help="Log level for uvicorn.")
parser.add_argument("--allow-credentials", parser.add_argument("--allow-credentials",
action="store_true", action="store_true",
help="allow credentials") help="Allow credentials.")
parser.add_argument("--allowed-origins", parser.add_argument("--allowed-origins",
type=json.loads, type=json.loads,
default=["*"], default=["*"],
help="allowed origins") help="Allowed origins.")
parser.add_argument("--allowed-methods", parser.add_argument("--allowed-methods",
type=json.loads, type=json.loads,
default=["*"], default=["*"],
help="allowed methods") help="Allowed methods.")
parser.add_argument("--allowed-headers", parser.add_argument("--allowed-headers",
type=json.loads, type=json.loads,
default=["*"], default=["*"],
help="allowed headers") help="Allowed headers.")
parser.add_argument("--api-key", parser.add_argument("--api-key",
type=nullable_str, type=nullable_str,
default=None, default=None,
@ -115,10 +115,10 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
action=LoRAParserAction, action=LoRAParserAction,
help="LoRA module configurations in either 'name=path' format " help="LoRA module configurations in either 'name=path' format "
"or JSON format. " "or JSON format. "
"Example (old format): 'name=path' " "Example (old format): ``'name=path'`` "
"Example (new format): " "Example (new format): "
"'{\"name\": \"name\", \"local_path\": \"path\", " "``{\"name\": \"name\", \"local_path\": \"path\", "
"\"base_model_name\": \"id\"}'") "\"base_model_name\": \"id\"}``")
parser.add_argument( parser.add_argument(
"--prompt-adapters", "--prompt-adapters",
type=nullable_str, type=nullable_str,
@ -132,7 +132,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default=None, default=None,
help="The file path to the chat template, " help="The file path to the chat template, "
"or the template in single-line form " "or the template in single-line form "
"for the specified model") "for the specified model.")
parser.add_argument( parser.add_argument(
'--chat-template-content-format', '--chat-template-content-format',
type=str, type=str,
@ -141,38 +141,39 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help='The format to render message content within a chat template.' help='The format to render message content within a chat template.'
'\n\n' '\n\n'
'* "string" will render the content as a string. ' '* "string" will render the content as a string. '
'Example: "Hello World"\n' 'Example: ``"Hello World"``\n'
'* "openai" will render the content as a list of dictionaries, ' '* "openai" will render the content as a list of dictionaries, '
'similar to OpenAI schema. ' 'similar to OpenAI schema. '
'Example: [{"type": "text", "text": "Hello world!"}]') 'Example: ``[{"type": "text", "text": "Hello world!"}]``')
parser.add_argument("--response-role", parser.add_argument("--response-role",
type=nullable_str, type=nullable_str,
default="assistant", default="assistant",
help="The role name to return if " help="The role name to return if "
"`request.add_generation_prompt=true`.") "``request.add_generation_prompt=true``.")
parser.add_argument("--ssl-keyfile", parser.add_argument("--ssl-keyfile",
type=nullable_str, type=nullable_str,
default=None, default=None,
help="The file path to the SSL key file") help="The file path to the SSL key file.")
parser.add_argument("--ssl-certfile", parser.add_argument("--ssl-certfile",
type=nullable_str, type=nullable_str,
default=None, default=None,
help="The file path to the SSL cert file") help="The file path to the SSL cert file.")
parser.add_argument("--ssl-ca-certs", parser.add_argument("--ssl-ca-certs",
type=nullable_str, type=nullable_str,
default=None, default=None,
help="The CA certificates file") help="The CA certificates file.")
parser.add_argument( parser.add_argument(
"--ssl-cert-reqs", "--ssl-cert-reqs",
type=int, type=int,
default=int(ssl.CERT_NONE), default=int(ssl.CERT_NONE),
help="Whether client certificate is required (see stdlib ssl module's documentation)" help="Whether client certificate is required (see stdlib ssl module's documentation)."
) )
parser.add_argument( parser.add_argument(
"--root-path", "--root-path",
type=nullable_str, type=nullable_str,
default=None, default=None,
help="FastAPI root_path when app is behind a path based routing proxy") help="FastAPI root_path when app is behind a path based routing proxy."
)
parser.add_argument( parser.add_argument(
"--middleware", "--middleware",
type=nullable_str, type=nullable_str,
@ -182,15 +183,15 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"We accept multiple --middleware arguments. " "We accept multiple --middleware arguments. "
"The value should be an import path. " "The value should be an import path. "
"If a function is provided, vLLM will add it to the server " "If a function is provided, vLLM will add it to the server "
"using @app.middleware('http'). " "using ``@app.middleware('http')``. "
"If a class is provided, vLLM will add it to the server " "If a class is provided, vLLM will add it to the server "
"using app.add_middleware(). ") "using ``app.add_middleware()``. ")
parser.add_argument( parser.add_argument(
"--return-tokens-as-token-ids", "--return-tokens-as-token-ids",
action="store_true", action="store_true",
help="When --max-logprobs is specified, represents single tokens as " help="When ``--max-logprobs`` is specified, represents single tokens "
"strings of the form 'token_id:{token_id}' so that tokens that " " as strings of the form 'token_id:{token_id}' so that tokens "
"are not JSON-encodable can be identified.") "that are not JSON-encodable can be identified.")
parser.add_argument( parser.add_argument(
"--disable-frontend-multiprocessing", "--disable-frontend-multiprocessing",
action="store_true", action="store_true",
@ -205,9 +206,8 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"--enable-auto-tool-choice", "--enable-auto-tool-choice",
action="store_true", action="store_true",
default=False, default=False,
help= help="Enable auto tool choice for supported models. Use "
"Enable auto tool choice for supported models. Use --tool-call-parser" "``--tool-call-parser`` to specify which parser to use.")
" to specify which parser to use")
valid_tool_parsers = ToolParserManager.tool_parsers.keys() valid_tool_parsers = ToolParserManager.tool_parsers.keys()
parser.add_argument( parser.add_argument(
@ -219,7 +219,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help= help=
"Select the tool call parser depending on the model that you're using." "Select the tool call parser depending on the model that you're using."
" This is used to parse the model-generated tool call into OpenAI API " " This is used to parse the model-generated tool call into OpenAI API "
"format. Required for --enable-auto-tool-choice.") "format. Required for ``--enable-auto-tool-choice``.")
parser.add_argument( parser.add_argument(
"--tool-parser-plugin", "--tool-parser-plugin",
@ -228,7 +228,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help= help=
"Specify the tool parser plugin written to parse the model-generated tool" "Specify the tool parser plugin written to parse the model-generated tool"
" calls into OpenAI API format; the names registered in this plugin can be used " " calls into OpenAI API format; the names registered in this plugin can be used "
"in --tool-call-parser.") "in ``--tool-call-parser``.")
parser = AsyncEngineArgs.add_cli_args(parser) parser = AsyncEngineArgs.add_cli_args(parser)
@ -243,7 +243,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"--disable-fastapi-docs", "--disable-fastapi-docs",
action='store_true', action='store_true',
default=False, default=False,
help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint" help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
) )
parser.add_argument( parser.add_argument(
"--enable-prompt-tokens-details", "--enable-prompt-tokens-details",