[Doc]: Update OpenAI-Compatible Server
documents (#12082)
This commit is contained in:
parent
de0526f668
commit
57e729e874
@ -238,7 +238,7 @@ class EngineArgs:
|
||||
choices=get_args(TaskOption),
|
||||
help='The task to use the model for. Each vLLM instance only '
|
||||
'supports one task, even if the same model can be used for '
|
||||
'multiple tasks. When the model only supports one task, "auto" '
|
||||
'multiple tasks. When the model only supports one task, ``"auto"`` '
|
||||
'can be used to select it; otherwise, you must specify explicitly '
|
||||
'which task to use.')
|
||||
parser.add_argument(
|
||||
@ -250,7 +250,7 @@ class EngineArgs:
|
||||
parser.add_argument(
|
||||
'--skip-tokenizer-init',
|
||||
action='store_true',
|
||||
help='Skip initialization of tokenizer and detokenizer')
|
||||
help='Skip initialization of tokenizer and detokenizer.')
|
||||
parser.add_argument(
|
||||
'--revision',
|
||||
type=nullable_str,
|
||||
@ -401,7 +401,7 @@ class EngineArgs:
|
||||
parser.add_argument(
|
||||
'--worker-use-ray',
|
||||
action='store_true',
|
||||
help='Deprecated, use --distributed-executor-backend=ray.')
|
||||
help='Deprecated, use ``--distributed-executor-backend=ray``.')
|
||||
parser.add_argument('--pipeline-parallel-size',
|
||||
'-pp',
|
||||
type=int,
|
||||
@ -430,7 +430,7 @@ class EngineArgs:
|
||||
choices=[8, 16, 32, 64, 128],
|
||||
help='Token block size for contiguous chunks of '
|
||||
'tokens. This is ignored on neuron devices and '
|
||||
'set to max-model-len. On CUDA devices, '
|
||||
'set to ``--max-model-len``. On CUDA devices, '
|
||||
'only block sizes up to 32 are supported. '
|
||||
'On HPU devices, block size defaults to 128.')
|
||||
|
||||
@ -439,12 +439,12 @@ class EngineArgs:
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=EngineArgs.enable_prefix_caching,
|
||||
help="Enables automatic prefix caching. "
|
||||
"Use --no-enable-prefix-caching to disable explicitly.",
|
||||
"Use ``--no-enable-prefix-caching`` to disable explicitly.",
|
||||
)
|
||||
parser.add_argument('--disable-sliding-window',
|
||||
action='store_true',
|
||||
help='Disables sliding window, '
|
||||
'capping to sliding window size')
|
||||
'capping to sliding window size.')
|
||||
parser.add_argument('--use-v2-block-manager',
|
||||
action='store_true',
|
||||
default=True,
|
||||
@ -861,7 +861,7 @@ class EngineArgs:
|
||||
"of the provided names. The model name in the model "
|
||||
"field of a response will be the first name in this "
|
||||
"list. If not specified, the model name will be the "
|
||||
"same as the `--model` argument. Noted that this name(s) "
|
||||
"same as the ``--model`` argument. Noted that this name(s) "
|
||||
"will also be used in `model_name` tag content of "
|
||||
"prometheus metrics, if multiple names provided, metrics "
|
||||
"tag will take the first one.")
|
||||
@ -881,7 +881,7 @@ class EngineArgs:
|
||||
default=None,
|
||||
help="Valid choices are " +
|
||||
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
|
||||
". It makes sense to set this only if --otlp-traces-endpoint is"
|
||||
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
|
||||
" set. If set, it will collect detailed traces for the specified "
|
||||
"modules. This involves use of possibly costly and or blocking "
|
||||
"operations and hence might have a performance impact.")
|
||||
|
@ -79,29 +79,29 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
parser.add_argument("--host",
|
||||
type=nullable_str,
|
||||
default=None,
|
||||
help="host name")
|
||||
parser.add_argument("--port", type=int, default=8000, help="port number")
|
||||
help="Host name.")
|
||||
parser.add_argument("--port", type=int, default=8000, help="Port number.")
|
||||
parser.add_argument(
|
||||
"--uvicorn-log-level",
|
||||
type=str,
|
||||
default="info",
|
||||
choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
|
||||
help="log level for uvicorn")
|
||||
help="Log level for uvicorn.")
|
||||
parser.add_argument("--allow-credentials",
|
||||
action="store_true",
|
||||
help="allow credentials")
|
||||
help="Allow credentials.")
|
||||
parser.add_argument("--allowed-origins",
|
||||
type=json.loads,
|
||||
default=["*"],
|
||||
help="allowed origins")
|
||||
help="Allowed origins.")
|
||||
parser.add_argument("--allowed-methods",
|
||||
type=json.loads,
|
||||
default=["*"],
|
||||
help="allowed methods")
|
||||
help="Allowed methods.")
|
||||
parser.add_argument("--allowed-headers",
|
||||
type=json.loads,
|
||||
default=["*"],
|
||||
help="allowed headers")
|
||||
help="Allowed headers.")
|
||||
parser.add_argument("--api-key",
|
||||
type=nullable_str,
|
||||
default=None,
|
||||
@ -115,10 +115,10 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
action=LoRAParserAction,
|
||||
help="LoRA module configurations in either 'name=path' format"
|
||||
"or JSON format. "
|
||||
"Example (old format): 'name=path' "
|
||||
"Example (old format): ``'name=path'`` "
|
||||
"Example (new format): "
|
||||
"'{\"name\": \"name\", \"local_path\": \"path\", "
|
||||
"\"base_model_name\": \"id\"}'")
|
||||
"``{\"name\": \"name\", \"local_path\": \"path\", "
|
||||
"\"base_model_name\": \"id\"}``")
|
||||
parser.add_argument(
|
||||
"--prompt-adapters",
|
||||
type=nullable_str,
|
||||
@ -132,7 +132,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
default=None,
|
||||
help="The file path to the chat template, "
|
||||
"or the template in single-line form "
|
||||
"for the specified model")
|
||||
"for the specified model.")
|
||||
parser.add_argument(
|
||||
'--chat-template-content-format',
|
||||
type=str,
|
||||
@ -141,38 +141,39 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
help='The format to render message content within a chat template.'
|
||||
'\n\n'
|
||||
'* "string" will render the content as a string. '
|
||||
'Example: "Hello World"\n'
|
||||
'Example: ``"Hello World"``\n'
|
||||
'* "openai" will render the content as a list of dictionaries, '
|
||||
'similar to OpenAI schema. '
|
||||
'Example: [{"type": "text", "text": "Hello world!"}]')
|
||||
'Example: ``[{"type": "text", "text": "Hello world!"}]``')
|
||||
parser.add_argument("--response-role",
|
||||
type=nullable_str,
|
||||
default="assistant",
|
||||
help="The role name to return if "
|
||||
"`request.add_generation_prompt=true`.")
|
||||
"``request.add_generation_prompt=true``.")
|
||||
parser.add_argument("--ssl-keyfile",
|
||||
type=nullable_str,
|
||||
default=None,
|
||||
help="The file path to the SSL key file")
|
||||
help="The file path to the SSL key file.")
|
||||
parser.add_argument("--ssl-certfile",
|
||||
type=nullable_str,
|
||||
default=None,
|
||||
help="The file path to the SSL cert file")
|
||||
help="The file path to the SSL cert file.")
|
||||
parser.add_argument("--ssl-ca-certs",
|
||||
type=nullable_str,
|
||||
default=None,
|
||||
help="The CA certificates file")
|
||||
help="The CA certificates file.")
|
||||
parser.add_argument(
|
||||
"--ssl-cert-reqs",
|
||||
type=int,
|
||||
default=int(ssl.CERT_NONE),
|
||||
help="Whether client certificate is required (see stdlib ssl module's)"
|
||||
help="Whether client certificate is required (see stdlib ssl module's)."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--root-path",
|
||||
type=nullable_str,
|
||||
default=None,
|
||||
help="FastAPI root_path when app is behind a path based routing proxy")
|
||||
help="FastAPI root_path when app is behind a path based routing proxy."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--middleware",
|
||||
type=nullable_str,
|
||||
@ -182,15 +183,15 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
"We accept multiple --middleware arguments. "
|
||||
"The value should be an import path. "
|
||||
"If a function is provided, vLLM will add it to the server "
|
||||
"using @app.middleware('http'). "
|
||||
"using ``@app.middleware('http')``. "
|
||||
"If a class is provided, vLLM will add it to the server "
|
||||
"using app.add_middleware(). ")
|
||||
"using ``app.add_middleware()``. ")
|
||||
parser.add_argument(
|
||||
"--return-tokens-as-token-ids",
|
||||
action="store_true",
|
||||
help="When --max-logprobs is specified, represents single tokens as "
|
||||
"strings of the form 'token_id:{token_id}' so that tokens that "
|
||||
"are not JSON-encodable can be identified.")
|
||||
help="When ``--max-logprobs`` is specified, represents single tokens "
|
||||
" as strings of the form 'token_id:{token_id}' so that tokens "
|
||||
"that are not JSON-encodable can be identified.")
|
||||
parser.add_argument(
|
||||
"--disable-frontend-multiprocessing",
|
||||
action="store_true",
|
||||
@ -205,9 +206,8 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
"--enable-auto-tool-choice",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help=
|
||||
"Enable auto tool choice for supported models. Use --tool-call-parser"
|
||||
" to specify which parser to use")
|
||||
help="Enable auto tool choice for supported models. Use "
|
||||
"``--tool-call-parser`` to specify which parser to use.")
|
||||
|
||||
valid_tool_parsers = ToolParserManager.tool_parsers.keys()
|
||||
parser.add_argument(
|
||||
@ -219,7 +219,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
help=
|
||||
"Select the tool call parser depending on the model that you're using."
|
||||
" This is used to parse the model-generated tool call into OpenAI API "
|
||||
"format. Required for --enable-auto-tool-choice.")
|
||||
"format. Required for ``--enable-auto-tool-choice``.")
|
||||
|
||||
parser.add_argument(
|
||||
"--tool-parser-plugin",
|
||||
@ -228,7 +228,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
help=
|
||||
"Special the tool parser plugin write to parse the model-generated tool"
|
||||
" into OpenAI API format, the name register in this plugin can be used "
|
||||
"in --tool-call-parser.")
|
||||
"in ``--tool-call-parser``.")
|
||||
|
||||
parser = AsyncEngineArgs.add_cli_args(parser)
|
||||
|
||||
@ -243,7 +243,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
"--disable-fastapi-docs",
|
||||
action='store_true',
|
||||
default=False,
|
||||
help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint"
|
||||
help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-prompt-tokens-details",
|
||||
|
Loading…
x
Reference in New Issue
Block a user