[Frontend]-config-cli-args (#7737)
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: Kaunil Dhruv <kaunil_dhruv@intuit.com>
This commit is contained in:
parent
98cef6a227
commit
058344f89a
@ -11,5 +11,6 @@ pydantic >= 2.8
|
|||||||
torch
|
torch
|
||||||
py-cpuinfo
|
py-cpuinfo
|
||||||
transformers
|
transformers
|
||||||
|
openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args
|
||||||
mistral_common >= 1.3.4
|
mistral_common >= 1.3.4
|
||||||
openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args
|
openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args
|
@ -111,6 +111,32 @@ directory [here](https://github.com/vllm-project/vllm/tree/main/examples/)
|
|||||||
:prog: vllm serve
|
:prog: vllm serve
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Config file
|
||||||
|
|
||||||
|
The `serve` module can also accept arguments from a config file in
|
||||||
|
`yaml` format. The arguments in the yaml must be specified using the
|
||||||
|
long form of the argument outlined [here](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server).
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# config.yaml
|
||||||
|
|
||||||
|
host: "127.0.0.1"
|
||||||
|
port: 6379
|
||||||
|
uvicorn-log-level: "info"
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ vllm serve SOME_MODEL --config config.yaml
|
||||||
|
```
|
||||||
|
---
|
||||||
|
**NOTE**
|
||||||
|
If an argument is supplied both on the command line and in the config file, the value from the command line takes precedence.
|
||||||
|
The order of priorities is `command line > config file values > defaults`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Tool calling in the chat completion API
|
## Tool calling in the chat completion API
|
||||||
vLLM supports only named function calling in the chat completion API. The `tool_choice` options `auto` and `required` are **not yet supported** but on the roadmap.
|
vLLM supports only named function calling in the chat completion API. The `tool_choice` options `auto` and `required` are **not yet supported** but on the roadmap.
|
||||||
|
|
||||||
|
@ -27,3 +27,4 @@ soundfile # Required for audio processing
|
|||||||
gguf == 0.9.1
|
gguf == 0.9.1
|
||||||
importlib_metadata
|
importlib_metadata
|
||||||
mistral_common >= 1.3.4
|
mistral_common >= 1.3.4
|
||||||
|
pyyaml
|
2
tests/data/test_config.yaml
Normal file
2
tests/data/test_config.yaml
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
port: 12312
|
||||||
|
tensor_parallel_size: 2
|
@ -132,6 +132,16 @@ def parser():
|
|||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def parser_with_config():
    """Return a parser with a `serve` positional and the config-related options.

    The options registered here (`--config`, `--port`,
    `--tensor-parallel-size`) are the ones exercised by the config-file tests.
    """
    config_parser = FlexibleArgumentParser()
    config_parser.add_argument('serve')

    # (flag, value type) pairs, registered in declaration order.
    typed_options = (
        ('--config', str),
        ('--port', int),
        ('--tensor-parallel-size', int),
    )
    for flag, value_type in typed_options:
        config_parser.add_argument(flag, type=value_type)

    return config_parser
|
||||||
|
|
||||||
|
|
||||||
def test_underscore_to_dash(parser):
|
def test_underscore_to_dash(parser):
|
||||||
args = parser.parse_args(['--image_input_type', 'pixel_values'])
|
args = parser.parse_args(['--image_input_type', 'pixel_values'])
|
||||||
assert args.image_input_type == 'pixel_values'
|
assert args.image_input_type == 'pixel_values'
|
||||||
@ -176,3 +186,37 @@ def test_missing_required_argument(parser):
|
|||||||
parser.add_argument('--required-arg', required=True)
|
parser.add_argument('--required-arg', required=True)
|
||||||
with pytest.raises(SystemExit):
|
with pytest.raises(SystemExit):
|
||||||
parser.parse_args([])
|
parser.parse_args([])
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_override_to_config(parser_with_config):
    """A command-line value must win over the value from the config file.

    The check is repeated with `--config` placed before and after the
    explicit `--tensor-parallel-size` option.
    """
    config_option = ['--config', './data/test_config.yaml']
    cli_option = ['--tensor-parallel-size', '3']

    orderings = (
        ['serve'] + config_option + cli_option,
        ['serve'] + cli_option + config_option,
    )
    for argv in orderings:
        parsed = parser_with_config.parse_args(argv)
        assert parsed.tensor_parallel_size == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_config_args(parser_with_config):
    """Values from the config file are used when the CLI does not set them."""
    argv = ['serve', '--config', './data/test_config.yaml']
    parsed = parser_with_config.parse_args(argv)
    assert parsed.tensor_parallel_size == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_config_file(parser_with_config):
    """Invalid `--config` usages must raise the expected exception."""
    failing_cases = (
        # YAML-named path that is expected not to exist.
        (FileNotFoundError, ['serve', '--config', 'test_config.yml']),
        # Path whose extension is not yaml/yml.
        (ValueError, ['serve', '--config', './data/test_config.json']),
        # `--config` followed by another flag instead of a file path.
        (ValueError, [
            'serve', '--tensor-parallel-size', '3', '--config', '--batch-size',
            '32'
        ]),
    )

    for expected_error, argv in failing_cases:
        with pytest.raises(expected_error):
            parser_with_config.parse_args(argv)
|
||||||
|
@ -125,6 +125,15 @@ def main():
|
|||||||
serve_parser.add_argument("model_tag",
|
serve_parser.add_argument("model_tag",
|
||||||
type=str,
|
type=str,
|
||||||
help="The model tag to serve")
|
help="The model tag to serve")
|
||||||
|
serve_parser.add_argument(
|
||||||
|
"--config",
|
||||||
|
type=str,
|
||||||
|
default='',
|
||||||
|
required=False,
|
||||||
|
help="Read CLI options from a config file."
|
||||||
|
"Must be a YAML with the following options:"
|
||||||
|
"https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server"
|
||||||
|
)
|
||||||
serve_parser = make_arg_parser(serve_parser)
|
serve_parser = make_arg_parser(serve_parser)
|
||||||
serve_parser.set_defaults(dispatch_function=serve)
|
serve_parser.set_defaults(dispatch_function=serve)
|
||||||
|
|
||||||
|
101
vllm/utils.py
101
vllm/utils.py
@ -25,6 +25,7 @@ import numpy.typing as npt
|
|||||||
import psutil
|
import psutil
|
||||||
import torch
|
import torch
|
||||||
import torch.types
|
import torch.types
|
||||||
|
import yaml
|
||||||
from packaging.version import Version
|
from packaging.version import Version
|
||||||
from typing_extensions import ParamSpec, TypeIs, assert_never
|
from typing_extensions import ParamSpec, TypeIs, assert_never
|
||||||
|
|
||||||
@ -1093,6 +1094,9 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
|
|||||||
if args is None:
|
if args is None:
|
||||||
args = sys.argv[1:]
|
args = sys.argv[1:]
|
||||||
|
|
||||||
|
if '--config' in args:
|
||||||
|
args = FlexibleArgumentParser._pull_args_from_config(args)
|
||||||
|
|
||||||
# Convert underscores to dashes and vice versa in argument names
|
# Convert underscores to dashes and vice versa in argument names
|
||||||
processed_args = []
|
processed_args = []
|
||||||
for arg in args:
|
for arg in args:
|
||||||
@ -1109,6 +1113,103 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
|
|||||||
|
|
||||||
return super().parse_args(processed_args, namespace)
|
return super().parse_args(processed_args, namespace)
|
||||||
|
|
||||||
|
@staticmethod
def _pull_args_from_config(args: List[str]) -> List[str]:
    """Replace `--config <file>` in `args` with the options from that file.

    The options loaded from the config file are inserted right after the
    sub-command (the first element of `args`) and before the remaining
    command-line arguments. Keeping the explicit command-line arguments
    last is what enforces the precedence `cli > config > defaults` when
    the list is later parsed by `super().parse_args`.

    Example, with `config.yaml` containing:

        port: 12323
        tensor-parallel-size: 4

    the command line

        vllm serve "facebook/opt-12B" --config config.yaml -tp 2

    arrives here as

        ["serve", "facebook/opt-12B", "--config", "config.yaml", "-tp", "2"]

    and is returned as

        ["serve", "--port", "12323", "--tensor-parallel-size", "4",
         "facebook/opt-12B", "-tp", "2"]

    Args:
        args: The raw argument list; it must contain `--config`.

    Returns:
        A new argument list without `--config <file>` and with the
        config-file options spliced in.

    Raises:
        ValueError: If `--config` is missing, given more than once, or not
            followed by a file path; also propagated from
            `_load_config_file` for an unsupported file.
    """
    # Raise instead of assert so the check survives `python -O`.
    if args.count('--config') > 1:
        raise ValueError("More than one config file specified!")

    index = args.index('--config')
    if index == len(args) - 1:
        raise ValueError("No config file specified! "
                         "Please check your command-line arguments.")

    file_path = args[index + 1]
    config_args = FlexibleArgumentParser._load_config_file(file_path)

    # 0th index is for {serve,chat,complete}, followed by config args,
    # followed by the rest of the cli args. Maintaining this order enforces
    # the precedence of cli > config > defaults.
    # If `--config` itself is the first token there is no sub-command to
    # keep in front, so the config args simply come first.
    head = args[:1] if index > 0 else []
    return head + config_args + args[1:index] + args[index + 2:]
|
||||||
|
|
||||||
|
@staticmethod
def _load_config_file(file_path: str) -> List[str]:
    """Load a YAML file and flatten it into an argparse-style list.

    Only a flat mapping of atomic values is expected. For example, a file
    containing

        port: 12323
        tensor-parallel-size: 4

    is returned as

        ['--port', '12323', '--tensor-parallel-size', '4']

    Args:
        file_path: Path to the config file; its extension must be `yaml`
            or `yml`.

    Returns:
        Alternating `--key` / `value` strings in file order. An empty
        file yields an empty list.

    Raises:
        ValueError: If the extension is not yaml/yml, or the file's top
            level is not a mapping.
        Exception: Any error raised while opening or parsing the file is
            logged and re-raised unchanged (e.g. `FileNotFoundError`).
    """
    extension: str = file_path.split('.')[-1]
    if extension not in ('yaml', 'yml'):
        # Format the message explicitly: ValueError does not apply
        # %-style arguments the way logging calls do.
        raise ValueError("Config file must be of a yaml/yml type. "
                         f"{extension} supplied")

    try:
        with open(file_path, 'r') as config_file:
            config = yaml.safe_load(config_file)
    except Exception:
        logger.error(
            "Unable to read the config file at %s. "
            "Make sure path is correct", file_path)
        # Bare raise keeps the original traceback intact.
        raise

    # safe_load returns None for an empty document: nothing to add.
    if config is None:
        return []

    if not isinstance(config, dict):
        raise ValueError(
            "Config file must contain a flat mapping of options. "
            f"{type(config).__name__} found in {file_path}")

    processed_args: List[str] = []
    for key, value in config.items():
        # f-string so non-string YAML keys do not break concatenation.
        processed_args.append(f'--{key}')
        processed_args.append(str(value))

    return processed_args
|
||||||
|
|
||||||
|
|
||||||
async def _run_task_with_lock(task: Callable, lock: asyncio.Lock, *args,
|
async def _run_task_with_lock(task: Callable, lock: asyncio.Lock, *args,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user