[Misc] Move device options to a single place (#8322)
This commit is contained in:
parent
73202dbe77
commit
8baa454937
@ -10,7 +10,7 @@ import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.engine.arg_utils import DEVICE_OPTIONS, EngineArgs
|
||||
from vllm.inputs import PromptInputs
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
@ -205,13 +205,11 @@ if __name__ == '__main__':
|
||||
default=None,
|
||||
help=('path to save the pytorch profiler output. Can be visualized '
|
||||
'with ui.perfetto.dev or Tensorboard.'))
|
||||
parser.add_argument(
|
||||
"--device",
|
||||
type=str,
|
||||
default="auto",
|
||||
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"],
|
||||
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
|
||||
'CPU.')
|
||||
parser.add_argument("--device",
|
||||
type=str,
|
||||
default="auto",
|
||||
choices=DEVICE_OPTIONS,
|
||||
help='device type for vLLM execution')
|
||||
parser.add_argument('--block-size',
|
||||
type=int,
|
||||
default=16,
|
||||
|
@ -11,7 +11,7 @@ from tqdm import tqdm
|
||||
from transformers import (AutoModelForCausalLM, AutoTokenizer,
|
||||
PreTrainedTokenizerBase)
|
||||
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
||||
from vllm.engine.arg_utils import DEVICE_OPTIONS, AsyncEngineArgs, EngineArgs
|
||||
from vllm.entrypoints.openai.api_server import (
|
||||
build_async_engine_client_from_engine_args)
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||
@ -451,13 +451,11 @@ if __name__ == "__main__":
|
||||
'accuracy issues. FP8_E5M2 (without scaling) is only supported on '
|
||||
'cuda version greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is '
|
||||
'instead supported for common inference criteria.')
|
||||
parser.add_argument(
|
||||
"--device",
|
||||
type=str,
|
||||
default="auto",
|
||||
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"],
|
||||
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
|
||||
'CPU.')
|
||||
parser.add_argument("--device",
|
||||
type=str,
|
||||
default="auto",
|
||||
choices=DEVICE_OPTIONS,
|
||||
help='device type for vLLM execution')
|
||||
parser.add_argument(
|
||||
"--num-scheduler-steps",
|
||||
type=int,
|
||||
|
@ -26,6 +26,16 @@ logger = init_logger(__name__)
|
||||
|
||||
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
|
||||
|
||||
DEVICE_OPTIONS = [
|
||||
"auto",
|
||||
"cuda",
|
||||
"neuron",
|
||||
"cpu",
|
||||
"openvino",
|
||||
"tpu",
|
||||
"xpu",
|
||||
]
|
||||
|
||||
|
||||
def nullable_str(val: str):
|
||||
if not val or val == "None":
|
||||
@ -553,10 +563,7 @@ class EngineArgs:
|
||||
parser.add_argument("--device",
|
||||
type=str,
|
||||
default=EngineArgs.device,
|
||||
choices=[
|
||||
"auto", "cuda", "neuron", "cpu", "openvino",
|
||||
"tpu", "xpu"
|
||||
],
|
||||
choices=DEVICE_OPTIONS,
|
||||
help='Device type for vLLM execution.')
|
||||
parser.add_argument('--num-scheduler-steps',
|
||||
type=int,
|
||||
|
Loading…
x
Reference in New Issue
Block a user