[Bugfix] Use LoadFormat values for vllm serve --load-format (#7784)

Author: Michael Goin, 2024-08-22 14:37:08 -04:00 (committed by GitHub)
Parent: 57792ed469
Commit: 15310b5101


@@ -9,8 +9,8 @@ import torch
 import vllm.envs as envs
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
-                         EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
-                         ObservabilityConfig, ParallelConfig,
+                         EngineConfig, LoadConfig, LoadFormat, LoRAConfig,
+                         ModelConfig, ObservabilityConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig,
                          SpeculativeConfig, TokenizerPoolConfig)
 from vllm.executor.executor_base import ExecutorBase
@@ -214,10 +214,7 @@ class EngineArgs:
             '--load-format',
             type=str,
             default=EngineArgs.load_format,
-            choices=[
-                'auto', 'pt', 'safetensors', 'npcache', 'dummy', 'tensorizer',
-                'bitsandbytes'
-            ],
+            choices=[f.value for f in LoadFormat],
             help='The format of the model weights to load.\n\n'
             '* "auto" will try to load the weights in the safetensors format '
             'and fall back to the pytorch bin format if safetensors format '
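For context, the change replaces a hand-maintained choices list with values derived from the LoadFormat enum, so CLI validation stays in sync with whatever formats the enum defines. Below is a minimal standalone sketch of that pattern; the enum members are illustrative only, not the exact set exported by vllm.config.LoadFormat:

import argparse
import enum


class LoadFormat(str, enum.Enum):
    # Illustrative members only; the real enum in vllm.config defines
    # the full set of supported weight formats.
    AUTO = "auto"
    PT = "pt"
    SAFETENSORS = "safetensors"
    DUMMY = "dummy"


parser = argparse.ArgumentParser()
parser.add_argument(
    '--load-format',
    type=str,
    default=LoadFormat.AUTO.value,
    # Deriving choices from the enum means a newly added format is
    # accepted by the CLI without touching this argparse call.
    choices=[f.value for f in LoadFormat],
    help='The format of the model weights to load.')

args = parser.parse_args(['--load-format', 'safetensors'])
print(args.load_format)  # safetensors

Because the enum members are str-valued, their .value strings can be passed straight to argparse as both the default and the choices, which is what the patched code relies on.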