[Bugfix] Use LoadFormat values for vllm serve --load-format
(#7784)
This commit is contained in:
parent
57792ed469
commit
15310b5101
@@ -9,8 +9,8 @@ import torch

 import vllm.envs as envs
-from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
-                         EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
-                         ObservabilityConfig, ParallelConfig,
+from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
+                         EngineConfig, LoadConfig, LoadFormat, LoRAConfig,
+                         ModelConfig, ObservabilityConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig,
                          SpeculativeConfig, TokenizerPoolConfig)
 from vllm.executor.executor_base import ExecutorBase
@@ -214,10 +214,7 @@ class EngineArgs:
             '--load-format',
             type=str,
             default=EngineArgs.load_format,
-            choices=[
-                'auto', 'pt', 'safetensors', 'npcache', 'dummy', 'tensorizer',
-                'bitsandbytes'
-            ],
+            choices=[f.value for f in LoadFormat],
             help='The format of the model weights to load.\n\n'
             '* "auto" will try to load the weights in the safetensors format '
             'and fall back to the pytorch bin format if safetensors format '
|
Loading…
x
Reference in New Issue
Block a user