[Platform] add pre_register_and_update function (#12432)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
parent
da317197dd
commit
2e3b969ec0
@ -3057,7 +3057,8 @@ class VllmConfig:
|
|||||||
kv_transfer_config: KVTransferConfig = field(default=None,
|
kv_transfer_config: KVTransferConfig = field(default=None,
|
||||||
init=True) # type: ignore
|
init=True) # type: ignore
|
||||||
# some opaque config, only used to provide additional information
|
# some opaque config, only used to provide additional information
|
||||||
# for the hash computation, mainly used for testing and debugging.
|
# for the hash computation, mainly used for testing, debugging, or
|
||||||
|
# out-of-tree config registration.
|
||||||
additional_config: SupportsHash = field(default=None,
|
additional_config: SupportsHash = field(default=None,
|
||||||
init=True) # type: ignore
|
init=True) # type: ignore
|
||||||
instance_id: str = ""
|
instance_id: str = ""
|
||||||
|
@ -20,6 +20,7 @@ from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
|
|||||||
from vllm.executor.executor_base import ExecutorBase
|
from vllm.executor.executor_base import ExecutorBase
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||||
|
from vllm.plugins import load_general_plugins
|
||||||
from vllm.transformers_utils.utils import check_gguf_file
|
from vllm.transformers_utils.utils import check_gguf_file
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import FlexibleArgumentParser, StoreBoolean
|
from vllm.utils import FlexibleArgumentParser, StoreBoolean
|
||||||
@ -203,6 +204,8 @@ class EngineArgs:
|
|||||||
|
|
||||||
calculate_kv_scales: Optional[bool] = None
|
calculate_kv_scales: Optional[bool] = None
|
||||||
|
|
||||||
|
additional_config: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if not self.tokenizer:
|
if not self.tokenizer:
|
||||||
self.tokenizer = self.model
|
self.tokenizer = self.model
|
||||||
@ -984,6 +987,14 @@ class EngineArgs:
|
|||||||
'be loaded from the model checkpoint if available. '
|
'be loaded from the model checkpoint if available. '
|
||||||
'Otherwise, the scales will default to 1.0.')
|
'Otherwise, the scales will default to 1.0.')
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--additional-config",
|
||||||
|
type=json.loads,
|
||||||
|
default=None,
|
||||||
|
help="Additional config for specified platform in JSON format. "
|
||||||
|
"Different platforms may support different configs. Make sure the "
|
||||||
|
"configs are valid for the platform you are using. The input format"
|
||||||
|
" is like '{\"config_key\":\"config_value\"}'")
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -1044,6 +1055,9 @@ class EngineArgs:
|
|||||||
def create_engine_config(self,
|
def create_engine_config(self,
|
||||||
usage_context: Optional[UsageContext] = None
|
usage_context: Optional[UsageContext] = None
|
||||||
) -> VllmConfig:
|
) -> VllmConfig:
|
||||||
|
from vllm.platforms import current_platform
|
||||||
|
current_platform.pre_register_and_update()
|
||||||
|
|
||||||
if envs.VLLM_USE_V1:
|
if envs.VLLM_USE_V1:
|
||||||
self._override_v1_engine_args(usage_context)
|
self._override_v1_engine_args(usage_context)
|
||||||
|
|
||||||
@ -1287,6 +1301,7 @@ class EngineArgs:
|
|||||||
prompt_adapter_config=prompt_adapter_config,
|
prompt_adapter_config=prompt_adapter_config,
|
||||||
compilation_config=self.compilation_config,
|
compilation_config=self.compilation_config,
|
||||||
kv_transfer_config=self.kv_transfer_config,
|
kv_transfer_config=self.kv_transfer_config,
|
||||||
|
additional_config=self.additional_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
if envs.VLLM_USE_V1:
|
if envs.VLLM_USE_V1:
|
||||||
@ -1347,6 +1362,12 @@ class AsyncEngineArgs(EngineArgs):
|
|||||||
parser.add_argument('--disable-log-requests',
|
parser.add_argument('--disable-log-requests',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Disable logging requests.')
|
help='Disable logging requests.')
|
||||||
|
# Initialize plugins to update the parser. For example, a plugin may
|
||||||
|
# add a new kind of quantization method to the --quantization argument or
|
||||||
|
# a new device to --device argument.
|
||||||
|
load_general_plugins()
|
||||||
|
from vllm.platforms import current_platform
|
||||||
|
current_platform.pre_register_and_update(parser)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,8 +13,10 @@ from vllm.logger import init_logger
|
|||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
|
from vllm.utils import FlexibleArgumentParser
|
||||||
else:
|
else:
|
||||||
VllmConfig = None
|
VllmConfig = None
|
||||||
|
FlexibleArgumentParser = None
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
@ -223,6 +225,22 @@ class Platform:
|
|||||||
np.random.seed(seed)
|
np.random.seed(seed)
|
||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def pre_register_and_update(cls,
|
||||||
|
parser: Optional[FlexibleArgumentParser] = None
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Do some pre-registration or update action for the current platform.
|
||||||
|
|
||||||
|
This function is called before the global VllmConfig is initialized or CLI
|
||||||
|
arguments are parsed. It's used for out-of-tree platforms to register or
|
||||||
|
update the configuration.
|
||||||
|
|
||||||
|
For example, the out-of-tree quantization config can be imported and
|
||||||
|
registered here dynamically.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
|
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user