[Platform] add pre_register_and_update function (#12432)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
parent
da317197dd
commit
2e3b969ec0
@ -3057,7 +3057,8 @@ class VllmConfig:
|
||||
kv_transfer_config: KVTransferConfig = field(default=None,
|
||||
init=True) # type: ignore
|
||||
# some opaque config, only used to provide additional information
|
||||
# for the hash computation, mainly used for testing and debugging.
|
||||
# for the hash computation, mainly used for testing, debugging or out of
|
||||
# tree config registration.
|
||||
additional_config: SupportsHash = field(default=None,
|
||||
init=True) # type: ignore
|
||||
instance_id: str = ""
|
||||
|
@ -20,6 +20,7 @@ from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
|
||||
from vllm.executor.executor_base import ExecutorBase
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||
from vllm.plugins import load_general_plugins
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import FlexibleArgumentParser, StoreBoolean
|
||||
@ -203,6 +204,8 @@ class EngineArgs:
|
||||
|
||||
calculate_kv_scales: Optional[bool] = None
|
||||
|
||||
additional_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
def __post_init__(self):
|
||||
if not self.tokenizer:
|
||||
self.tokenizer = self.model
|
||||
@ -984,6 +987,14 @@ class EngineArgs:
|
||||
'be loaded from the model checkpoint if available. '
|
||||
'Otherwise, the scales will default to 1.0.')
|
||||
|
||||
parser.add_argument(
|
||||
"--additional-config",
|
||||
type=json.loads,
|
||||
default=None,
|
||||
help="Additional config for specified platform in JSON format. "
|
||||
"Different platforms may support different configs. Make sure the "
|
||||
"configs are valid for the platform you are using. The input format"
|
||||
" is like '{\"config_key\":\"config_value\"}'")
|
||||
return parser
|
||||
|
||||
@classmethod
|
||||
@ -1044,6 +1055,9 @@ class EngineArgs:
|
||||
def create_engine_config(self,
|
||||
usage_context: Optional[UsageContext] = None
|
||||
) -> VllmConfig:
|
||||
from vllm.platforms import current_platform
|
||||
current_platform.pre_register_and_update()
|
||||
|
||||
if envs.VLLM_USE_V1:
|
||||
self._override_v1_engine_args(usage_context)
|
||||
|
||||
@ -1287,6 +1301,7 @@ class EngineArgs:
|
||||
prompt_adapter_config=prompt_adapter_config,
|
||||
compilation_config=self.compilation_config,
|
||||
kv_transfer_config=self.kv_transfer_config,
|
||||
additional_config=self.additional_config,
|
||||
)
|
||||
|
||||
if envs.VLLM_USE_V1:
|
||||
@ -1347,6 +1362,12 @@ class AsyncEngineArgs(EngineArgs):
|
||||
parser.add_argument('--disable-log-requests',
|
||||
action='store_true',
|
||||
help='Disable logging requests.')
|
||||
# Initialize plugin to update the parser, for example, The plugin may
|
||||
# adding a new kind of quantization method to --quantization argument or
|
||||
# a new device to --device argument.
|
||||
load_general_plugins()
|
||||
from vllm.platforms import current_platform
|
||||
current_platform.pre_register_and_update(parser)
|
||||
return parser
|
||||
|
||||
|
||||
|
@ -13,8 +13,10 @@ from vllm.logger import init_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
else:
|
||||
VllmConfig = None
|
||||
FlexibleArgumentParser = None
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@ -223,6 +225,22 @@ class Platform:
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
|
||||
@classmethod
|
||||
def pre_register_and_update(cls,
|
||||
parser: Optional[FlexibleArgumentParser] = None
|
||||
) -> None:
|
||||
"""
|
||||
Do some pre-registeration or update action for the current platform.
|
||||
|
||||
This function is called before global VllmConfig is initialized or cli
|
||||
arguments are parsed. It's used for out-of-tree platforms to register or
|
||||
update the configuration.
|
||||
|
||||
For example, the out-of-tree quantization config can be imported and
|
||||
registered here dynamically.
|
||||
"""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user