[Bugfix] add hf_token to EngineArgs (#16093)
Signed-off-by: paolovic <paul-philipp.luley@uzh.ch>
Co-authored-by: paolovic <paul-philipp.luley@uzh.ch>
parent 3a100b9278
commit da224daaa9
@@ -173,6 +173,9 @@ class ModelConfig:
             Defaults to True.
         config_format: The config format which shall be loaded.
             Defaults to 'auto' which defaults to 'hf'.
+        hf_token: The token to use as HTTP bearer authorization for remote
+            files. If `True`, will use the token generated when running
+            `huggingface-cli login` (stored in `~/.huggingface`).
         hf_overrides: If a dictionary, contains arguments to be forwarded to the
             HuggingFace config. If a callable, it is called to update the
             HuggingFace config.
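The `hf_token` value follows the usual Hugging Face convention for a `token` argument: `None` means no explicit credential, `True` means reuse the token cached by `huggingface-cli login`, and a string is used directly as the bearer token. A minimal illustration of that convention with `transformers` (the model id is a placeholder, not taken from this change):

    from transformers import AutoConfig

    # token=None   -> no explicit credential (the HF_TOKEN env var may still apply)
    # token=True   -> reuse the token cached by `huggingface-cli login`
    # token="hf_…" -> use this literal string as the HTTP bearer credential
    config = AutoConfig.from_pretrained(
        "org/gated-model",  # placeholder id for a gated repository
        token=True,
    )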
@@ -256,6 +259,7 @@ class ModelConfig:
         limit_mm_per_prompt: Optional[Mapping[str, int]] = None,
         use_async_output_proc: bool = True,
         config_format: ConfigFormat = ConfigFormat.AUTO,
+        hf_token: Optional[Union[bool, str]] = None,
         hf_overrides: Optional[HfOverrides] = None,
         mm_processor_kwargs: Optional[dict[str, Any]] = None,
         disable_mm_preprocessor_cache: bool = False,
@@ -356,7 +360,7 @@ class ModelConfig:
         self.hf_text_config = get_hf_text_config(self.hf_config)
         self.encoder_config = self._get_encoder_config()
         self.hf_image_processor_config = get_hf_image_processor_config(
-            self.model, revision)
+            self.model, hf_token=hf_token, revision=revision)
         self.dtype = _get_and_verify_dtype(self.hf_config, dtype)
         self.use_async_output_proc = use_async_output_proc
         self.mm_processor_kwargs = mm_processor_kwargs
@@ -138,6 +138,7 @@ class EngineArgs:
     code_revision: Optional[str] = None
     rope_scaling: Optional[Dict[str, Any]] = None
     rope_theta: Optional[float] = None
+    hf_token: Optional[Union[bool, str]] = None
     hf_overrides: Optional[HfOverrides] = None
     tokenizer_revision: Optional[str] = None
     quantization: Optional[str] = None
@@ -602,6 +603,16 @@ class EngineArgs:
                             help='RoPE theta. Use with `rope_scaling`. In '
                             'some cases, changing the RoPE theta improves the '
                             'performance of the scaled model.')
+        parser.add_argument(
+            '--hf-token',
+            type=str,
+            nargs='?',
+            const=True,
+            default=None,
+            help='The token to use as HTTP bearer authorization'
+            ' for remote files. If `True`, will use the token '
+            'generated when running `huggingface-cli login` '
+            '(stored in `~/.huggingface`).')
         parser.add_argument('--hf-overrides',
                             type=json.loads,
                             default=EngineArgs.hf_overrides,
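The `nargs='?'` together with `const=True` and `default=None` is what makes `--hf-token` three-valued: omitted, bare flag, or flag with a value. A standalone argparse sketch of that behaviour (independent of vLLM's own parser class):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--hf-token', type=str, nargs='?', const=True, default=None)

    print(parser.parse_args([]).hf_token)                        # None: flag not given
    print(parser.parse_args(['--hf-token']).hf_token)            # True: use cached login token
    print(parser.parse_args(['--hf-token', 'hf_abc']).hf_token)  # 'hf_abc': explicit token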
@@ -1038,6 +1049,7 @@ class EngineArgs:
             code_revision=self.code_revision,
             rope_scaling=self.rope_scaling,
             rope_theta=self.rope_theta,
+            hf_token=self.hf_token,
             hf_overrides=self.hf_overrides,
             tokenizer_revision=self.tokenizer_revision,
             max_model_len=self.max_model_len,
@@ -117,6 +117,9 @@ class LLM:
         disable_custom_all_reduce: See :class:`~vllm.config.ParallelConfig`
         disable_async_output_proc: Disable async output processing.
             This may result in lower performance.
+        hf_token: The token to use as HTTP bearer authorization for remote
+            files. If `True`, will use the token generated when running
+            `huggingface-cli login` (stored in `~/.huggingface`).
         hf_overrides: If a dictionary, contains arguments to be forwarded to the
             HuggingFace config. If a callable, it is called to update the
             HuggingFace config.
@@ -177,6 +180,7 @@ class LLM:
         max_seq_len_to_capture: int = 8192,
         disable_custom_all_reduce: bool = False,
         disable_async_output_proc: bool = False,
+        hf_token: Optional[Union[bool, str]] = None,
         hf_overrides: Optional[HfOverrides] = None,
         mm_processor_kwargs: Optional[dict[str, Any]] = None,
         # After positional args are removed, move this right below `model`
@@ -232,6 +236,7 @@ class LLM:
             max_seq_len_to_capture=max_seq_len_to_capture,
             disable_custom_all_reduce=disable_custom_all_reduce,
             disable_async_output_proc=disable_async_output_proc,
+            hf_token=hf_token,
             hf_overrides=hf_overrides,
             mm_processor_kwargs=mm_processor_kwargs,
             override_pooler_config=override_pooler_config,
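With the argument plumbed through `LLM.__init__` into `EngineArgs`, offline usage can pass the credential directly; a minimal sketch, assuming a gated model id and token (both placeholders):

    from vllm import LLM

    # Reuse the token cached by `huggingface-cli login` ...
    llm = LLM(model="org/gated-model", hf_token=True)

    # ... or pass an explicit token string.
    llm = LLM(model="org/gated-model", hf_token="hf_xxxxxxxx")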
@@ -712,6 +712,7 @@ def load_params_config(model: Union[str, Path], revision: Optional[str],
 
 def get_hf_image_processor_config(
     model: Union[str, Path],
+    hf_token: Optional[Union[bool, str]] = None,
     revision: Optional[str] = None,
     **kwargs,
 ) -> Dict[str, Any]:
@@ -721,7 +722,10 @@ def get_hf_image_processor_config(
     # Separate model folder from file path for GGUF models
     if check_gguf_file(model):
         model = Path(model).parent
-    return get_image_processor_config(model, revision=revision, **kwargs)
+    return get_image_processor_config(model,
+                                      token=hf_token,
+                                      revision=revision,
+                                      **kwargs)
 
 
 def get_hf_text_config(config: PretrainedConfig):
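The credential is finally handed to the `transformers` helper as its `token=` keyword, the hub authentication parameter shown in the diff above. Calling the wrapper directly would look like this; the import path assumes the helper lives in `vllm.transformers_utils.config`, and the model id and token are placeholders:

    from vllm.transformers_utils.config import get_hf_image_processor_config

    ip_config = get_hf_image_processor_config(
        "org/gated-multimodal-model",   # placeholder repository id
        hf_token="hf_xxxxxxxx",         # or True to reuse the cached login token
        revision="main",
    )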