[Frontend] OpenAI API server: Add add_special_tokens to ChatCompletionRequest (default False) (#5278)
parent c65146e75e
commit f0a500545f
@@ -176,6 +176,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
             "This is a parameter used by chat template in tokenizer config of the "
             "model."),
     )
+    add_special_tokens: Optional[bool] = Field(
+        default=False,
+        description=(
+            "If true, special tokens (e.g. BOS) will be added to the prompt "
+            "on top of what is added by the chat template. "
+            "For most models, the chat template takes care of adding the "
+            "special tokens so this should be set to False (as is the "
+            "default)."),
+    )
     include_stop_str_in_output: Optional[bool] = Field(
         default=False,
         description=(
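The new field is an extra, non-OpenAI request parameter, so a client opts in simply by including it in the JSON body of a chat completion request. A minimal usage sketch, assuming a vLLM OpenAI-compatible server at localhost:8000 and a placeholder model name (neither is part of this commit):

import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "my-model",  # placeholder
        "messages": [{"role": "user", "content": "Hello"}],
        # New in this commit; defaults to False, so omitting it keeps the
        # existing behavior of not adding BOS on top of the chat template.
        "add_special_tokens": True,
    },
)
print(resp.json()["choices"][0]["message"]["content"])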
@@ -163,7 +163,9 @@ class OpenAIServingChat(OpenAIServing):
         try:
             # Tokenize/detokenize depending on prompt format (string/token list)
             prompt_ids, prompt_text = self._validate_prompt_and_tokenize(
-                request, prompt=prompt, add_special_tokens=False)
+                request,
+                prompt=prompt,
+                add_special_tokens=request.add_special_tokens)
             sampling_params = request.to_sampling_params()
             lora_request = self._maybe_get_lora(request)
             decoding_config = await self.engine.get_decoding_config()
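For context, _validate_prompt_and_tokenize (changed in the next two hunks) accepts either a string prompt or pre-tokenized IDs, and the flag only matters on the string path. A minimal sketch of that branch, not vLLM's actual code, assuming a Hugging Face tokenizer:

from typing import List, Optional, Tuple

def validate_and_tokenize(
        tokenizer,
        prompt: Optional[str] = None,
        prompt_ids: Optional[List[int]] = None,
        add_special_tokens: bool = False) -> Tuple[List[int], str]:
    # String prompt: tokenize, honoring the add_special_tokens flag.
    if prompt_ids is None:
        ids = tokenizer(prompt,
                        add_special_tokens=add_special_tokens).input_ids
        return ids, prompt
    # Token-list prompt: detokenize; the flag is irrelevant here.
    return prompt_ids, tokenizer.decode(prompt_ids)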
@@ -131,7 +131,8 @@ class OpenAIServing:
         prompt_ids: Optional[List[int]] = None,
         truncate_prompt_tokens: Optional[Annotated[int,
                                                    Field(ge=1)]] = None,
-        add_special_tokens: bool = True) -> Tuple[List[int], str]:
+        add_special_tokens: Optional[bool] = True
+    ) -> Tuple[List[int], str]:
         if not (prompt or prompt_ids):
             raise ValueError("Either prompt or prompt_ids should be provided.")
         if (prompt and prompt_ids):
@@ -139,11 +140,12 @@ class OpenAIServing:
                 "Only one of prompt or prompt_ids should be provided.")
 
         if prompt_ids is None:
-            # When using OpenAIServingChat for chat completions, the
-            # special tokens (e.g., BOS) have already been added by the
-            # chat template. Therefore, we do not need to add them again.
-            # Set add_special_tokens to False to avoid adding the BOS tokens
-            # again.
+            # When using OpenAIServingChat for chat completions, for
+            # most models the special tokens (e.g., BOS) have already
+            # been added by the chat template. Therefore, we do not
+            # need to add them again.
+            # Set add_special_tokens to False (by default) to avoid
+            # adding the BOS tokens again.
             tokenizer_kwargs: Dict[str, Any] = {
                 "add_special_tokens": add_special_tokens
             }
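The comment above is the motivation for the False default: most chat templates already emit BOS, so tokenizing their output with add_special_tokens=True would duplicate it. A quick way to observe this with a Hugging Face tokenizer (the model name is only an example of a template that prepends BOS):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
prompt = tok.apply_chat_template(
    [{"role": "user", "content": "Hi"}], tokenize=False)
# The rendered prompt already starts with the BOS token "<s>", so
# tokenizing with add_special_tokens=True prepends a second BOS id.
with_flag = tok(prompt, add_special_tokens=True).input_ids
without_flag = tok(prompt, add_special_tokens=False).input_ids
print(with_flag[:3], without_flag[:3])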