From 7c80368710755ab54147eaa0090b7782622fda48 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Mon, 7 Apr 2025 19:04:02 +0800
Subject: [PATCH] [VLM] Florence-2 supports online serving (#16164)

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 examples/template_florence2.jinja       |  7 +++++++
 vllm/entrypoints/chat_utils.py          |  4 ++--
 vllm/model_executor/models/florence2.py | 14 +++++++++++++-
 3 files changed, 22 insertions(+), 3 deletions(-)
 create mode 100644 examples/template_florence2.jinja

diff --git a/examples/template_florence2.jinja b/examples/template_florence2.jinja
new file mode 100644
index 00000000..d257aed6
--- /dev/null
+++ b/examples/template_florence2.jinja
@@ -0,0 +1,7 @@
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+        {{- message['content'] -}}
+    {%- elif message['role'] == 'assistant' -%}
+        {{- message['content'] -}}
+    {%- endif -%}
+{%- endfor -%}
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 9129e47d..9041b92a 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -487,8 +487,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
             return "<|endoftext10|>"  # 200010 (see vocab.json in hf model)
         if model_type in ("minicpmo", "minicpmv"):
             return "(./)"
-        if model_type in ("blip-2", "fuyu", "paligemma", "pixtral",
-                          "mistral3"):
+        if model_type in ("blip-2", "florence2", "fuyu", "paligemma",
+                          "pixtral", "mistral3"):
             # These models do not use image tokens in the prompt
             return None
         if model_type == "qwen":
diff --git a/vllm/model_executor/models/florence2.py b/vllm/model_executor/models/florence2.py
index 02535cc5..70b8d51b 100644
--- a/vllm/model_executor/models/florence2.py
+++ b/vllm/model_executor/models/florence2.py
@@ -10,7 +10,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
-from transformers import BatchFeature, PretrainedConfig
+from transformers import BartTokenizer, BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -826,6 +826,18 @@ class Florence2MultiModalProcessor(
     ) -> Union[str, list[int]]:
         return [self.info.get_hf_config().eos_token_id]
 
+    def _apply_hf_processor_tokens_only(
+        self,
+        prompt_tokens: list[int],
+    ) -> list[int]:
+        hf_processor = self.info.get_hf_processor()
+        tokenizer: BartTokenizer = hf_processor.tokenizer
+        prompt_text = tokenizer.decode(prompt_tokens)
+        # convert task tokens to prompt
+        prompt_text = hf_processor._construct_prompts([prompt_text])[0]
+        prompt_tokens = tokenizer.encode(prompt_text, add_special_tokens=False)
+        return prompt_tokens
+
     def _call_hf_processor(
         self,
         prompt: str,
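
Note (editorial, not part of the patch): the new
_apply_hf_processor_tokens_only() hook is what enables the online path,
since the server may hand the multimodal processor an already-tokenized
prompt, in which case Florence-2 task tokens still need to be expanded
into their natural-language prompts. Below is a minimal sketch of the same
round trip against the HF processor directly; the example task token and
its expansion follow the Florence-2 processor on the HF Hub and should be
treated as assumptions of this sketch:

    from transformers import AutoProcessor

    # Florence-2 ships its processor as remote code on the HF Hub.
    processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large",
                                              trust_remote_code=True)
    tokenizer = processor.tokenizer

    # A tokenized prompt, as the server would pass it in.
    prompt_tokens = tokenizer.encode("<CAPTION>", add_special_tokens=False)

    # Decode, expand the task token (e.g. "<CAPTION>" ->
    # "What does the image describe?"), then re-encode,
    # mirroring the patched method above.
    prompt_text = tokenizer.decode(prompt_tokens)
    prompt_text = processor._construct_prompts([prompt_text])[0]
    prompt_tokens = tokenizer.encode(prompt_text, add_special_tokens=False)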
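
End-to-end usage sketch (editorial, not part of the patch): serving
Florence-2 with the new chat template and querying it through vLLM's
OpenAI-compatible API. The model name and task token follow the Florence-2
model card; the port and api_key are the server defaults; adjust as needed.

    # Launch the server first (shell):
    #   vllm serve microsoft/Florence-2-large --trust-remote-code \
    #       --chat-template examples/template_florence2.jinja
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    response = client.chat.completions.create(
        model="microsoft/Florence-2-large",
        messages=[{
            "role": "user",
            "content": [
                # Florence-2 uses a task token rather than an image
                # placeholder in the prompt (see the chat_utils.py change).
                {"type": "text", "text": "<CAPTION>"},
                {"type": "image_url",
                 "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        }],
    )
    print(response.choices[0].message.content)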