[Bugfix] Update InternVL input mapper to support image embeds (#9351)

2024-10-14 21:29:19 -07:00 · 2024-10-14 21:29:19 -07:00 · 55e081fbad
commit 55e081fbad
parent 8e836d982a
1 changed file with 2 additions and 0 deletions
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -342,6 +342,8 @@ class InternVLInputPipeline:
        elif is_list_of(data, Image.Image):
            # we can't stack here because images may have different num_patches
            data = [image_pixel_values_mapper(img) for img in data]
+        else:
+            return MultiModalInputs({"image_embeds": data})
        model_config = ctx.model_config
        tokenizer = cached_get_tokenizer(
            model_config.tokenizer,