[TPU][V1] Make --disable_chunked_mm_input mandatory for serving MM models (#16483)

Signed-off-by: NickLucche <nlucches@redhat.com>
2025-04-11 19:06:14 +02:00 · 2025-04-11 19:06:14 +02:00 · 4d022cbc75
commit 4d022cbc75
parent 70de35a881
1 changed file with 7 additions and 0 deletions
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -120,6 +120,13 @@ class TpuPlatform(Platform):
        assert not vllm_config.speculative_config, (
            "Speculative decoding is not yet supported for TPU backend")

+        if scheduler_config.is_multimodal_model and not \
+            scheduler_config.disable_chunked_mm_input:
+            logger.warning("TPU does not support running Multimodal models"\
+            " without setting `--disable_chunked_mm_input`. " \
+            "Forcing --disable_chunked_mm_input.")
+            scheduler_config.disable_chunked_mm_input = True
+
    @classmethod
    def is_pin_memory_available(cls):
        logger.warning("Pin memory is not supported on TPU.")