diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 2347cdee..46440537 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -2062,7 +2062,7 @@ class LLMEngine:
             raise ValueError(f"The {prompt_type} prompt cannot be empty")
 
         max_prompt_len = self.model_config.max_model_len
-        if len(prompt_ids) >= max_prompt_len:
+        if len(prompt_ids) > max_prompt_len:
             if prompt_type == "encoder" and model_config.is_multimodal_model:
                 mm_registry = self.input_preprocessor.mm_registry
                 mm_processor = mm_registry.create_processor(
diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
index be7e3709..afbbddb8 100644
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -354,7 +354,7 @@ class Processor:
             raise ValueError(f"Token id {max_input_id} is out of vocabulary")
 
         max_prompt_len = self.model_config.max_model_len
-        if len(prompt_ids) >= max_prompt_len:
+        if len(prompt_ids) > max_prompt_len:
             if prompt_type == "encoder" and model_config.is_multimodal_model:
                 mm_registry = self.input_preprocessor.mm_registry
                 mm_processor = mm_registry.create_processor(
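
The patch relaxes an off-by-one in the prompt-length validation: with `>=`, a prompt of exactly `max_model_len` tokens was rejected even though the model can consume it; with `>`, only prompts that genuinely exceed the limit raise. Below is a minimal standalone sketch (not vLLM code; the function name and error message are illustrative) showing the boundary behavior after this change:

# validate_prompt_len is a hypothetical helper, not part of vLLM;
# it only demonstrates the ">" vs ">=" boundary fixed by this patch.
def validate_prompt_len(prompt_ids: list[int], max_prompt_len: int) -> None:
    # After the patch, a prompt whose length EQUALS max_prompt_len is valid;
    # only strictly longer prompts are rejected.
    if len(prompt_ids) > max_prompt_len:
        raise ValueError(
            f"Prompt length {len(prompt_ids)} exceeds "
            f"maximum of {max_prompt_len}"
        )

max_len = 4
validate_prompt_len([1, 2, 3, 4], max_len)  # exactly at the limit: accepted
try:
    validate_prompt_len([1, 2, 3, 4, 5], max_len)  # one over: still rejected
except ValueError as e:
    print(e)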