diff --git a/tests/test_regression.py b/tests/test_regression.py
new file mode 100644
index 00000000..3bfb2b43
--- /dev/null
+++ b/tests/test_regression.py
@@ -0,0 +1,27 @@
+"""Regression tests for vLLM's behavior.
+
+Each test here should reproduce a bug reported by users, to make sure it
+will never happen again.
+
+"""
+from vllm import LLM, SamplingParams
+
+
+def test_duplicated_ignored_sequence_group():
+    """Regression test for https://github.com/vllm-project/vllm/issues/1655."""
+
+    sampling_params = SamplingParams(temperature=0.01,
+                                     top_p=0.1,
+                                     max_tokens=256)
+    llm = LLM(model="facebook/opt-125m",
+              max_num_batched_tokens=4096,
+              tensor_parallel_size=1)
+    prompts = ["This is a short prompt", "This is a very long prompt " * 1000]
+    outputs = llm.generate(prompts, sampling_params=sampling_params)
+
+    assert len(prompts) == len(outputs)
+
+
+if __name__ == "__main__":
+    import pytest
+    pytest.main([__file__])
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index c3752b11..20af3fb3 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -567,7 +567,7 @@ class LLMEngine:
             blocks_to_copy=scheduler_outputs.blocks_to_copy,
         )
 
-        return self._process_model_outputs(output, scheduler_outputs) + ignored
+        return self._process_model_outputs(output, scheduler_outputs)
 
     def _log_system_stats(
         self,