[BugFix] Ensure worker model loop is always stopped at the right time (#5987)

2024-07-01 16:17:58 -07:00 · 2024-07-01 16:17:58 -07:00 · c87ebc3ef9
commit c87ebc3ef9
parent c4059ea54f
1 changed file with 1 addition and 1 deletion
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -838,7 +838,7 @@ class LLMEngine:
        # Tracing
        self.do_tracing(scheduler_outputs)

-        if not request_outputs:
+        if not self.has_unfinished_requests():
            # Stop the execute model loop in parallel workers until there are
            # more requests to process. This avoids waiting indefinitely in
            # torch.distributed ops which may otherwise timeout, and unblocks