[V1][Core] Remove should_shutdown to simplify core process termination (#11113)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
parent
d1e21a979b
commit
452a723bf2
@ -5,7 +5,6 @@ import signal
|
||||
import threading
|
||||
import time
|
||||
from multiprocessing.process import BaseProcess
|
||||
from multiprocessing.sharedctypes import Synchronized
|
||||
from typing import List, Tuple, Type, Union
|
||||
|
||||
import zmq
|
||||
@ -133,13 +132,9 @@ class EngineCoreProc(EngineCore):
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
ready_path: str,
|
||||
should_shutdown: Synchronized,
|
||||
):
|
||||
super().__init__(vllm_config, executor_class, usage_context)
|
||||
|
||||
# Signal from main process to shutdown (multiprocessing.Value).
|
||||
self.should_shutdown = should_shutdown
|
||||
|
||||
# Background Threads and Queues for IO. These enable us to
|
||||
# overlap ZMQ socket IO with GPU since they release the GIL,
|
||||
# and to overlap some serialization/deserialization with the
|
||||
@ -195,7 +190,6 @@ class EngineCoreProc(EngineCore):
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
ready_path: str,
|
||||
should_shutdown: Synchronized,
|
||||
) -> BaseProcess:
|
||||
# The current process might have CUDA context,
|
||||
# so we need to spawn a new process.
|
||||
@ -210,7 +204,6 @@ class EngineCoreProc(EngineCore):
|
||||
"vllm_config": vllm_config,
|
||||
"executor_class": executor_class,
|
||||
"usage_context": usage_context,
|
||||
"should_shutdown": should_shutdown
|
||||
}
|
||||
# Run EngineCore busy loop in background process.
|
||||
proc = context.Process(target=EngineCoreProc.run_engine_core,
|
||||
@ -260,8 +253,8 @@ class EngineCoreProc(EngineCore):
|
||||
def run_busy_loop(self):
|
||||
"""Core busy loop of the EngineCore."""
|
||||
|
||||
# Loop until we get a shutdown signal.
|
||||
while not self.should_shutdown:
|
||||
# Loop until process is sent a SIGINT or SIGTERM
|
||||
while True:
|
||||
# 1) Poll the input queue until there is work to do.
|
||||
if not self.scheduler.has_unfinished_requests():
|
||||
while True:
|
||||
@ -272,8 +265,6 @@ class EngineCoreProc(EngineCore):
|
||||
except queue.Empty:
|
||||
self._log_stats()
|
||||
logger.debug("EngineCore busy loop waiting.")
|
||||
if self.should_shutdown:
|
||||
return
|
||||
except BaseException:
|
||||
raise
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
import atexit
|
||||
import multiprocessing
|
||||
from typing import List, Union
|
||||
|
||||
import msgspec
|
||||
@ -149,21 +148,16 @@ class MPClient(EngineCoreClient):
|
||||
self.input_socket.bind(input_path)
|
||||
|
||||
# Start EngineCore in background process.
|
||||
self.should_shutdown = multiprocessing.Value('b', False, lock=False)
|
||||
self.proc = EngineCoreProc.make_engine_core_process(
|
||||
*args,
|
||||
input_path=input_path,
|
||||
output_path=output_path,
|
||||
ready_path=ready_path,
|
||||
should_shutdown=self.should_shutdown,
|
||||
**kwargs,
|
||||
)
|
||||
atexit.register(self.shutdown)
|
||||
|
||||
def shutdown(self):
|
||||
# Send shutdown signal to background process.
|
||||
self.should_shutdown = True
|
||||
|
||||
# Shut down the zmq context.
|
||||
self.ctx.destroy(linger=0)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user