diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index eb1512ca..a73b4c82 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -5,6 +5,7 @@ from typing import Iterator, List, Optional, Union import cloudpickle import zmq +from ray.exceptions import RayTaskError from vllm import AsyncEngineArgs, SamplingParams # yapf conflicts with isort for this block @@ -305,6 +306,11 @@ class MQLLMEngine: def _send_outputs(self, outputs: REQUEST_OUTPUTS_T): """Send List of RequestOutput to RPCClient.""" if outputs: + # RayTaskError might not be picklable here. We need to unpack the + # underlying exception as the real exception in the output. + if (isinstance(outputs, RPCError) + and isinstance(outputs.exception, RayTaskError)): + outputs.exception = outputs.exception.cause output_bytes = pickle.dumps(outputs) self.output_socket.send_multipart((output_bytes, ), copy=False)