[Bugfix] Add error handling when server cannot respond any valid tokens (#5895)

DearPlanet authored 2024-11-08 12:58:37 +08:00, committed by GitHub
parent 40d0e7411d
commit ad39bd640c


@@ -256,6 +256,7 @@ async def async_request_openai_completions
             async with session.post(url=api_url, json=payload,
                                     headers=headers) as response:
                 if response.status == 200:
+                    first_chunk_received = False
                     async for chunk_bytes in response.content:
                         chunk_bytes = chunk_bytes.strip()
                         if not chunk_bytes:
@@ -274,7 +275,8 @@ async def async_request_openai_completions
                             if data["choices"][0]["text"]:
                                 timestamp = time.perf_counter()
                                 # First token
-                                if ttft == 0.0:
+                                if not first_chunk_received:
+                                    first_chunk_received = True
                                     ttft = time.perf_counter() - st
                                     output.ttft = ttft
@@ -285,9 +287,14 @@ async def async_request_openai_completions
                                 most_recent_timestamp = timestamp
                                 generated_text += data["choices"][0]["text"]
+                    if first_chunk_received:
+                        output.success = True
+                    else:
+                        output.success = False
+                        output.error = (
+                            "Never received a valid chunk to calculate TTFT."
+                            "This response will be marked as failed!")
                     output.generated_text = generated_text
-                    output.success = True
                     output.latency = latency
                 else:
                     output.error = response.reason or ""
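
For reference, a minimal self-contained sketch of the pattern this commit introduces: track whether any chunk with non-empty text ever arrived, and only mark the request successful in that case, instead of relying on the old ttft == 0.0 sentinel. The consume_stream function, the FakeOutput dataclass, and the fake chunk format below are illustrative stand-ins, not the vLLM benchmark code itself.

    # Sketch only: stand-in names, not the vLLM benchmark implementation.
    import asyncio
    import json
    import time
    from dataclasses import dataclass, field
    from typing import AsyncIterator, List


    @dataclass
    class FakeOutput:
        success: bool = False
        error: str = ""
        ttft: float = 0.0
        itl: List[float] = field(default_factory=list)
        generated_text: str = ""
        latency: float = 0.0


    async def consume_stream(chunks: AsyncIterator[str]) -> FakeOutput:
        output = FakeOutput()
        generated_text = ""
        st = time.perf_counter()
        most_recent_timestamp = st
        first_chunk_received = False  # replaces the old `ttft == 0.0` sentinel

        async for chunk in chunks:
            if chunk == "[DONE]":
                output.latency = time.perf_counter() - st
                continue
            data = json.loads(chunk)
            text = data["choices"][0]["text"]
            if not text:
                # e.g. a trailing usage-only chunk that carries no token
                continue
            timestamp = time.perf_counter()
            if not first_chunk_received:
                first_chunk_received = True
                output.ttft = timestamp - st  # time to first valid token
            else:
                output.itl.append(timestamp - most_recent_timestamp)
            most_recent_timestamp = timestamp
            generated_text += text

        # Only a response that produced at least one valid token counts as success.
        if first_chunk_received:
            output.success = True
        else:
            output.success = False
            output.error = ("Never received a valid chunk to calculate TTFT. "
                            "This response will be marked as failed!")
        output.generated_text = generated_text
        return output


    async def _demo() -> None:
        async def empty_stream() -> AsyncIterator[str]:
            # Server answers 200 but never yields a token: under the old check
            # this would have been reported as a success with ttft left at 0.0.
            yield json.dumps({"choices": [{"text": ""}]})
            yield "[DONE]"

        result = await consume_stream(empty_stream())
        print(result.success, result.error)


    if __name__ == "__main__":
        asyncio.run(_demo())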