[Bugfix] Add error handling when server cannot respond any valid tokens (#5895)

DearPlanet authored 2024-11-08 12:58:37 +08:00, committed by GitHub
parent 40d0e7411d
commit ad39bd640c


@@ -256,6 +256,7 @@ async def async_request_openai_completions
             async with session.post(url=api_url, json=payload,
                                     headers=headers) as response:
                 if response.status == 200:
+                    first_chunk_received = False
                     async for chunk_bytes in response.content:
                         chunk_bytes = chunk_bytes.strip()
                         if not chunk_bytes:
@@ -274,7 +275,8 @@ async def async_request_openai_completions
                             if data["choices"][0]["text"]:
                                 timestamp = time.perf_counter()
                                 # First token
-                                if ttft == 0.0:
+                                if not first_chunk_received:
+                                    first_chunk_received = True
                                     ttft = time.perf_counter() - st
                                     output.ttft = ttft
@@ -285,9 +287,14 @@ async def async_request_openai_completions
                                 most_recent_timestamp = timestamp
                                 generated_text += data["choices"][0]["text"]
+                    if first_chunk_received:
+                        output.success = True
+                    else:
+                        output.success = False
+                        output.error = (
+                            "Never received a valid chunk to calculate TTFT."
+                            "This response will be marked as failed!")
                     output.generated_text = generated_text
-                    output.success = True
                     output.latency = latency
                 else:
                     output.error = response.reason or ""
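
For reference, a minimal self-contained sketch of the pattern this commit introduces: track whether any chunk with non-empty text ever arrived, and only mark the request successful in that case, instead of relying on the old ttft == 0.0 sentinel. The consume_stream function, the FakeOutput dataclass, and the fake chunk format below are illustrative stand-ins, not the vLLM benchmark code itself.

    # Sketch only: stand-in names, not the vLLM benchmark implementation.
    import asyncio
    import json
    import time
    from dataclasses import dataclass, field
    from typing import AsyncIterator, List


    @dataclass
    class FakeOutput:
        success: bool = False
        error: str = ""
        ttft: float = 0.0
        itl: List[float] = field(default_factory=list)
        generated_text: str = ""
        latency: float = 0.0


    async def consume_stream(chunks: AsyncIterator[str]) -> FakeOutput:
        output = FakeOutput()
        generated_text = ""
        st = time.perf_counter()
        most_recent_timestamp = st
        first_chunk_received = False  # replaces the old `ttft == 0.0` sentinel

        async for chunk in chunks:
            if chunk == "[DONE]":
                output.latency = time.perf_counter() - st
                continue
            data = json.loads(chunk)
            text = data["choices"][0]["text"]
            if not text:
                # e.g. a trailing usage-only chunk that carries no token
                continue
            timestamp = time.perf_counter()
            if not first_chunk_received:
                first_chunk_received = True
                output.ttft = timestamp - st  # time to first valid token
            else:
                output.itl.append(timestamp - most_recent_timestamp)
            most_recent_timestamp = timestamp
            generated_text += text

        # Only a response that produced at least one valid token counts as success.
        if first_chunk_received:
            output.success = True
        else:
            output.success = False
            output.error = ("Never received a valid chunk to calculate TTFT. "
                            "This response will be marked as failed!")
        output.generated_text = generated_text
        return output


    async def _demo() -> None:
        async def empty_stream() -> AsyncIterator[str]:
            # Server answers 200 but never yields a token: under the old check
            # this would have been reported as a success with ttft left at 0.0.
            yield json.dumps({"choices": [{"text": ""}]})
            yield "[DONE]"

        result = await consume_stream(empty_stream())
        print(result.success, result.error)


    if __name__ == "__main__":
        asyncio.run(_demo())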