[Bugfix] Add error handling when server cannot respond with any valid tokens (#5895)
This commit is contained in:
parent
40d0e7411d
commit
ad39bd640c
@ -256,6 +256,7 @@ async def async_request_openai_completions(
|
|||||||
async with session.post(url=api_url, json=payload,
|
async with session.post(url=api_url, json=payload,
|
||||||
headers=headers) as response:
|
headers=headers) as response:
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
|
first_valid_chunk_received = False
|
||||||
async for chunk_bytes in response.content:
|
async for chunk_bytes in response.content:
|
||||||
chunk_bytes = chunk_bytes.strip()
|
chunk_bytes = chunk_bytes.strip()
|
||||||
if not chunk_bytes:
|
if not chunk_bytes:
|
||||||
@ -274,7 +275,8 @@ async def async_request_openai_completions(
|
|||||||
if data["choices"][0]["text"]:
|
if data["choices"][0]["text"]:
|
||||||
timestamp = time.perf_counter()
|
timestamp = time.perf_counter()
|
||||||
# First token
|
# First token
|
||||||
if ttft == 0.0:
|
if not first_valid_chunk_received:
|
||||||
|
first_chunk_received = True
|
||||||
ttft = time.perf_counter() - st
|
ttft = time.perf_counter() - st
|
||||||
output.ttft = ttft
|
output.ttft = ttft
|
||||||
|
|
||||||
@ -285,9 +287,14 @@ async def async_request_openai_completions(
|
|||||||
|
|
||||||
most_recent_timestamp = timestamp
|
most_recent_timestamp = timestamp
|
||||||
generated_text += data["choices"][0]["text"]
|
generated_text += data["choices"][0]["text"]
|
||||||
|
if first_chunk_received:
|
||||||
|
output.success = True
|
||||||
|
else:
|
||||||
|
output.success = False
|
||||||
|
output.error = (
|
||||||
|
"Never received a valid chunk to calculate TTFT."
|
||||||
|
"This response will be marked as failed!")
|
||||||
output.generated_text = generated_text
|
output.generated_text = generated_text
|
||||||
output.success = True
|
|
||||||
output.latency = latency
|
output.latency = latency
|
||||||
else:
|
else:
|
||||||
output.error = response.reason or ""
|
output.error = response.reason or ""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user