[Bugfix] fix output parsing error for trtllm backend (#4137)
Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
parent
8438e0569e
commit
fe3b5bbc23
@@ -135,6 +135,7 @@ async def async_request_trt_llm(
|
|||||||
"data:")
|
"data:")
|
||||||
|
|
||||||
data = json.loads(chunk)
|
data = json.loads(chunk)
|
||||||
|
output.generated_text += data["text_output"]
|
||||||
timestamp = time.perf_counter()
|
timestamp = time.perf_counter()
|
||||||
# First token
|
# First token
|
||||||
if ttft == 0.0:
|
if ttft == 0.0:
|
||||||
@@ -149,7 +150,6 @@ async def async_request_trt_llm(
|
|||||||
most_recent_timestamp = timestamp
|
most_recent_timestamp = timestamp
|
||||||
|
|
||||||
output.latency = most_recent_timestamp - st
|
output.latency = most_recent_timestamp - st
|
||||||
output.generated_text = json.loads(data)["text_output"]
|
|
||||||
output.success = True
|
output.success = True
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user