[Misc] Fix Benchmark TTFT Calculation for Chat Completions (#3768)

2024-04-01 15:24:30 -07:00 · 2024-04-01 15:24:30 -07:00 · ccb58b23e6
commit ccb58b23e6
parent 49782fcb76
1 changed file with 3 additions and 3 deletions
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -334,7 +334,8 @@ async def async_request_openai_chat_completions(
                            timestamp = time.perf_counter()
                            data = json.loads(chunk)

-                            if "content" in data["choices"][0]["delta"]:
+                            delta = data["choices"][0]["delta"]
+                            if delta.get("content", None):
                                # First token
                                if ttft == 0:
                                    ttft = time.perf_counter() - st
@@ -345,8 +346,7 @@ async def async_request_openai_chat_completions(
                                    output.itl.append(timestamp -
                                                      most_recent_timestamp)

-                                generated_text += data["choices"][0]["delta"][
-                                    "content"]
+                                generated_text += delta["content"]

                            most_recent_timestamp = timestamp