Fixes the misuse/mixed use of time.time()/time.monotonic() (#3220)

Signed-off-by: Tao He <sighingnow@gmail.com>
Co-authored-by: simon-mo <simon.mo@hey.com>
Tao He 2024-03-16 02:25:43 +08:00 committed by GitHub
parent 03d37f2441
commit 14b8ae02e7
5 changed files with 7 additions and 8 deletions
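
For context, the two clocks are not interchangeable: time.time() returns wall-clock seconds since the Unix epoch and is the right source for timestamps, while time.monotonic() counts from an arbitrary, platform-dependent reference point and is only meaningful for measuring intervals. A minimal standalone sketch (not vLLM code) of the distinction:

    import time

    # time.time(): seconds since the Unix epoch -- a real timestamp.
    wall = time.time()
    # time.monotonic(): seconds since an arbitrary reference point (often
    # system boot), so its absolute value is meaningless on its own.
    mono = time.monotonic()

    print(int(wall))  # e.g. 1710500000, a valid Unix timestamp
    print(int(mono))  # e.g. 84213, not a timestamp at all

    # Monotonic time remains the right tool for measuring elapsed intervals,
    # because it never jumps backwards (e.g. under NTP clock adjustments).
    start = time.monotonic()
    time.sleep(0.1)
    print(f"elapsed: {time.monotonic() - start:.3f}s")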

View File

@@ -160,7 +160,7 @@ class Scheduler:
         blocks_to_copy: Dict[int, List[int]] = {}
 
         # Fix the current time.
-        now = time.monotonic()
+        now = time.time()
 
         # Join waiting sequences if possible.
         if not self.swapped:
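
The scheduler compares `now` against per-sequence arrival stamps, so both must come from the same clock. A hypothetical sketch of the pattern (the dict-based queue is illustrative, not vLLM's Scheduler internals):

    import time

    # Requests stamped on arrival with the wall clock.
    waiting = [
        {"id": "a", "arrival_time": time.time() - 1.0},
        {"id": "b", "arrival_time": time.time() - 3.0},
    ]
    now = time.time()  # must match the clock used for the stamps
    # Serve the longest-waiting request first.
    waiting.sort(key=lambda req: now - req["arrival_time"], reverse=True)
    print([req["id"] for req in waiting])  # ['b', 'a']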

View File

@@ -604,8 +604,7 @@ class AsyncLLMEngine:
         >>> ...
         """
         # Preprocess the request.
-        # This should not be used for logging, as it is monotonic time.
-        arrival_time = time.monotonic()
+        arrival_time = time.time()
 
         try:
             stream = await self.add_request(
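
The deleted comment warned that the old value was unsuitable for logging; with time.time(), arrival_time becomes a genuine wall-clock stamp. A small sketch of what that permits:

    import time
    from datetime import datetime, timezone

    arrival_time = time.time()
    # A wall-clock stamp can be rendered as a log timestamp; a monotonic
    # reading, counted from an arbitrary reference, cannot.
    print(datetime.fromtimestamp(arrival_time, tz=timezone.utc).isoformat())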

View File

@@ -244,7 +244,7 @@ class LLMEngine:
             raise ValueError(f"Cannot request more than "
                              f"{max_logprobs} logprobs.")
         if arrival_time is None:
-            arrival_time = time.monotonic()
+            arrival_time = time.time()
         prompt_token_ids = self.encode_request(
             request_id=request_id,
             prompt=prompt,
@@ -628,7 +628,7 @@ class LLMEngine:
     def _get_stats(self,
                    scheduler_outputs: Optional[SchedulerOutputs]) -> Stats:
         """Get Stats to be Logged to Prometheus."""
-        now = time.monotonic()
+        now = time.time()
 
         # KV Cache Usage in %.
         num_total_gpu = self.cache_config.num_gpu_blocks
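
_get_stats derives latencies by subtracting arrival stamps from `now`, which is exactly where mixed clocks corrupt the numbers. A standalone sketch of the failure mode (not vLLM code):

    import time

    arrival_time = time.time()  # wall-clock stamp, as in add_request above
    time.sleep(0.01)

    broken = time.monotonic() - arrival_time  # mixed clocks: typically a huge negative value
    fixed = time.time() - arrival_time        # same clock: about 0.01s
    print(f"broken={broken:.0f}s fixed={fixed:.3f}s")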

View File

@@ -103,7 +103,7 @@ class OpenAIServingChat(OpenAIServing):
     ) -> Union[ErrorResponse, AsyncGenerator[str, None]]:
 
         model_name = request.model
-        created_time = int(time.monotonic())
+        created_time = int(time.time())
         chunk_object_type = "chat.completion.chunk"
         first_iteration = True
 
@@ -244,7 +244,7 @@ class OpenAIServingChat(OpenAIServing):
             request_id: str) -> Union[ErrorResponse, ChatCompletionResponse]:
 
         model_name = request.model
-        created_time = int(time.monotonic())
+        created_time = int(time.time())
         final_res: RequestOutput = None
 
         async for res in result_generator:
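
In both handlers here, as in the completions handler below, created_time is returned to clients in the `created` field, which the OpenAI API defines as a Unix timestamp; int(time.monotonic()) would have produced a small, meaningless number instead. A sketch of where the value ends up (the response dict is illustrative, not vLLM's actual serialization):

    import time

    created_time = int(time.time())
    chunk = {
        "id": "chatcmpl-...",  # placeholder id
        "object": "chat.completion.chunk",
        "created": created_time,  # clients decode this as a Unix timestamp
    }
    print(chunk)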

View File

@@ -118,7 +118,7 @@ class OpenAIServingCompletion(OpenAIServing):
 
         model_name = request.model
         request_id = f"cmpl-{random_uuid()}"
-        created_time = int(time.monotonic())
+        created_time = int(time.time())
 
         # Schedule the request and get the result generator.
         generators = []