Fixes the misuse/mixuse of time.time()/time.monotonic() (#3220)
Signed-off-by: Tao He <sighingnow@gmail.com>
Co-authored-by: simon-mo <simon.mo@hey.com>
parent 03d37f2441
commit 14b8ae02e7
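For context, the distinction this patch enforces (a minimal illustration, not part of the diff): time.time() returns wall-clock seconds since the Unix epoch, suitable for user-facing timestamps and comparable across processes, while time.monotonic() has an undefined reference point, so only the difference between two of its readings is meaningful.

import time

# Wall clock: seconds since the Unix epoch; meaningful as an absolute
# timestamp and comparable across processes and machines.
created = int(time.time())

# Monotonic clock: reference point undefined (often time since boot);
# the absolute value means nothing, only deltas between readings do.
start = time.monotonic()
time.sleep(0.1)
print(f"created={created}, elapsed={time.monotonic() - start:.3f}s")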
@@ -160,7 +160,7 @@ class Scheduler:
         blocks_to_copy: Dict[int, List[int]] = {}

         # Fix the current time.
-        now = time.monotonic()
+        now = time.time()

         # Join waiting sequences if possible.
         if not self.swapped:
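The `now` taken here is later compared against per-request arrival stamps recorded elsewhere in the engine, so both sides must read the same clock. A sketch of the failure mode this hunk removes (variable names illustrative, not vLLM's):

import time

arrival_time = time.time()  # stamped with the wall clock on arrival

# Same clock on both sides: a small, sensible queueing delay.
waiting = time.time() - arrival_time

# Mixed clocks: a boot-relative counter minus an epoch timestamp
# (~1.7e9 seconds) yields a huge negative, meaningless "delay".
broken = time.monotonic() - arrival_time
print(f"waiting={waiting:.6f}s  broken={broken:.0f}s")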
@@ -604,8 +604,7 @@ class AsyncLLMEngine:
         >>> ...
         """
         # Preprocess the request.
-        # This should not be used for logging, as it is monotonic time.
-        arrival_time = time.monotonic()
+        arrival_time = time.time()

         try:
             stream = await self.add_request(
@@ -244,7 +244,7 @@ class LLMEngine:
             raise ValueError(f"Cannot request more than "
                              f"{max_logprobs} logprobs.")
         if arrival_time is None:
-            arrival_time = time.monotonic()
+            arrival_time = time.time()
         prompt_token_ids = self.encode_request(
             request_id=request_id,
             prompt=prompt,
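The two hunks above stamp the same quantity at different layers: AsyncLLMEngine.generate() records arrival_time at the entrypoint, and LLMEngine.add_request() only fills in a default when the caller passed none. A hedged sketch of that hand-off (a simplified free-standing function, not the real methods):

import time
from typing import Optional

def add_request(arrival_time: Optional[float] = None) -> float:
    # Engine-side default: only used when the caller did not stamp one.
    if arrival_time is None:
        arrival_time = time.time()
    return arrival_time

# Entrypoint-side stamp; both sites must agree on the clock, otherwise
# the two defaults produce incomparable values for the same field.
print(add_request(arrival_time=time.time()))
print(add_request())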
@@ -628,7 +628,7 @@ class LLMEngine:
     def _get_stats(self,
                    scheduler_outputs: Optional[SchedulerOutputs]) -> Stats:
         """Get Stats to be Logged to Prometheus."""
-        now = time.monotonic()
+        now = time.time()

         # KV Cache Usage in %.
         num_total_gpu = self.cache_config.num_gpu_blocks
@@ -103,7 +103,7 @@ class OpenAIServingChat(OpenAIServing):
     ) -> Union[ErrorResponse, AsyncGenerator[str, None]]:

         model_name = request.model
-        created_time = int(time.monotonic())
+        created_time = int(time.time())
         chunk_object_type = "chat.completion.chunk"
         first_iteration = True

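created_time feeds the `created` field of the OpenAI-compatible response, which the OpenAI API defines as a Unix timestamp in seconds; int(time.monotonic()) would instead emit a small boot-relative integer that clients cannot decode as a date. A minimal sketch of the field (abbreviated chunk, illustrative id and model name):

import time

chunk = {
    "id": "chatcmpl-123",              # illustrative id
    "object": "chat.completion.chunk",
    "created": int(time.time()),       # Unix timestamp, as the API expects
    "model": "example-model",          # illustrative model name
}
print(chunk)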
@@ -244,7 +244,7 @@ class OpenAIServingChat(OpenAIServing):
             request_id: str) -> Union[ErrorResponse, ChatCompletionResponse]:

         model_name = request.model
-        created_time = int(time.monotonic())
+        created_time = int(time.time())
         final_res: RequestOutput = None

         async for res in result_generator:
@@ -118,7 +118,7 @@ class OpenAIServingCompletion(OpenAIServing):

         model_name = request.model
         request_id = f"cmpl-{random_uuid()}"
-        created_time = int(time.monotonic())
+        created_time = int(time.time())

         # Schedule the request and get the result generator.
         generators = []
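A quick REPL sanity check of the two clocks (exact values depend on the machine):

>>> import time
>>> int(time.time())       # epoch seconds: decodes to the current date
1709942400
>>> int(time.monotonic())  # boot-relative: small, and not a date
84321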