[Bugfix] Guard for negative counter metrics to prevent crash (#10430)

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
This commit is contained in:
Travis Johnson 2024-11-18 21:57:10 -07:00 committed by GitHub
parent 74f8c2cf5f
commit 272e31c0bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 1 deletions

View File

@ -1716,7 +1716,7 @@ class LLMEngine:
# not counted (to avoid double counting)
actual_num_batched_tokens = scheduler_outputs.num_batched_tokens # type: ignore
num_generation_tokens_from_prefill_groups = 0.
num_generation_tokens_from_prefill_groups = 0
# NOTE: if scheduler_outputs.num_prefill_groups > 0 and
# the len of scheduler_outputs.scheduled_seq_groups is !=
# scheduler_outputs.num_prefill_groups, this means that

View File

@ -512,6 +512,11 @@ class PrometheusStatLogger(StatLoggerBase):
def _log_counter(self, counter, data: Union[int, float]) -> None:
    """Add ``data`` to ``counter`` under this logger's labels.

    A negative ``data`` is never applied: a warning is logged and the
    counter is left untouched, which avoids the ValueError that a
    negative increment would otherwise raise.

    Args:
        counter: Metric object exposing ``labels(**kwargs).inc(amount)``.
        data: Amount to add to the counter.
    """
    is_negative = data < 0
    if not is_negative:
        # Normal path: bind this logger's labels and apply the increment.
        counter.labels(**self.labels).inc(data)
        return
    # Negative amount: report it and skip the update instead of crashing.
    logger.warning("Skipping negative increment of %g to %s", data, counter)
def _log_counter_labels(self, counter, data: CollectionsCounter,