From 9dbf7a2dc1448d6657adfb2daba36be270dcebcd Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 17 Apr 2025 00:34:08 -0400 Subject: [PATCH] [V1] Remove log noise when idle (#16735) Signed-off-by: Russell Bryant --- vllm/v1/metrics/loggers.py | 15 +++++++++++++-- vllm/v1/spec_decode/metrics.py | 4 ++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index 3959be40..4d70f27f 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -40,6 +40,8 @@ class LoggingStatLogger(StatLoggerBase): # TODO: Make the interval configurable. self.prefix_caching_metrics = PrefixCachingMetrics() self.spec_decoding_metrics = SpecDecodingMetrics() + self.last_prompt_throughput: float = 0.0 + self.last_generation_throughput: float = 0.0 def _reset(self, now): self.last_log_time = now @@ -83,8 +85,17 @@ class LoggingStatLogger(StatLoggerBase): scheduler_stats = self.last_scheduler_stats + log_fn = logger.info + if not any( + (prompt_throughput, generation_throughput, + self.last_prompt_throughput, self.last_generation_throughput)): + # Avoid log noise on an idle production system + log_fn = logger.debug + self.last_generation_throughput = generation_throughput + self.last_prompt_throughput = prompt_throughput + # Format and print output. - logger.info( + log_fn( "Engine %03d: " "Avg prompt throughput: %.1f tokens/s, " "Avg generation throughput: %.1f tokens/s, " @@ -101,7 +112,7 @@ class LoggingStatLogger(StatLoggerBase): ) if scheduler_stats.spec_decoding_stats is not None: - self.spec_decoding_metrics.log() + self.spec_decoding_metrics.log(log_fn=log_fn) class PrometheusStatLogger(StatLoggerBase): diff --git a/vllm/v1/spec_decode/metrics.py b/vllm/v1/spec_decode/metrics.py index 7bb3c209..cc453b74 100644 --- a/vllm/v1/spec_decode/metrics.py +++ b/vllm/v1/spec_decode/metrics.py @@ -43,14 +43,14 @@ class SpecDecodingMetrics: self.num_accepted_tokens.append( spec_decoding_stats.num_accepted_tokens) - def log(self): + def log(self, log_fn=logger.info): num_draft_tokens = np.sum(self.num_draft_tokens) num_accepted_tokens = np.sum(self.num_accepted_tokens) draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens * 100 if num_draft_tokens > 0 else float("nan")) - logger.info( + log_fn( "SpecDecoding metrics: " "Draft acceptance rate: %.1f%%, " "Accepted: %d tokens, "