From 9dbf7a2dc1448d6657adfb2daba36be270dcebcd Mon Sep 17 00:00:00 2001
From: Russell Bryant <rbryant@redhat.com>
Date: Thu, 17 Apr 2025 00:34:08 -0400
Subject: [PATCH] [V1] Remove log noise when idle (#16735)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
---
 vllm/v1/metrics/loggers.py     | 15 +++++++++++++--
 vllm/v1/spec_decode/metrics.py |  4 ++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py
index 3959be40..4d70f27f 100644
--- a/vllm/v1/metrics/loggers.py
+++ b/vllm/v1/metrics/loggers.py
@@ -40,6 +40,8 @@ class LoggingStatLogger(StatLoggerBase):
         # TODO: Make the interval configurable.
         self.prefix_caching_metrics = PrefixCachingMetrics()
         self.spec_decoding_metrics = SpecDecodingMetrics()
+        self.last_prompt_throughput: float = 0.0
+        self.last_generation_throughput: float = 0.0
 
     def _reset(self, now):
         self.last_log_time = now
@@ -83,8 +85,17 @@ class LoggingStatLogger(StatLoggerBase):
 
         scheduler_stats = self.last_scheduler_stats
 
+        log_fn = logger.info
+        if not any(
+            (prompt_throughput, generation_throughput,
+             self.last_prompt_throughput, self.last_generation_throughput)):
+            # Idle now and in the previous interval: log at DEBUG to cut noise
+            log_fn = logger.debug
+        self.last_generation_throughput = generation_throughput
+        self.last_prompt_throughput = prompt_throughput
+
         # Format and print output.
-        logger.info(
+        log_fn(
             "Engine %03d: "
             "Avg prompt throughput: %.1f tokens/s, "
             "Avg generation throughput: %.1f tokens/s, "
@@ -101,7 +112,7 @@ class LoggingStatLogger(StatLoggerBase):
         )
 
         if scheduler_stats.spec_decoding_stats is not None:
-            self.spec_decoding_metrics.log()
+            self.spec_decoding_metrics.log(log_fn=log_fn)
 
 
 class PrometheusStatLogger(StatLoggerBase):
diff --git a/vllm/v1/spec_decode/metrics.py b/vllm/v1/spec_decode/metrics.py
index 7bb3c209..cc453b74 100644
--- a/vllm/v1/spec_decode/metrics.py
+++ b/vllm/v1/spec_decode/metrics.py
@@ -43,14 +43,14 @@ class SpecDecodingMetrics:
         self.num_accepted_tokens.append(
             spec_decoding_stats.num_accepted_tokens)
 
-    def log(self):
+    def log(self, log_fn=logger.info):
         num_draft_tokens = np.sum(self.num_draft_tokens)
         num_accepted_tokens = np.sum(self.num_accepted_tokens)
 
         draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
                                  100 if num_draft_tokens > 0 else float("nan"))
 
-        logger.info(
+        log_fn(
             "SpecDecoding metrics: "
             "Draft acceptance rate: %.1f%%, "
             "Accepted: %d tokens, "