[V1] Remove log noise when idle (#16735)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
2025-04-17 00:34:08 -04:00 · 2025-04-17 00:34:08 -04:00 · 9dbf7a2dc1
commit 9dbf7a2dc1
parent 607029e515
2 changed files with 15 additions and 4 deletions
--- a/vllm/v1/metrics/loggers.py
+++ b/vllm/v1/metrics/loggers.py
@@ -40,6 +40,8 @@ class LoggingStatLogger(StatLoggerBase):
        # TODO: Make the interval configurable.
        self.prefix_caching_metrics = PrefixCachingMetrics()
        self.spec_decoding_metrics = SpecDecodingMetrics()
        self.last_prompt_throughput: float = 0.0
        self.last_generation_throughput: float = 0.0
    def _reset(self, now):
        self.last_log_time = now
@@ -83,8 +85,17 @@ class LoggingStatLogger(StatLoggerBase):
        scheduler_stats = self.last_scheduler_stats
        log_fn = logger.info
        if not any(
            (prompt_throughput, generation_throughput,
             self.last_prompt_throughput, self.last_generation_throughput)):
            # Avoid log noise on an idle production system
            log_fn = logger.debug
        self.last_generation_throughput = generation_throughput
        self.last_prompt_throughput = prompt_throughput
        # Format and print output.
-        logger.info(
+        log_fn(
            "Engine %03d: "
            "Avg prompt throughput: %.1f tokens/s, "
            "Avg generation throughput: %.1f tokens/s, "
@@ -101,7 +112,7 @@ class LoggingStatLogger(StatLoggerBase):
        )
        if scheduler_stats.spec_decoding_stats is not None:
-            self.spec_decoding_metrics.log()
+            self.spec_decoding_metrics.log(log_fn=log_fn)
 class PrometheusStatLogger(StatLoggerBase):
--- a/vllm/v1/spec_decode/metrics.py
+++ b/vllm/v1/spec_decode/metrics.py
@@ -43,14 +43,14 @@ class SpecDecodingMetrics:
        self.num_accepted_tokens.append(
            spec_decoding_stats.num_accepted_tokens)
-    def log(self):
+    def log(self, log_fn=logger.info):
        num_draft_tokens = np.sum(self.num_draft_tokens)
        num_accepted_tokens = np.sum(self.num_accepted_tokens)
        draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
                                 100 if num_draft_tokens > 0 else float("nan"))
-        logger.info(
+        log_fn(
            "SpecDecoding metrics: "
            "Draft acceptance rate: %.1f%%, "
            "Accepted: %d tokens, "