diff --git a/tests/entrypoints/openai/test_metrics.py b/tests/entrypoints/openai/test_metrics.py
index 45a387a1..e0323abe 100644
--- a/tests/entrypoints/openai/test_metrics.py
+++ b/tests/entrypoints/openai/test_metrics.py
@@ -230,6 +230,7 @@ EXPECTED_METRICS_V1 = [
     "vllm:prompt_tokens_total",
     "vllm:generation_tokens_total",
     "vllm:iteration_tokens_total",
+    "vllm:cache_config_info",
     "vllm:request_success_total",
     "vllm:request_prompt_tokens_sum",
     "vllm:request_prompt_tokens_bucket",
diff --git a/vllm/config.py b/vllm/config.py
index d6e197fe..dbcacdf4 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -88,6 +88,12 @@ class SupportsHash(Protocol):
         ...
 
 
+class SupportsMetricsInfo(Protocol):
+
+    def metrics_info(self) -> Dict[str, str]:
+        ...
+
+
 class ModelImpl(str, enum.Enum):
     AUTO = "auto"
     VLLM = "vllm"
diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
index 7c55d66e..e8736dff 100644
--- a/vllm/engine/metrics.py
+++ b/vllm/engine/metrics.py
@@ -8,9 +8,8 @@ from typing import Dict, List, Optional, Type, Union, cast
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
-from vllm.engine.metrics_types import (StatLoggerBase, Stats,
-                                       SupportsMetricsInfo)
+from vllm.config import SupportsMetricsInfo, VllmConfig
+from vllm.engine.metrics_types import StatLoggerBase, Stats
 from vllm.executor.ray_utils import ray
 from vllm.logger import init_logger
 
diff --git a/vllm/engine/metrics_types.py b/vllm/engine/metrics_types.py
index 7f0c2fa7..9e6d5ef2 100644
--- a/vllm/engine/metrics_types.py
+++ b/vllm/engine/metrics_types.py
@@ -15,9 +15,9 @@ do this in Python code and lazily import prometheus_client.
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Protocol
+from typing import List, Optional
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics
 
 
@@ -70,12 +70,6 @@ class Stats:
     spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
 
 
-class SupportsMetricsInfo(Protocol):
-
-    def metrics_info(self) -> Dict[str, str]:
-        ...
-
-
 class StatLoggerBase(ABC):
     """Base class for StatLogger."""
 
diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py
index 5019e2b3..e112a9f3 100644
--- a/vllm/v1/metrics/loggers.py
+++ b/vllm/v1/metrics/loggers.py
@@ -7,7 +7,7 @@ from typing import Dict, List
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.logger import init_logger
 from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics
 from vllm.v1.engine import FinishReason
@@ -228,6 +228,26 @@ class PrometheusStatLogger(StatLoggerBase):
             buckets=request_latency_buckets,
             labelnames=labelnames).labels(*labelvalues)
 
+        self.log_metrics_info("cache_config", vllm_config.cache_config)
+
+    def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
+        metrics_info = config_obj.metrics_info()
+
+        name, documentation = None, None
+        if type == "cache_config":
+            name = "vllm:cache_config_info"
+            documentation = "Information of the LLMEngine CacheConfig"
+        assert name is not None, f"Unknown metrics info type {type}"
+
+        # Info type metrics are syntactic sugar for a gauge permanently set to 1
+        # Since prometheus multiprocessing mode does not support Info, emulate
+        # info here with a gauge.
+        info_gauge = prometheus_client.Gauge(
+            name=name,
+            documentation=documentation,
+            labelnames=metrics_info.keys()).labels(**metrics_info)
+        info_gauge.set(1)
+
     def log(self, scheduler_stats: SchedulerStats,
             iteration_stats: IterationStats):
         """Log to prometheus."""
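
Note: below is a minimal standalone sketch of the Info-as-Gauge pattern used in log_metrics_info above. The vllm:cache_config_info name, documentation string, and "gauge permanently set to 1" trick come from the diff itself; the label keys and values are hypothetical stand-ins for whatever CacheConfig.metrics_info() actually returns.

    import prometheus_client

    # Stand-in for CacheConfig.metrics_info(): a flat Dict[str, str].
    # These keys/values are made up for illustration.
    metrics_info = {"block_size": "16", "cache_dtype": "auto"}

    # Emulate a prometheus Info metric with a Gauge: the key/value pairs
    # become labels and the value is permanently 1, which also works in
    # prometheus multiprocess mode where Info is unsupported.
    info_gauge = prometheus_client.Gauge(
        name="vllm:cache_config_info",
        documentation="Information of the LLMEngine CacheConfig",
        labelnames=metrics_info.keys()).labels(**metrics_info)
    info_gauge.set(1)

    # Scraping then yields a single sample for this metric family, e.g.
    #   vllm:cache_config_info{block_size="16",cache_dtype="auto"} 1.0
    print(prometheus_client.generate_latest().decode())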