[V1][Metrics] Support vllm:cache_config_info (#13299)

This commit is contained in:
Mark McLoughlin 2025-02-22 08:20:00 +00:00 committed by GitHub
parent fca20841c2
commit 1cd981da4f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 32 additions and 12 deletions

View File

@ -230,6 +230,7 @@ EXPECTED_METRICS_V1 = [
"vllm:prompt_tokens_total", "vllm:prompt_tokens_total",
"vllm:generation_tokens_total", "vllm:generation_tokens_total",
"vllm:iteration_tokens_total", "vllm:iteration_tokens_total",
"vllm:cache_config_info",
"vllm:request_success_total", "vllm:request_success_total",
"vllm:request_prompt_tokens_sum", "vllm:request_prompt_tokens_sum",
"vllm:request_prompt_tokens_bucket", "vllm:request_prompt_tokens_bucket",

View File

@ -88,6 +88,12 @@ class SupportsHash(Protocol):
... ...
class SupportsMetricsInfo(Protocol):
    """Structural (duck-typed) interface for config objects that can
    describe themselves as Prometheus info-metric labels.

    Implementers return a flat mapping of label name -> label value,
    consumed by stat loggers to emit an info-style gauge.
    """

    def metrics_info(self) -> Dict[str, str]:
        ...
class ModelImpl(str, enum.Enum): class ModelImpl(str, enum.Enum):
AUTO = "auto" AUTO = "auto"
VLLM = "vllm" VLLM = "vllm"

View File

@ -8,9 +8,8 @@ from typing import Dict, List, Optional, Type, Union, cast
import numpy as np import numpy as np
import prometheus_client import prometheus_client
from vllm.config import VllmConfig from vllm.config import SupportsMetricsInfo, VllmConfig
from vllm.engine.metrics_types import (StatLoggerBase, Stats, from vllm.engine.metrics_types import StatLoggerBase, Stats
SupportsMetricsInfo)
from vllm.executor.ray_utils import ray from vllm.executor.ray_utils import ray
from vllm.logger import init_logger from vllm.logger import init_logger

View File

@ -15,9 +15,9 @@ do this in Python code and lazily import prometheus_client.
import time import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Protocol from typing import List, Optional
from vllm.config import VllmConfig from vllm.config import SupportsMetricsInfo, VllmConfig
from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics
@ -70,12 +70,6 @@ class Stats:
spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
class SupportsMetricsInfo(Protocol):
def metrics_info(self) -> Dict[str, str]:
...
class StatLoggerBase(ABC): class StatLoggerBase(ABC):
"""Base class for StatLogger.""" """Base class for StatLogger."""

View File

@ -7,7 +7,7 @@ from typing import Dict, List
import numpy as np import numpy as np
import prometheus_client import prometheus_client
from vllm.config import VllmConfig from vllm.config import SupportsMetricsInfo, VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics
from vllm.v1.engine import FinishReason from vllm.v1.engine import FinishReason
@ -228,6 +228,26 @@ class PrometheusStatLogger(StatLoggerBase):
buckets=request_latency_buckets, buckets=request_latency_buckets,
labelnames=labelnames).labels(*labelvalues) labelnames=labelnames).labels(*labelvalues)
self.log_metrics_info("cache_config", vllm_config.cache_config)
def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
    """Emit an info-style metric describing an engine config object.

    Args:
        type: Which config is being reported; currently only
            "cache_config" is supported.
        config_obj: Object implementing ``metrics_info()``; its returned
            dict becomes the metric's label set.

    Raises:
        ValueError: If ``type`` is not a recognized metrics-info type.
            (Previously an ``assert``, which is stripped under ``-O``.)
    """
    metrics_info = config_obj.metrics_info()

    # Known info types mapped to (metric name, documentation string).
    _INFO_METRICS = {
        "cache_config": (
            "vllm:cache_config_info",
            "Information of the LLMEngine CacheConfig",
        ),
    }
    try:
        name, documentation = _INFO_METRICS[type]
    except KeyError:
        raise ValueError(f"Unknown metrics info type {type}") from None

    # Info type metrics are syntactic sugar for a gauge permanently set to 1.
    # Since prometheus multiprocessing mode does not support Info, emulate
    # info here with a gauge.
    info_gauge = prometheus_client.Gauge(
        name=name,
        documentation=documentation,
        labelnames=metrics_info.keys()).labels(**metrics_info)
    info_gauge.set(1)
def log(self, scheduler_stats: SchedulerStats, def log(self, scheduler_stats: SchedulerStats,
iteration_stats: IterationStats): iteration_stats: IterationStats):
"""Log to prometheus.""" """Log to prometheus."""