[V1][Metrics] Support vllm:cache_config_info (#13299)

Mark McLoughlin, 2025-02-22 08:20:00 +00:00 (committed by GitHub)
parent fca20841c2
commit 1cd981da4f
5 changed files with 32 additions and 12 deletions


@@ -230,6 +230,7 @@ EXPECTED_METRICS_V1 = [
     "vllm:prompt_tokens_total",
     "vllm:generation_tokens_total",
     "vllm:iteration_tokens_total",
+    "vllm:cache_config_info",
     "vllm:request_success_total",
     "vllm:request_prompt_tokens_sum",
     "vllm:request_prompt_tokens_bucket",

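The list above is the allowlist the V1 metrics test asserts against. For orientation, here is a minimal sketch of how such a check is commonly written against a live /metrics endpoint; the URL, the requests dependency, and the test body are assumptions for illustration, not the actual test in this commit:

import requests  # assumed to be available in the test environment

EXPECTED_METRICS_V1 = [
    "vllm:iteration_tokens_total",
    "vllm:cache_config_info",
    # ...remaining expected metric names elided...
]


def test_expected_metrics_present(base_url: str = "http://localhost:8000"):
    # Scrape the Prometheus exposition text from a running server.
    response = requests.get(f"{base_url}/metrics")
    response.raise_for_status()
    for name in EXPECTED_METRICS_V1:
        # Each expected metric family name should appear in the output.
        assert name in response.text, f"missing metric: {name}"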

@@ -88,6 +88,12 @@ class SupportsHash(Protocol):
         ...
 
 
+class SupportsMetricsInfo(Protocol):
+
+    def metrics_info(self) -> Dict[str, str]:
+        ...
+
+
 class ModelImpl(str, enum.Enum):
     AUTO = "auto"
     VLLM = "vllm"

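SupportsMetricsInfo is a structural Protocol: any object exposing a matching metrics_info() method satisfies it, with no inheritance required. A minimal sketch of a conforming class follows; DummyCacheConfig and collect are hypothetical names for illustration, only SupportsMetricsInfo itself comes from the diff:

from typing import Dict, Protocol


class SupportsMetricsInfo(Protocol):

    def metrics_info(self) -> Dict[str, str]:
        ...


class DummyCacheConfig:
    """Hypothetical stand-in for a vLLM config object."""

    def __init__(self, block_size: int, cache_dtype: str) -> None:
        self.block_size = block_size
        self.cache_dtype = cache_dtype

    def metrics_info(self) -> Dict[str, str]:
        # Prometheus label values must be strings, so stringify everything.
        return {key: str(value) for key, value in vars(self).items()}


def collect(config: SupportsMetricsInfo) -> Dict[str, str]:
    # Structural typing: DummyCacheConfig never subclasses the Protocol,
    # yet it type-checks here because its method signature matches.
    return config.metrics_info()


print(collect(DummyCacheConfig(block_size=16, cache_dtype="auto")))
# -> {'block_size': '16', 'cache_dtype': 'auto'}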

@@ -8,9 +8,8 @@ from typing import Dict, List, Optional, Type, Union, cast
 
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
-from vllm.engine.metrics_types import (StatLoggerBase, Stats,
-                                       SupportsMetricsInfo)
+from vllm.config import SupportsMetricsInfo, VllmConfig
+from vllm.engine.metrics_types import StatLoggerBase, Stats
 from vllm.executor.ray_utils import ray
 from vllm.logger import init_logger


@@ -15,9 +15,9 @@ do this in Python code and lazily import prometheus_client.
 
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Protocol
+from typing import List, Optional
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics
 
@@ -70,12 +70,6 @@ class Stats:
     spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
 
 
-class SupportsMetricsInfo(Protocol):
-
-    def metrics_info(self) -> Dict[str, str]:
-        ...
-
-
 class StatLoggerBase(ABC):
     """Base class for StatLogger."""
 


@@ -7,7 +7,7 @@ from typing import Dict, List
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.logger import init_logger
 from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics
 from vllm.v1.engine import FinishReason
@@ -228,6 +228,26 @@ class PrometheusStatLogger(StatLoggerBase):
             buckets=request_latency_buckets,
             labelnames=labelnames).labels(*labelvalues)
 
+        self.log_metrics_info("cache_config", vllm_config.cache_config)
+
+    def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
+        metrics_info = config_obj.metrics_info()
+
+        name, documentation = None, None
+        if type == "cache_config":
+            name = "vllm:cache_config_info"
+            documentation = "Information of the LLMEngine CacheConfig"
+        assert name is not None, f"Unknown metrics info type {type}"
+
+        # Info type metrics are syntactic sugar for a gauge permanently set
+        # to 1. Since prometheus multiprocessing mode does not support Info,
+        # emulate info here with a gauge.
+        info_gauge = prometheus_client.Gauge(
+            name=name,
+            documentation=documentation,
+            labelnames=metrics_info.keys()).labels(**metrics_info)
+        info_gauge.set(1)
+
     def log(self, scheduler_stats: SchedulerStats,
             iteration_stats: IterationStats):
         """Log to prometheus."""
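prometheus_client does ship an Info metric type, but it is not supported in multiprocess mode, hence the gauge emulation above. A standalone sketch of the same trick; the metric name demo_cache_config_info and the label payload are made up for illustration:

import prometheus_client

# Made-up payload standing in for CacheConfig.metrics_info().
cache_info = {"block_size": "16", "cache_dtype": "auto"}

# An Info metric is equivalent to a gauge pinned to 1 whose labels carry
# the key/value payload, so a plain Gauge works where Info is unsupported.
gauge = prometheus_client.Gauge(
    name="demo_cache_config_info",
    documentation="Demo: emulating an Info metric with a Gauge",
    labelnames=cache_info.keys())
gauge.labels(**cache_info).set(1)

# The scrape output contains a permanently-1 sample carrying the config:
#   demo_cache_config_info{block_size="16",cache_dtype="auto"} 1.0
print(prometheus_client.generate_latest().decode())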