[V1][Metrics] Support vllm:cache_config_info (#13299)
commit 1cd981da4f
parent fca20841c2
@@ -230,6 +230,7 @@ EXPECTED_METRICS_V1 = [
     "vllm:prompt_tokens_total",
     "vllm:generation_tokens_total",
     "vllm:iteration_tokens_total",
+    "vllm:cache_config_info",
    "vllm:request_success_total",
     "vllm:request_prompt_tokens_sum",
     "vllm:request_prompt_tokens_bucket",
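
The test asserts the new family name alongside the existing ones when it scrapes the server's metrics endpoint. A minimal standalone version of that kind of check (the URL, port, and use of requests are assumptions for illustration, not taken from the test file):

    import requests

    # Scrape the running server's Prometheus endpoint (default port assumed).
    response = requests.get("http://localhost:8000/metrics")
    assert response.status_code == 200
    for family in ("vllm:iteration_tokens_total", "vllm:cache_config_info"):
        # Every expected metric family should appear in the exposition text.
        assert family in response.text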

@@ -88,6 +88,12 @@ class SupportsHash(Protocol):
         ...
 
 
+class SupportsMetricsInfo(Protocol):
+
+    def metrics_info(self) -> Dict[str, str]:
+        ...
+
+
 class ModelImpl(str, enum.Enum):
     AUTO = "auto"
     VLLM = "vllm"
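
Any config object satisfies SupportsMetricsInfo structurally: it only needs a metrics_info() method returning Dict[str, str]. A minimal sketch of a conforming class (DummyCacheConfig and its fields are illustrative, not part of this change):

    from dataclasses import dataclass
    from typing import Dict

    @dataclass
    class DummyCacheConfig:
        block_size: int = 16
        gpu_memory_utilization: float = 0.9

        def metrics_info(self) -> Dict[str, str]:
            # Prometheus label values must be strings, so stringify
            # every field before handing the dict to the stat logger.
            return {k: str(v) for k, v in vars(self).items()}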

@@ -8,9 +8,8 @@ from typing import Dict, List, Optional, Type, Union, cast
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
-from vllm.engine.metrics_types import (StatLoggerBase, Stats,
-                                       SupportsMetricsInfo)
+from vllm.config import SupportsMetricsInfo, VllmConfig
+from vllm.engine.metrics_types import StatLoggerBase, Stats
 from vllm.executor.ray_utils import ray
 from vllm.logger import init_logger
 
@@ -15,9 +15,9 @@ do this in Python code and lazily import prometheus_client.
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Protocol
+from typing import List, Optional
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics
 
 
@@ -70,12 +70,6 @@ class Stats:
     spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
 
 
-class SupportsMetricsInfo(Protocol):
-
-    def metrics_info(self) -> Dict[str, str]:
-        ...
-
-
 class StatLoggerBase(ABC):
     """Base class for StatLogger."""
 
@@ -7,7 +7,7 @@ from typing import Dict, List
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.logger import init_logger
 from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics
 from vllm.v1.engine import FinishReason

@@ -228,6 +228,26 @@ class PrometheusStatLogger(StatLoggerBase):
             buckets=request_latency_buckets,
             labelnames=labelnames).labels(*labelvalues)
 
+        self.log_metrics_info("cache_config", vllm_config.cache_config)
+
+    def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
+        metrics_info = config_obj.metrics_info()
+
+        name, documentation = None, None
+        if type == "cache_config":
+            name = "vllm:cache_config_info"
+            documentation = "Information of the LLMEngine CacheConfig"
+        assert name is not None, f"Unknown metrics info type {type}"
+
+        # Info type metrics are syntactic sugar for a gauge permanently set to 1
+        # Since prometheus multiprocessing mode does not support Info, emulate
+        # info here with a gauge.
+        info_gauge = prometheus_client.Gauge(
+            name=name,
+            documentation=documentation,
+            labelnames=metrics_info.keys()).labels(**metrics_info)
+        info_gauge.set(1)
+
     def log(self, scheduler_stats: SchedulerStats,
             iteration_stats: IterationStats):
         """Log to prometheus."""
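
Because the emulated Info metric is just a gauge pinned to 1 with the config fields as labels, its exposition output can be checked in isolation with prometheus_client. A standalone sketch (the label names and values are illustrative, not the real CacheConfig fields):

    import prometheus_client

    registry = prometheus_client.CollectorRegistry()
    prometheus_client.Gauge(
        name="vllm:cache_config_info",
        documentation="Information of the LLMEngine CacheConfig",
        labelnames=["block_size", "gpu_memory_utilization"],
        registry=registry).labels(
            block_size="16", gpu_memory_utilization="0.9").set(1)

    print(prometheus_client.generate_latest(registry).decode())
    # Sample line in the output:
    # vllm:cache_config_info{block_size="16",gpu_memory_utilization="0.9"} 1.0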