[Metrics] Add --show-hidden-metrics-for-version CLI arg (#13295)

This commit is contained in:
Mark McLoughlin 2025-02-22 08:20:45 +00:00 committed by GitHub
parent 1cd981da4f
commit 2cb8c1540e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 95 additions and 1 deletions

View File

@ -36,3 +36,11 @@ The following metrics are exposed:
:language: python :language: python
:start-after: begin-metrics-definitions :start-after: begin-metrics-definitions
::: :::
The following metrics are deprecated and due to be removed in a future version:
- *(No metrics are currently deprecated)*
Note: when metrics are deprecated in version `X.Y`, they are hidden in version `X.Y+1`
but can be re-enabled using the `--show-hidden-metrics-for-version=X.Y` escape hatch,
and are then removed in version `X.Y+2`.

36
tests/test_version.py Normal file
View File

@ -0,0 +1,36 @@
# SPDX-License-Identifier: Apache-2.0
from unittest.mock import patch
import pytest
from vllm import version
def test_version_is_defined():
    """The version module must always expose a non-None ``__version__``."""
    reported_version = version.__version__
    assert reported_version is not None
def test_version_tuple():
    """``__version_tuple__`` must have three, four or five components."""
    component_count = len(version.__version_tuple__)
    assert 3 <= component_count <= 5
@pytest.mark.parametrize(
    "version_tuple, version_str, expected",
    [
        # A dev tree matches whatever string is supplied.
        ((0, 0, "dev"), "0.0", True),
        ((0, 0, "dev"), "foobar", True),
        # Only the immediately preceding minor version matches.
        ((0, 7, 4), "0.6", True),
        ((0, 7, 4), "0.5", False),
        ((0, 7, 4), "0.7", False),
        ((1, 2, 3), "1.1", True),
        ((1, 2, 3), "1.0", False),
        ((1, 2, 3), "1.2", False),
        # Known limitation: for an X.0 release the helper computes the
        # previous minor version as "X.-1", so no real earlier release
        # (e.g. "0.9" or "0.17") is ever matched.
        ((1, 0, 0), "1.-1", True),
        ((1, 0, 0), "0.9", False),
        ((1, 0, 0), "0.17", False),
    ])
def test_prev_minor_version_was(version_tuple, version_str, expected):
    """Check ``_prev_minor_version_was`` against a patched version tuple."""
    with patch("vllm.version.__version_tuple__", version_tuple):
        matched = version._prev_minor_version_was(version_str)
    assert matched == expected

View File

@ -2653,7 +2653,9 @@ class DecodingConfig:
@dataclass @dataclass
class ObservabilityConfig: class ObservabilityConfig:
"""Configuration for observability.""" """Configuration for observability - metrics and tracing."""
show_hidden_metrics: bool = False
otlp_traces_endpoint: Optional[str] = None otlp_traces_endpoint: Optional[str] = None
# Collecting detailed timing information for each request can be expensive. # Collecting detailed timing information for each request can be expensive.

View File

@ -10,6 +10,7 @@ from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional,
import torch import torch
import vllm.envs as envs import vllm.envs as envs
from vllm import version
from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat, from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
DecodingConfig, DeviceConfig, HfOverrides, DecodingConfig, DeviceConfig, HfOverrides,
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig, KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
@ -188,6 +189,7 @@ class EngineArgs:
qlora_adapter_name_or_path: Optional[str] = None qlora_adapter_name_or_path: Optional[str] = None
disable_logprobs_during_spec_decoding: Optional[bool] = None disable_logprobs_during_spec_decoding: Optional[bool] = None
show_hidden_metrics_for_version: Optional[str] = None
otlp_traces_endpoint: Optional[str] = None otlp_traces_endpoint: Optional[str] = None
collect_detailed_traces: Optional[str] = None collect_detailed_traces: Optional[str] = None
disable_async_output_proc: bool = False disable_async_output_proc: bool = False
@ -909,6 +911,18 @@ class EngineArgs:
default=None, default=None,
help='Name or path of the QLoRA adapter.') help='Name or path of the QLoRA adapter.')
parser.add_argument('--show-hidden-metrics-for-version',
type=str,
default=None,
help='Enable deprecated Prometheus metrics that '
'have been hidden since the specified version. '
'For example, if a previously deprecated metric '
'has been hidden since the v0.7.0 release, you '
'use --show-hidden-metrics-for-version=0.7 as a '
'temporary escape hatch while you migrate to new '
'metrics. The metric is likely to be removed '
'completely in an upcoming release.')
parser.add_argument( parser.add_argument(
'--otlp-traces-endpoint', '--otlp-traces-endpoint',
type=str, type=str,
@ -1317,6 +1331,11 @@ class EngineArgs:
decoding_config = DecodingConfig( decoding_config = DecodingConfig(
guided_decoding_backend=self.guided_decoding_backend) guided_decoding_backend=self.guided_decoding_backend)
show_hidden_metrics = False
if self.show_hidden_metrics_for_version is not None:
show_hidden_metrics = version._prev_minor_version_was(
self.show_hidden_metrics_for_version)
detailed_trace_modules = [] detailed_trace_modules = []
if self.collect_detailed_traces is not None: if self.collect_detailed_traces is not None:
detailed_trace_modules = self.collect_detailed_traces.split(",") detailed_trace_modules = self.collect_detailed_traces.split(",")
@ -1326,6 +1345,7 @@ class EngineArgs:
f"Invalid module {m} in collect_detailed_traces. " f"Invalid module {m} in collect_detailed_traces. "
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}") f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
observability_config = ObservabilityConfig( observability_config = ObservabilityConfig(
show_hidden_metrics=show_hidden_metrics,
otlp_traces_endpoint=self.otlp_traces_endpoint, otlp_traces_endpoint=self.otlp_traces_endpoint,
collect_model_forward_time="model" in detailed_trace_modules collect_model_forward_time="model" in detailed_trace_modules
or "all" in detailed_trace_modules, or "all" in detailed_trace_modules,

View File

@ -516,6 +516,11 @@ class PrometheusStatLogger(StatLoggerBase):
self.metrics = self._metrics_cls(labelnames=list(labels.keys()), self.metrics = self._metrics_cls(labelnames=list(labels.keys()),
vllm_config=vllm_config) vllm_config=vllm_config)
# Use this flag to hide metrics that were deprecated in
# a previous release and which will be removed in a future release
self.show_hidden_metrics = \
vllm_config.observability_config.show_hidden_metrics
def _log_gauge(self, gauge, data: Union[int, float]) -> None: def _log_gauge(self, gauge, data: Union[int, float]) -> None:
# Convenience function for logging to gauge. # Convenience function for logging to gauge.
gauge.labels(**self.labels).set(data) gauge.labels(**self.labels).set(data)

View File

@ -95,6 +95,11 @@ class PrometheusStatLogger(StatLoggerBase):
def __init__(self, vllm_config: VllmConfig): def __init__(self, vllm_config: VllmConfig):
self._unregister_vllm_metrics() self._unregister_vllm_metrics()
# Use this flag to hide metrics that were deprecated in
# a previous release and which will be removed in a future release
self.show_hidden_metrics = \
vllm_config.observability_config.show_hidden_metrics
labelnames = ["model_name"] labelnames = ["model_name"]
labelvalues = [vllm_config.model_config.served_model_name] labelvalues = [vllm_config.model_config.served_model_name]

View File

@ -11,3 +11,21 @@ except Exception as e:
__version__ = "dev" __version__ = "dev"
__version_tuple__ = (0, 0, __version__) __version_tuple__ = (0, 0, __version__)
def _prev_minor_version_was(version_str):
    """Report whether ``version_str`` names the previous minor version.

    For example, if the current version is 0.7.4, this returns True for
    a ``version_str`` of '0.6' and False for '0.5' or '0.7'.

    A dev tree, whose version tuple starts with ``(0, 0)``, matches any
    ``version_str``.

    Used for --show-hidden-metrics-for-version.
    """
    # A dev tree has no previous release to compare with, so accept
    # whatever was supplied.
    if __version_tuple__[:2] == (0, 0):
        return True

    major = __version_tuple__[0]
    minor = __version_tuple__[1]

    # Note - this won't do the right thing when we release 1.0: the
    # string expected for a 1.0.x version is "1.-1".
    expected_str = f"{major}.{minor - 1}"
    return version_str == expected_str