From 2cb8c1540e27ffebdb668a8f10ec7b8b7703aab3 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Sat, 22 Feb 2025 08:20:45 +0000 Subject: [PATCH] [Metrics] Add `--show-hidden-metrics-for-version` CLI arg (#13295) --- docs/source/serving/metrics.md | 8 ++++++++ tests/test_version.py | 36 ++++++++++++++++++++++++++++++++++ vllm/config.py | 4 +++- vllm/engine/arg_utils.py | 20 +++++++++++++++++++ vllm/engine/metrics.py | 5 +++++ vllm/v1/metrics/loggers.py | 5 +++++ vllm/version.py | 18 +++++++++++++++++ 7 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 tests/test_version.py diff --git a/docs/source/serving/metrics.md b/docs/source/serving/metrics.md index 6c0dc888..1d55f201 100644 --- a/docs/source/serving/metrics.md +++ b/docs/source/serving/metrics.md @@ -36,3 +36,11 @@ The following metrics are exposed: :language: python :start-after: begin-metrics-definitions ::: + +The following metrics are deprecated and due to be removed in a future version: + +- *(No metrics are currently deprecated)* + +Note: when metrics are deprecated in version `X.Y`, they are hidden in version `X.Y+1` +but can be re-enabled using the `--show-hidden-metrics-for-version=X.Y` escape hatch, +and are then removed in version `X.Y+2`. 
diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 00000000..56842b6d --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 + +from unittest.mock import patch + +import pytest + +from vllm import version + + +def test_version_is_defined(): + assert version.__version__ is not None + + +def test_version_tuple(): + assert len(version.__version_tuple__) in (3, 4, 5) + + +@pytest.mark.parametrize( + "version_tuple, version_str, expected", + [ + ((0, 0, "dev"), "0.0", True), + ((0, 0, "dev"), "foobar", True), + ((0, 7, 4), "0.6", True), + ((0, 7, 4), "0.5", False), + ((0, 7, 4), "0.7", False), + ((1, 2, 3), "1.1", True), + ((1, 2, 3), "1.0", False), + ((1, 2, 3), "1.2", False), + # This won't work as expected + ((1, 0, 0), "1.-1", True), + ((1, 0, 0), "0.9", False), + ((1, 0, 0), "0.17", False), + ]) +def test_prev_minor_version_was(version_tuple, version_str, expected): + with patch("vllm.version.__version_tuple__", version_tuple): + assert version._prev_minor_version_was(version_str) == expected diff --git a/vllm/config.py b/vllm/config.py index dbcacdf4..797697aa 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -2653,7 +2653,9 @@ class DecodingConfig: @dataclass class ObservabilityConfig: - """Configuration for observability.""" + """Configuration for observability - metrics and tracing.""" + show_hidden_metrics: bool = False + otlp_traces_endpoint: Optional[str] = None # Collecting detailed timing information for each request can be expensive. 
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 8b460b33..d75e2324 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -10,6 +10,7 @@ from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional, import torch import vllm.envs as envs +from vllm import version from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat, DecodingConfig, DeviceConfig, HfOverrides, KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig, @@ -188,6 +189,7 @@ class EngineArgs: qlora_adapter_name_or_path: Optional[str] = None disable_logprobs_during_spec_decoding: Optional[bool] = None + show_hidden_metrics_for_version: Optional[str] = None otlp_traces_endpoint: Optional[str] = None collect_detailed_traces: Optional[str] = None disable_async_output_proc: bool = False @@ -909,6 +911,18 @@ class EngineArgs: default=None, help='Name or path of the QLoRA adapter.') + parser.add_argument('--show-hidden-metrics-for-version', + type=str, + default=None, + help='Enable deprecated Prometheus metrics that ' + 'have been hidden since the specified version. ' + 'For example, if a previously deprecated metric ' + 'has been hidden since the v0.7.0 release, you ' + 'use --show-hidden-metrics-for-version=0.7 as a ' + 'temporary escape hatch while you migrate to new ' + 'metrics. 
The metric is likely to be removed ' + 'completely in an upcoming release.') + parser.add_argument( '--otlp-traces-endpoint', type=str, @@ -1317,6 +1331,11 @@ class EngineArgs: decoding_config = DecodingConfig( guided_decoding_backend=self.guided_decoding_backend) + show_hidden_metrics = False + if self.show_hidden_metrics_for_version is not None: + show_hidden_metrics = version._prev_minor_version_was( + self.show_hidden_metrics_for_version) + detailed_trace_modules = [] if self.collect_detailed_traces is not None: detailed_trace_modules = self.collect_detailed_traces.split(",") @@ -1326,6 +1345,7 @@ class EngineArgs: f"Invalid module {m} in collect_detailed_traces. " f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}") observability_config = ObservabilityConfig( + show_hidden_metrics=show_hidden_metrics, otlp_traces_endpoint=self.otlp_traces_endpoint, collect_model_forward_time="model" in detailed_trace_modules or "all" in detailed_trace_modules, diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index e8736dff..cb3ca7a1 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -516,6 +516,11 @@ class PrometheusStatLogger(StatLoggerBase): self.metrics = self._metrics_cls(labelnames=list(labels.keys()), vllm_config=vllm_config) + # Use this flag to hide metrics that were deprecated in + # a previous release and which will be removed in the future + self.show_hidden_metrics = \ + vllm_config.observability_config.show_hidden_metrics + def _log_gauge(self, gauge, data: Union[int, float]) -> None: # Convenience function for logging to gauge. 
gauge.labels(**self.labels).set(data) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index e112a9f3..e562b414 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -95,6 +95,11 @@ class PrometheusStatLogger(StatLoggerBase): def __init__(self, vllm_config: VllmConfig): self._unregister_vllm_metrics() + # Use this flag to hide metrics that were deprecated in + # a previous release and which will be removed in the future + self.show_hidden_metrics = \ + vllm_config.observability_config.show_hidden_metrics + labelnames = ["model_name"] labelvalues = [vllm_config.model_config.served_model_name] diff --git a/vllm/version.py b/vllm/version.py index 70cd0289..ab5909b1 100644 --- a/vllm/version.py +++ b/vllm/version.py @@ -11,3 +11,21 @@ except Exception as e: __version__ = "dev" __version_tuple__ = (0, 0, __version__) + + +def _prev_minor_version_was(version_str): + """Check whether a given version matches the previous minor version. + + Return True if version_str matches the previous minor version. + + For example - return True if the current version is 0.7.4 and the + supplied version_str is '0.6'. + + Used for --show-hidden-metrics-for-version. + """ + # Match anything if this is a dev tree + if __version_tuple__[0:2] == (0, 0): + return True + + # Note - this won't do the right thing when we release 1.0! + return version_str == f"{__version_tuple__[0]}.{__version_tuple__[1] - 1}"