[Metrics] Add --show-hidden-metrics-for-version
CLI arg (#13295)
This commit is contained in:
parent
1cd981da4f
commit
2cb8c1540e
@ -36,3 +36,11 @@ The following metrics are exposed:
|
|||||||
:language: python
|
:language: python
|
||||||
:start-after: begin-metrics-definitions
|
:start-after: begin-metrics-definitions
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
The following metrics are deprecated and due to be removed in a future version:
|
||||||
|
|
||||||
|
- *(No metrics are currently deprecated)*
|
||||||
|
|
||||||
|
Note: when metrics are deprecated in version `X.Y`, they are hidden in version `X.Y+1`
|
||||||
|
but can be re-enabled using the `--show-hidden-metrics-for-version=X.Y` escape hatch,
|
||||||
|
and are then removed in version `X.Y+2`.
|
||||||
|
36
tests/test_version.py
Normal file
36
tests/test_version.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from vllm import version
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_is_defined():
    """Check that the version module exposes a non-None ``__version__``."""
    reported_version = version.__version__
    assert reported_version is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_tuple():
    """Check that ``__version_tuple__`` has three, four or five components."""
    component_count = len(version.__version_tuple__)
    assert 3 <= component_count <= 5
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "version_tuple, version_str, expected",
    [
        # A dev tree is expected to match whatever string is supplied.
        ((0, 0, "dev"), "0.0", True),
        ((0, 0, "dev"), "foobar", True),
        # Only the immediately preceding minor version should match.
        ((0, 7, 4), "0.6", True),
        ((0, 7, 4), "0.5", False),
        ((0, 7, 4), "0.7", False),
        ((1, 2, 3), "1.1", True),
        ((1, 2, 3), "1.0", False),
        ((1, 2, 3), "1.2", False),
        # This won't work as expected: for a x.0 release the only string
        # that matches is the nonsensical "1.-1".
        ((1, 0, 0), "1.-1", True),
        ((1, 0, 0), "0.9", False),
        ((1, 0, 0), "0.17", False),
    ])
def test_prev_minor_version_was(version_tuple, version_str, expected):
    """Check ``_prev_minor_version_was`` against a patched version tuple."""
    # Evaluate while the module-level tuple is patched, then compare.
    with patch("vllm.version.__version_tuple__", version_tuple):
        matched = version._prev_minor_version_was(version_str)
    assert matched == expected
|
@ -2653,7 +2653,9 @@ class DecodingConfig:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ObservabilityConfig:
|
class ObservabilityConfig:
|
||||||
"""Configuration for observability."""
|
"""Configuration for observability - metrics and tracing."""
|
||||||
|
show_hidden_metrics: bool = False
|
||||||
|
|
||||||
otlp_traces_endpoint: Optional[str] = None
|
otlp_traces_endpoint: Optional[str] = None
|
||||||
|
|
||||||
# Collecting detailed timing information for each request can be expensive.
|
# Collecting detailed timing information for each request can be expensive.
|
||||||
|
@ -10,6 +10,7 @@ from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional,
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
|
from vllm import version
|
||||||
from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
|
from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
|
||||||
DecodingConfig, DeviceConfig, HfOverrides,
|
DecodingConfig, DeviceConfig, HfOverrides,
|
||||||
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
|
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
|
||||||
@ -188,6 +189,7 @@ class EngineArgs:
|
|||||||
qlora_adapter_name_or_path: Optional[str] = None
|
qlora_adapter_name_or_path: Optional[str] = None
|
||||||
disable_logprobs_during_spec_decoding: Optional[bool] = None
|
disable_logprobs_during_spec_decoding: Optional[bool] = None
|
||||||
|
|
||||||
|
show_hidden_metrics_for_version: Optional[str] = None
|
||||||
otlp_traces_endpoint: Optional[str] = None
|
otlp_traces_endpoint: Optional[str] = None
|
||||||
collect_detailed_traces: Optional[str] = None
|
collect_detailed_traces: Optional[str] = None
|
||||||
disable_async_output_proc: bool = False
|
disable_async_output_proc: bool = False
|
||||||
@ -909,6 +911,18 @@ class EngineArgs:
|
|||||||
default=None,
|
default=None,
|
||||||
help='Name or path of the QLoRA adapter.')
|
help='Name or path of the QLoRA adapter.')
|
||||||
|
|
||||||
|
parser.add_argument('--show-hidden-metrics-for-version',
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help='Enable deprecated Prometheus metrics that '
|
||||||
|
'have been hidden since the specified version. '
|
||||||
|
'For example, if a previously deprecated metric '
|
||||||
|
'has been hidden since the v0.7.0 release, you '
|
||||||
|
'use --show-hidden-metrics-for-version=0.7 as a '
|
||||||
|
'temporary escape hatch while you migrate to new '
|
||||||
|
'metrics. The metric is likely to be removed '
|
||||||
|
'completely in an upcoming release.')
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--otlp-traces-endpoint',
|
'--otlp-traces-endpoint',
|
||||||
type=str,
|
type=str,
|
||||||
@ -1317,6 +1331,11 @@ class EngineArgs:
|
|||||||
decoding_config = DecodingConfig(
|
decoding_config = DecodingConfig(
|
||||||
guided_decoding_backend=self.guided_decoding_backend)
|
guided_decoding_backend=self.guided_decoding_backend)
|
||||||
|
|
||||||
|
show_hidden_metrics = False
|
||||||
|
if self.show_hidden_metrics_for_version is not None:
|
||||||
|
show_hidden_metrics = version._prev_minor_version_was(
|
||||||
|
self.show_hidden_metrics_for_version)
|
||||||
|
|
||||||
detailed_trace_modules = []
|
detailed_trace_modules = []
|
||||||
if self.collect_detailed_traces is not None:
|
if self.collect_detailed_traces is not None:
|
||||||
detailed_trace_modules = self.collect_detailed_traces.split(",")
|
detailed_trace_modules = self.collect_detailed_traces.split(",")
|
||||||
@ -1326,6 +1345,7 @@ class EngineArgs:
|
|||||||
f"Invalid module {m} in collect_detailed_traces. "
|
f"Invalid module {m} in collect_detailed_traces. "
|
||||||
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
|
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
|
||||||
observability_config = ObservabilityConfig(
|
observability_config = ObservabilityConfig(
|
||||||
|
show_hidden_metrics=show_hidden_metrics,
|
||||||
otlp_traces_endpoint=self.otlp_traces_endpoint,
|
otlp_traces_endpoint=self.otlp_traces_endpoint,
|
||||||
collect_model_forward_time="model" in detailed_trace_modules
|
collect_model_forward_time="model" in detailed_trace_modules
|
||||||
or "all" in detailed_trace_modules,
|
or "all" in detailed_trace_modules,
|
||||||
|
@ -516,6 +516,11 @@ class PrometheusStatLogger(StatLoggerBase):
|
|||||||
self.metrics = self._metrics_cls(labelnames=list(labels.keys()),
|
self.metrics = self._metrics_cls(labelnames=list(labels.keys()),
|
||||||
vllm_config=vllm_config)
|
vllm_config=vllm_config)
|
||||||
|
|
||||||
|
# Use this flag to hide metrics that were deprecated in
|
||||||
|
# a previous release and which will be removed in the future
|
||||||
|
self.show_hidden_metrics = \
|
||||||
|
vllm_config.observability_config.show_hidden_metrics
|
||||||
|
|
||||||
def _log_gauge(self, gauge, data: Union[int, float]) -> None:
|
def _log_gauge(self, gauge, data: Union[int, float]) -> None:
|
||||||
# Convenience function for logging to gauge.
|
# Convenience function for logging to gauge.
|
||||||
gauge.labels(**self.labels).set(data)
|
gauge.labels(**self.labels).set(data)
|
||||||
|
@ -95,6 +95,11 @@ class PrometheusStatLogger(StatLoggerBase):
|
|||||||
def __init__(self, vllm_config: VllmConfig):
|
def __init__(self, vllm_config: VllmConfig):
|
||||||
self._unregister_vllm_metrics()
|
self._unregister_vllm_metrics()
|
||||||
|
|
||||||
|
# Use this flag to hide metrics that were deprecated in
|
||||||
|
# a previous release and which will be removed in the future
|
||||||
|
self.show_hidden_metrics = \
|
||||||
|
vllm_config.observability_config.show_hidden_metrics
|
||||||
|
|
||||||
labelnames = ["model_name"]
|
labelnames = ["model_name"]
|
||||||
labelvalues = [vllm_config.model_config.served_model_name]
|
labelvalues = [vllm_config.model_config.served_model_name]
|
||||||
|
|
||||||
|
@ -11,3 +11,21 @@ except Exception as e:
|
|||||||
|
|
||||||
__version__ = "dev"
|
__version__ = "dev"
|
||||||
__version_tuple__ = (0, 0, __version__)
|
__version_tuple__ = (0, 0, __version__)
|
||||||
|
|
||||||
|
|
||||||
|
def _prev_minor_version_was(version_str):
    """Report whether ``version_str`` names the previous minor version.

    For example, when the current version is 0.7.4, '0.6' matches while
    '0.5' and '0.7' do not. On a dev tree (a version tuple starting with
    ``(0, 0)``) every string matches.

    Used for --show-hidden-metrics-for-version.

    Args:
        version_str: Version string in 'MAJOR.MINOR' form to compare.

    Returns:
        True on a dev tree, or when ``version_str`` equals the current
        major version joined with the current minor version minus one;
        False otherwise.
    """
    # Match anything if this is a dev tree
    is_dev_tree = __version_tuple__[0:2] == (0, 0)
    if is_dev_tree:
        return True

    major = __version_tuple__[0]
    minor = __version_tuple__[1]

    # Note - this won't do the right thing when we release 1.0!
    # (a minor of 0 produces a string such as '1.-1')
    previous_minor = f"{major}.{minor - 1}"
    return version_str == previous_minor
|
||||||
|
Loading…
x
Reference in New Issue
Block a user