diff --git a/docs/source/design/v1/metrics.md b/docs/source/design/v1/metrics.md index 8c504511..3f962907 100644 --- a/docs/source/design/v1/metrics.md +++ b/docs/source/design/v1/metrics.md @@ -86,6 +86,17 @@ See [the PR which added this Dashboard](gh-pr:2316) for interesting and useful b Prometheus support was initially added [using the aioprometheus library](gh-pr:1890), but a switch was made quickly to [prometheus_client](gh-pr:2730). The rationale is discussed in both linked PRs. +With the switch away from `aioprometheus` to `prometheus_client`, we lost a `MetricsMiddleware` to track HTTP metrics, but this was reinstated [using prometheus_fastapi_instrumentator](gh-pr:15657): + +```bash +$ curl http://0.0.0.0:8000/metrics 2>/dev/null | grep -P '^http_(?!.*(_bucket|_created|_sum)).*' +http_requests_total{handler="/v1/completions",method="POST",status="2xx"} 201.0 +http_request_size_bytes_count{handler="/v1/completions"} 201.0 +http_response_size_bytes_count{handler="/v1/completions"} 201.0 +http_request_duration_highr_seconds_count 201.0 +http_request_duration_seconds_count{handler="/v1/completions",method="POST"} 201.0 +``` + ### Multi-process Mode In v0, metrics are collected in the engine core process and we use multi-process mode to make them available in the API server process. See . diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 87a4cdd1..13681958 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -310,33 +310,33 @@ def mount_metrics(app: FastAPI): # We need to set PROMETHEUS_MULTIPROC_DIR environment variable # before prometheus_client is imported. 
# See https://prometheus.github.io/client_python/multiprocess/ - from prometheus_client import (CollectorRegistry, make_asgi_app, + from prometheus_client import (REGISTRY, CollectorRegistry, make_asgi_app, multiprocess) from prometheus_fastapi_instrumentator import Instrumentator + registry = REGISTRY + prometheus_multiproc_dir_path = os.getenv("PROMETHEUS_MULTIPROC_DIR", None) if prometheus_multiproc_dir_path is not None: logger.debug("vLLM to use %s as PROMETHEUS_MULTIPROC_DIR", prometheus_multiproc_dir_path) registry = CollectorRegistry() multiprocess.MultiProcessCollector(registry) - Instrumentator( - excluded_handlers=[ - "/metrics", - "/health", - "/load", - "/ping", - "/version", - "/server_info", - ], - registry=registry, - ).add().instrument(app).expose(app) - # Add prometheus asgi middleware to route /metrics requests - metrics_route = Mount("/metrics", make_asgi_app(registry=registry)) - else: - # Add prometheus asgi middleware to route /metrics requests - metrics_route = Mount("/metrics", make_asgi_app()) + Instrumentator( + excluded_handlers=[ + "/metrics", + "/health", + "/load", + "/ping", + "/version", + "/server_info", + ], + registry=registry, + ).add().instrument(app).expose(app) + + # Add prometheus asgi middleware to route /metrics requests + metrics_route = Mount("/metrics", make_asgi_app(registry=registry)) # Workaround for 307 Redirect for /metrics metrics_route.path_regex = re.compile("^/metrics(?P.*)$")