allow user to choose which vLLM metrics to display in Grafana (#3393)

This commit is contained in:
Allen.Dou 2024-03-14 14:35:13 +08:00 committed by GitHub
parent 81653d9688
commit a37415c31b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,35 +1,4 @@
{ {
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "10.2.3"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": { "annotations": {
"list": [ "list": [
{ {
@ -42,6 +11,12 @@
"hide": true, "hide": true,
"iconColor": "rgba(0, 211, 255, 1)", "iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts", "name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard" "type": "dashboard"
} }
] ]
@ -50,14 +25,14 @@
"editable": true, "editable": true,
"fiscalYearStartMonth": 0, "fiscalYearStartMonth": 0,
"graphTooltip": 0, "graphTooltip": 0,
"id": null, "id": 29,
"links": [], "links": [],
"liveNow": false, "liveNow": false,
"panels": [ "panels": [
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"description": "End to end request latency measured in seconds.", "description": "End to end request latency measured in seconds.",
"fieldConfig": { "fieldConfig": {
@ -66,7 +41,6 @@
"mode": "palette-classic" "mode": "palette-classic"
}, },
"custom": { "custom": {
"axisBorderShow": false,
"axisCenteredZero": false, "axisCenteredZero": false,
"axisColorMode": "text", "axisColorMode": "text",
"axisLabel": "", "axisLabel": "",
@ -80,7 +54,6 @@
"tooltip": false, "tooltip": false,
"viz": false "viz": false
}, },
"insertNulls": false,
"lineInterpolation": "linear", "lineInterpolation": "linear",
"lineWidth": 1, "lineWidth": 1,
"pointSize": 5, "pointSize": 5,
@ -138,11 +111,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"includeNullMetadata": false, "includeNullMetadata": false,
"instant": false, "instant": false,
@ -154,11 +127,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -171,11 +144,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -188,11 +161,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -205,10 +178,10 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "rate(vllm:e2e_request_latency_seconds_sum[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count[$__rate_interval])", "expr": "rate(vllm:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
"hide": false, "hide": false,
"instant": false, "instant": false,
"legendFormat": "Average", "legendFormat": "Average",
@ -222,7 +195,7 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"description": "Number of tokens processed per second", "description": "Number of tokens processed per second",
"fieldConfig": { "fieldConfig": {
@ -231,7 +204,6 @@
"mode": "palette-classic" "mode": "palette-classic"
}, },
"custom": { "custom": {
"axisBorderShow": false,
"axisCenteredZero": false, "axisCenteredZero": false,
"axisColorMode": "text", "axisColorMode": "text",
"axisLabel": "", "axisLabel": "",
@ -245,7 +217,6 @@
"tooltip": false, "tooltip": false,
"viz": false "viz": false
}, },
"insertNulls": false,
"lineInterpolation": "linear", "lineInterpolation": "linear",
"lineWidth": 1, "lineWidth": 1,
"pointSize": 5, "pointSize": 5,
@ -302,11 +273,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "rate(vllm:prompt_tokens_total[$__rate_interval])", "expr": "rate(vllm:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
"fullMetaSearch": false, "fullMetaSearch": false,
"includeNullMetadata": false, "includeNullMetadata": false,
"instant": false, "instant": false,
@ -318,11 +289,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "rate(vllm:generation_tokens_total[$__rate_interval])", "expr": "rate(vllm:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -339,7 +310,7 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"description": "Inter token latency in seconds.", "description": "Inter token latency in seconds.",
"fieldConfig": { "fieldConfig": {
@ -348,7 +319,6 @@
"mode": "palette-classic" "mode": "palette-classic"
}, },
"custom": { "custom": {
"axisBorderShow": false,
"axisCenteredZero": false, "axisCenteredZero": false,
"axisColorMode": "text", "axisColorMode": "text",
"axisLabel": "", "axisLabel": "",
@ -362,7 +332,6 @@
"tooltip": false, "tooltip": false,
"viz": false "viz": false
}, },
"insertNulls": false,
"lineInterpolation": "linear", "lineInterpolation": "linear",
"lineWidth": 1, "lineWidth": 1,
"pointSize": 5, "pointSize": 5,
@ -420,11 +389,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"includeNullMetadata": false, "includeNullMetadata": false,
"instant": false, "instant": false,
@ -436,11 +405,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -453,11 +422,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -470,11 +439,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -487,10 +456,10 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "rate(vllm:time_per_output_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count[$__rate_interval])", "expr": "rate(vllm:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
"hide": false, "hide": false,
"instant": false, "instant": false,
"legendFormat": "Mean", "legendFormat": "Mean",
@ -504,7 +473,7 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"description": "Number of requests in RUNNING, WAITING, and SWAPPED state", "description": "Number of requests in RUNNING, WAITING, and SWAPPED state",
"fieldConfig": { "fieldConfig": {
@ -513,7 +482,6 @@
"mode": "palette-classic" "mode": "palette-classic"
}, },
"custom": { "custom": {
"axisBorderShow": false,
"axisCenteredZero": false, "axisCenteredZero": false,
"axisColorMode": "text", "axisColorMode": "text",
"axisLabel": "", "axisLabel": "",
@ -527,7 +495,6 @@
"tooltip": false, "tooltip": false,
"viz": false "viz": false
}, },
"insertNulls": false,
"lineInterpolation": "linear", "lineInterpolation": "linear",
"lineWidth": 1, "lineWidth": 1,
"pointSize": 5, "pointSize": 5,
@ -585,11 +552,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "vllm:num_requests_running", "expr": "vllm:num_requests_running{model_name=\"$model_name\"}",
"fullMetaSearch": false, "fullMetaSearch": false,
"includeNullMetadata": true, "includeNullMetadata": true,
"instant": false, "instant": false,
@ -601,11 +568,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "vllm:num_requests_swapped", "expr": "vllm:num_requests_swapped{model_name=\"$model_name\"}",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": true, "includeNullMetadata": true,
@ -618,11 +585,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "vllm:num_requests_waiting", "expr": "vllm:num_requests_waiting{model_name=\"$model_name\"}",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": true, "includeNullMetadata": true,
@ -639,7 +606,7 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"description": "P50, P90, P95, and P99 TTFT latency in seconds.", "description": "P50, P90, P95, and P99 TTFT latency in seconds.",
"fieldConfig": { "fieldConfig": {
@ -648,7 +615,6 @@
"mode": "palette-classic" "mode": "palette-classic"
}, },
"custom": { "custom": {
"axisBorderShow": false,
"axisCenteredZero": false, "axisCenteredZero": false,
"axisColorMode": "text", "axisColorMode": "text",
"axisLabel": "", "axisLabel": "",
@ -662,7 +628,6 @@
"tooltip": false, "tooltip": false,
"viz": false "viz": false
}, },
"insertNulls": false,
"lineInterpolation": "linear", "lineInterpolation": "linear",
"lineWidth": 1, "lineWidth": 1,
"pointSize": 5, "pointSize": 5,
@ -720,11 +685,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -737,11 +702,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"includeNullMetadata": false, "includeNullMetadata": false,
"instant": false, "instant": false,
@ -753,11 +718,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -770,11 +735,11 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false, "fullMetaSearch": false,
"hide": false, "hide": false,
"includeNullMetadata": false, "includeNullMetadata": false,
@ -787,10 +752,10 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "rate(vllm:time_to_first_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count[$__rate_interval])", "expr": "rate(vllm:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
"hide": false, "hide": false,
"instant": false, "instant": false,
"legendFormat": "Average", "legendFormat": "Average",
@ -804,7 +769,7 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"description": "Percentage of used cache blocks by vLLM.", "description": "Percentage of used cache blocks by vLLM.",
"fieldConfig": { "fieldConfig": {
@ -813,7 +778,6 @@
"mode": "palette-classic" "mode": "palette-classic"
}, },
"custom": { "custom": {
"axisBorderShow": false,
"axisCenteredZero": false, "axisCenteredZero": false,
"axisColorMode": "text", "axisColorMode": "text",
"axisLabel": "", "axisLabel": "",
@ -827,7 +791,6 @@
"tooltip": false, "tooltip": false,
"viz": false "viz": false
}, },
"insertNulls": false,
"lineInterpolation": "linear", "lineInterpolation": "linear",
"lineWidth": 1, "lineWidth": 1,
"pointSize": 5, "pointSize": 5,
@ -885,10 +848,10 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "vllm:gpu_cache_usage_perc", "expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}",
"instant": false, "instant": false,
"legendFormat": "GPU Cache Usage", "legendFormat": "GPU Cache Usage",
"range": true, "range": true,
@ -897,10 +860,10 @@
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "${DS_PROMETHEUS}" "uid": "prometheus"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "vllm:cpu_cache_usage_perc", "expr": "vllm:cpu_cache_usage_perc{model_name=\"$model_name\"}",
"hide": false, "hide": false,
"instant": false, "instant": false,
"legendFormat": "CPU Cache Usage", "legendFormat": "CPU Cache Usage",
@ -913,10 +876,39 @@
} }
], ],
"refresh": "", "refresh": "",
"schemaVersion": 39, "schemaVersion": 37,
"style": "dark",
"tags": [], "tags": [],
"templating": { "templating": {
"list": [] "list": [
{
"current": {
"selected": false,
"text": "vllm",
"value": "vllm"
},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(model_name)",
"hide": 0,
"includeAll": false,
"label": "model_name",
"multi": false,
"name": "model_name",
"options": [],
"query": {
"query": "label_values(model_name)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
}, },
"time": { "time": {
"from": "now-5m", "from": "now-5m",