{ "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": 2, "links": [], "panels": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "description": "Memory usage of the PagedAttentionCache", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 10737418240, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "yellow", "value": 5368709120 }, { "color": "red", "value": 8589934592 } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 8, "w": 6, "x": 0, "y": 0 }, "id": 2, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "pluginVersion": "12.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, "editorMode": "builder", "expr": "kv_cache_memory_bytes", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "KV Cache Memory Usage", "transparent": true, "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "dark-blue" } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 6, "x": 6, "y": 0 }, "id": 13, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", 
"reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "12.0.0", "targets": [ { "disableTextWrap": false, "editorMode": "builder", "expr": "active_requests_count", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Active Requests", "transparent": true, "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "dark-orange" } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 6, "x": 12, "y": 0 }, "id": 14, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "12.0.0", "targets": [ { "disableTextWrap": false, "editorMode": "builder", "expr": "waiting_requests_count", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Waiting Requests", "transparent": true, "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "description": "Ratio of decode tokens to prefill tokens in a batch", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "blue" } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 6, "x": 18, "y": 0 }, "id": 6, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, 
"showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "12.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, "editorMode": "builder", "expr": "decode_prefill_ratio", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Decode/Prefill Ratio", "transparent": true, "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, "id": 10, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "12.0.0", "targets": [ { "editorMode": "code", "expr": "rate(decode_tokens_processed_total[$__rate_interval])", "legendFormat": "__auto", "range": true, "refId": "A" } ], "title": "Decode tokens throughput tok/s", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": 
false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, "id": 11, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "12.0.0", "targets": [ { "editorMode": "code", "expr": "rate(prefill_tokens_processed_total[$__rate_interval])", "legendFormat": "__auto", "range": true, "refId": "A" } ], "title": "Prefill rate tok/s", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 
0, "y": 16 }, "id": 9, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "12.0.0", "targets": [ { "editorMode": "code", "expr": "histogram_quantile(0.95, sum by(le) (rate(batch_fill_percentage_percent_bucket[$__rate_interval])))", "legendFormat": "p95", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by(le) (rate(batch_fill_percentage_percent_bucket[$__rate_interval])))", "hide": false, "instant": false, "legendFormat": "p99", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by(le) (rate(batch_fill_percentage_percent_bucket[$__rate_interval])))", "hide": false, "instant": false, "legendFormat": "p50", "range": true, "refId": "C" } ], "title": "Batch fill percentage percentiles", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "description": "KV Cache Memory Usage Over Time", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": 
{ "h": 8, "w": 12, "x": 12, "y": 16 }, "id": 4, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "12.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, "editorMode": "builder", "expr": "kv_cache_memory_bytes", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "Used memory", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, "editorMode": "builder", "expr": "kv_cache_free_memory_bytes", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Free memory", "range": true, "refId": "B", "useBackend": false } ], "title": "KV Cache Memory Usage Over Time", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, "id": 8, "options": { "displayMode": "gradient", "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, "maxVizHeight": 300, "minVizHeight": 10, "minVizWidth": 0, "namePlacement": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showUnfilled": true, "sizing": "auto", "valueMode": "color" }, "pluginVersion": "12.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.95, sum by(le) (rate(ttft_milliseconds_bucket[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": 
true, "legendFormat": "p95", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.5, sum by(le) (rate(ttft_milliseconds_bucket[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "legendFormat": "p50", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.99, sum by(le) (rate(ttft_milliseconds_bucket[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "p99", "range": true, "refId": "C", "useBackend": false } ], "title": "Time to First Token (TTFT)", "type": "bargauge" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, "id": 12, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": 
"12.0.0", "targets": [ { "editorMode": "code", "expr": "histogram_quantile(0.5, sum by(le) (rate(request_latency_milliseconds_bucket[$__rate_interval])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "editorMode": "code", "expr": "histogram_quantile(0.95, sum by(le) (rate(request_latency_milliseconds_bucket[$__rate_interval])))", "hide": false, "instant": false, "legendFormat": "p95", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by(le) (rate(request_latency_milliseconds_bucket[$__rate_interval])))", "hide": false, "instant": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Request latency percentiles", "type": "timeseries" } ], "preload": false, "refresh": "5s", "schemaVersion": 41, "tags": [], "templating": { "list": [] }, "time": { "from": "now-15m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Transformers Continuous Batching Metrics", "uid": "Lw6CTvVSz", "version": 5 }