mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 13:20:12 +06:00

* stash for now * initial commit * small updated * up * up * works! * nits and fixes * don't loop too much * finish working example * update * fix the small freeblocks issue * feat: stream inputs to continuous batch * fix: update attn from `eager` to `sdpa` * refactor: fmt * refactor: cleanup unnecessary code * feat: add `update` fn to `PagedAttentionCache` * feat: broken optimal block size computation * fix: debugging invalid cache logic * fix: attention mask * refactor: use custom prompts for example * feat: add streaming output * fix: prefill split refactor: add doc strings and unsound/redundant logic fix: compute optimal blocks logic * fix: send decoded tokens when `prefilling_split` -> `decoding` * refactor: move logic to appropriate parent class * fix: remove truncation as we split prefilling anyways refactor: early return when we have enough selected requests * feat: add paged attention forward * push Ggraoh> * add paged sdpa * update * btter mps defaults * feat: add progress bar for `generate_batch` * feat: add opentelemetry metrics (ttft + batch fill %age) * feat: add tracing * Add cuda graphs (#38059) * draft cudagraphs addition * nits * styling * update * fix * kinda draft of what it should look like * fixes * lol * not sure why inf everywhere * can generate but output is shit * some fixes * we should have a single device synch * broken outputs but it does run * refactor * updates * updates with some fixes * fix mask causality * another commit that casts after * add error * simplify example * update * updates * revert llama changes * fix merge conflicts * fix: tracing and metrics * my updates * update script default values * fix block allocation issue * fix prefill split attnetion mask * no bugs * add paged eager * fix * update * style * feat: add pytorch traces * fix * fix * refactor: remove pytorch profiler data * style * nits * cleanup * draft test file * fix * fix * fix paged and graphs * small renamings * cleanups and push * refactor: move tracing 
and metrics logic to utils * refactor: trace more blocks of code * nits * nits * update * to profile or not to profile * refactor: create new output object * causal by default * cleanup but generations are still off for IDK what reason * simplifications but not running still * this does work. * small quality of life updates * nits * updaet * fix the scheduler * fix warning * ol * fully fixed * nits * different generation parameters * nice * just style * feat: add cache memory usage * feat: add kv cache free memory * feat: add active/waiting count & req latency * do the sampling * fix: synchronize CUDA only if available and improve error handling in ContinuousBatchingManager * fix on mps * feat: add dashboard & histogram buckets * perf: improve waiting reqs data structures * attempt to compile, but we should only do it on mps AFAIK * feat: decouple scheduling logic * just a draft * c;eanup and fixup * optional * style * update * update * remove the draft documentation * fix import as well * update * fix the test * style doomed --------- Co-authored-by: Luc Georges <luc.sydney.georges@gmail.com>
974 lines
33 KiB
JSON
974 lines
33 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "grafana",
|
|
"uid": "-- Grafana --"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"target": {
|
|
"limit": 100,
|
|
"matchAny": false,
|
|
"tags": [],
|
|
"type": "dashboard"
|
|
},
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": 2,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"description": "Memory usage of the PagedAttentionCache",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 10737418240,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 5368709120
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 8589934592
|
|
}
|
|
]
|
|
},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 2,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "kv_cache_memory_bytes",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "KV Cache Memory Usage",
|
|
"transparent": true,
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "dark-blue"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 0
|
|
},
|
|
"id": 13,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "active_requests_count",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Active Requests",
|
|
"transparent": true,
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "dark-orange"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 0
|
|
},
|
|
"id": 14,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "waiting_requests_count",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Waiting Requests",
|
|
"transparent": true,
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"description": "Ratio of decode tokens to prefill tokens in a batch",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "blue"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 0
|
|
},
|
|
"id": 6,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "decode_prefill_ratio",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Decode/Prefill Ratio",
|
|
"transparent": true,
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"id": 10,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(decode_tokens_processed_total[$__rate_interval])",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Decode tokens throughput tok/s",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"id": 11,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(prefill_tokens_processed_total[$__rate_interval])",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Prefill rate tok/s",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"id": 9,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.95, sum by(le) (rate(batch_fill_percentage_percent_bucket[$__rate_interval])))",
|
|
"legendFormat": "p95",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le) (rate(batch_fill_percentage_percent_bucket[$__rate_interval])))",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "p99",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.5, sum by(le) (rate(batch_fill_percentage_percent_bucket[$__rate_interval])))",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "p50",
|
|
"range": true,
|
|
"refId": "C"
|
|
}
|
|
],
|
|
"title": "Batch fill percentage percentiles",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"description": "KV Cache Memory Usage Over Time",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 20,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 16
|
|
},
|
|
"id": 4,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "kv_cache_memory_bytes",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"legendFormat": "Used memory",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "kv_cache_free_memory_bytes",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "Free memory",
|
|
"range": true,
|
|
"refId": "B",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "KV Cache Memory Usage Over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "ms"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"id": 8,
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": false
|
|
},
|
|
"maxVizHeight": 300,
|
|
"minVizHeight": 10,
|
|
"minVizWidth": 0,
|
|
"namePlacement": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showUnfilled": true,
|
|
"sizing": "auto",
|
|
"valueMode": "color"
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "histogram_quantile(0.95, sum by(le) (rate(ttft_milliseconds_bucket[$__rate_interval])))",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"legendFormat": "p95",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "histogram_quantile(0.5, sum by(le) (rate(ttft_milliseconds_bucket[$__rate_interval])))",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"legendFormat": "p50",
|
|
"range": true,
|
|
"refId": "B",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "histogram_quantile(0.99, sum by(le) (rate(ttft_milliseconds_bucket[$__rate_interval])))",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": false,
|
|
"instant": false,
|
|
"legendFormat": "p99",
|
|
"range": true,
|
|
"refId": "C",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Time to First Token (TTFT)",
|
|
"type": "bargauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "ms"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 24
|
|
},
|
|
"id": 12,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "12.0.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.5, sum by(le) (rate(request_latency_milliseconds_bucket[$__rate_interval])))",
|
|
"legendFormat": "p50",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.95, sum by(le) (rate(request_latency_milliseconds_bucket[$__rate_interval])))",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "p95",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "PBFA97CFB590B2093"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le) (rate(request_latency_milliseconds_bucket[$__rate_interval])))",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "p99",
|
|
"range": true,
|
|
"refId": "C"
|
|
}
|
|
],
|
|
"title": "Request latency percentiles",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"preload": false,
|
|
"refresh": "5s",
|
|
"schemaVersion": 41,
|
|
"tags": [],
|
|
"templating": {
|
|
"list": []
|
|
},
|
|
"time": {
|
|
"from": "now-15m",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {},
|
|
"timezone": "",
|
|
"title": "Transformers Continuous Batching Metrics",
|
|
"uid": "Lw6CTvVSz",
|
|
"version": 5
|
|
} |