feat: add benchmarks_entrypoint.py (#34495)

* feat: add `benchmarks_entrypoint.py`

Adding `benchmarks_entrypoint.py` file, which will be run from the
benchmarks CI.

This python script will list all python files from the `benchmark/`
folder and run the included `run_benchmark` function, allowing people to
add new benchmark scripts.

* feat: add `MetricsRecorder`

* feat: update dashboard

* fix: add missing arguments to `MetricsRecorder`

* feat: update dash & add datasource + `default.yml`

* fix: move responsibility to create `MetricsRecorder` in bench script

* fix: update incorrect datasource UID

* fix: incorrect variable values

* debug: benchmark entrypoint script

* refactor: update log level

* fix: update broken import

* feat: add debug log in `MetricsRecorder`

* debug: set log level to debug

* fix: set connection `autocommit` to `True`
This commit is contained in:
Luc Georges 2024-12-18 18:59:07 +01:00 committed by GitHub
parent 2c47618c1a
commit 9a94dfe123
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 334 additions and 169 deletions

View File

@ -63,7 +63,7 @@ jobs:
commit_id=$GITHUB_SHA commit_id=$GITHUB_SHA
fi fi
commit_msg=$(git show -s --format=%s | cut -c1-70) commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg" python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
env: env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Enable this to see debug logs # Enable this to see debug logs

49
benchmark/README.md Normal file
View File

@ -0,0 +1,49 @@
# Benchmarks
You might want to add new benchmarks.
You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
The expected function signature is the following:
```py
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
```
## Writing metrics to the database
`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
cf [`llama.py`](./llama.py) to see an example of this in practice.
```py
from benchmarks_entrypoint import MetricsRecorder
import psycopg2
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
# To collect device measurements
metrics_recorder.collect_device_measurements(
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
)
# To collect your model measurements
metrics_recorder.collect_model_measurements(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
)
```

View File

@ -0,0 +1,144 @@
import argparse
import importlib.util
import logging
import os
from typing import Dict
import psycopg2
import sys
from psycopg2.extras import Json
from psycopg2.extensions import register_adapter
register_adapter(dict, Json)
class ImportModuleException(Exception):
pass
class MetricsRecorder:
def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
self.conn = connection
self.conn.autocommit = True
self.logger = logger
self.branch = branch
self.commit_id = commit_id
self.commit_msg = commit_msg
def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
"""
Creates a new benchmark, returns the benchmark id
"""
# gpu_name: str, model_id: str
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
(self.branch, self.commit_id, self.commit_msg, metadata),
)
benchmark_id = cur.fetchone()[0]
logger.debug(f"initialised benchmark #{benchmark_id}")
return benchmark_id
def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
"""
Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
"""
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
)
self.logger.debug(
f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
)
def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
with self.conn.cursor() as cur:
cur.execute(
"""
INSERT INTO model_measurements (
benchmark_id,
measurements
) VALUES (%s, %s)
""",
(
benchmark_id,
measurements,
),
)
self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")
def close(self):
self.conn.close()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
def parse_arguments():
"""
Parse command line arguments for the benchmarking CLI.
"""
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)
parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)
parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
args = parser.parse_args()
return args.branch, args.commit_id, args.commit_msg
def import_from_path(module_name, file_path):
try:
spec = importlib.util.spec_from_file_location(module_name, file_path)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
except Exception as e:
raise ImportModuleException(f"failed to load python module: {e}")
if __name__ == "__main__":
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
branch, commit_id, commit_msg = parse_arguments()
for entry in os.scandir(benchmarks_folder_path):
try:
if not entry.name.endswith(".py"):
continue
if entry.path == __file__:
continue
logger.debug(f"loading: {entry.name}")
module = import_from_path(entry.name.split(".")[0], entry.path)
logger.info(f"runnning benchmarks in: {entry.name}")
module.run_benchmark(logger, branch, commit_id, commit_msg)
except ImportModuleException as e:
logger.error(e)
except Exception as e:
logger.error(f"error running benchmarks for {entry.name}: {e}")

10
benchmark/default.yml Normal file
View File

@ -0,0 +1,10 @@
apiVersion: 1
providers:
- name: 'Transformers Benchmarks'
orgId: 1
type: file
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/dashboards

View File

@ -30,7 +30,7 @@
"title": "Go to data", "title": "Go to data",
"tooltip": "Go to data", "tooltip": "Go to data",
"type": "link", "type": "link",
"url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}" "url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
} }
], ],
"liveNow": true, "liveNow": true,
@ -77,7 +77,7 @@
"properties": [ "properties": [
{ {
"id": "custom.width", "id": "custom.width",
"value": 196 "value": 202
} }
] ]
}, },
@ -101,7 +101,7 @@
"properties": [ "properties": [
{ {
"id": "custom.width", "id": "custom.width",
"value": 581 "value": 524
} }
] ]
}, },
@ -113,7 +113,19 @@
"properties": [ "properties": [
{ {
"id": "custom.width", "id": "custom.width",
"value": 379 "value": 353
}
]
},
{
"matcher": {
"id": "byName",
"options": "model_id"
},
"properties": [
{
"id": "custom.width",
"value": 216
} }
] ]
} }
@ -143,12 +155,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"type": "grafana-postgresql-datasource" "default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -306,13 +320,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -431,13 +446,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -565,13 +581,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -686,13 +703,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -807,13 +825,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -928,13 +947,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -1062,13 +1082,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -1183,13 +1204,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -1304,13 +1326,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -1425,13 +1448,14 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
"rawQuery": true, "rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A", "refId": "A",
"sql": { "sql": {
"columns": [ "columns": [
@ -1480,11 +1504,7 @@
"id": 15, "id": 15,
"panels": [ "panels": [
{ {
"datasource": { "datasource": {},
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -1528,8 +1548,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
}, },
{ {
"color": "red", "color": "red",
@ -1563,8 +1582,9 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
@ -1665,11 +1685,7 @@
"type": "timeseries" "type": "timeseries"
}, },
{ {
"datasource": { "datasource": {},
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -1713,8 +1729,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
}, },
{ {
"color": "red", "color": "red",
@ -1748,8 +1763,9 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
@ -1850,11 +1866,7 @@
"type": "timeseries" "type": "timeseries"
}, },
{ {
"datasource": { "datasource": {},
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -1898,8 +1910,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
}, },
{ {
"color": "red", "color": "red",
@ -1933,8 +1944,9 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
@ -2035,11 +2047,7 @@
"type": "timeseries" "type": "timeseries"
}, },
{ {
"datasource": { "datasource": {},
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -2083,8 +2091,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
}, },
{ {
"color": "red", "color": "red",
@ -2118,8 +2125,9 @@
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "bdz2yss7sxo1sc" "uid": "be28nkzirtb0gd"
}, },
"editorMode": "code", "editorMode": "code",
"format": "table", "format": "table",
@ -2224,7 +2232,6 @@
"type": "row" "type": "row"
} }
], ],
"refresh": "",
"schemaVersion": 39, "schemaVersion": 39,
"tags": [], "tags": [],
"templating": { "templating": {
@ -2236,6 +2243,7 @@
"value": "main" "value": "main"
}, },
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd" "uid": "be28nkzirtb0gd"
}, },
@ -2248,7 +2256,7 @@
"name": "branch", "name": "branch",
"options": [], "options": [],
"query": "SELECT DISTINCT branch FROM benchmarks;", "query": "SELECT DISTINCT branch FROM benchmarks;",
"refresh": 2, "refresh": 1,
"regex": "", "regex": "",
"skipUrlSync": false, "skipUrlSync": false,
"sort": 0, "sort": 0,
@ -2261,6 +2269,7 @@
"value": "1729701492845" "value": "1729701492845"
}, },
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd" "uid": "be28nkzirtb0gd"
}, },
@ -2281,10 +2290,11 @@
{ {
"current": { "current": {
"selected": false, "selected": false,
"text": "1730120430069", "text": "1730393397577",
"value": "1730120430069" "value": "1730393397577"
}, },
"datasource": { "datasource": {
"default": true,
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd" "uid": "be28nkzirtb0gd"
}, },
@ -2312,15 +2322,16 @@
"type": "grafana-postgresql-datasource", "type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd" "uid": "be28nkzirtb0gd"
}, },
"definition": "SELECT DISTINCT gpu_name FROM benchmarks;", "definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
"description": "",
"hide": 0, "hide": 0,
"includeAll": false, "includeAll": false,
"label": "GPU", "label": "GPU",
"multi": false, "multi": false,
"name": "gpu_name", "name": "gpu_name",
"options": [], "options": [],
"query": "SELECT DISTINCT gpu_name FROM benchmarks;", "query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
"refresh": 2, "refresh": 1,
"regex": "", "regex": "",
"skipUrlSync": false, "skipUrlSync": false,
"sort": 0, "sort": 0,
@ -2328,7 +2339,7 @@
}, },
{ {
"current": { "current": {
"selected": false, "selected": true,
"text": "10", "text": "10",
"value": "10" "value": "10"
}, },
@ -2359,6 +2370,6 @@
"timezone": "browser", "timezone": "browser",
"title": "Transformers benchmarks", "title": "Transformers benchmarks",
"uid": "fdz33iyzln9c0a", "uid": "fdz33iyzln9c0a",
"version": 4, "version": 10,
"weekStart": "" "weekStart": ""
} }

View File

@ -0,0 +1,17 @@
apiVersion: 1
datasources:
- name: grafana-postgresql-datasource
uid: be28nkzirtb0gd
type: postgres
url: $GRAFANA_POSTGRES_DATASOURCE_URL
user: $GRAFANA_POSTGRES_DATASOURCE_USER
secureJsonData:
password: $GRAFANA_POSTGRES_DATASOURCE_PWD
jsonData:
database: metrics
maxOpenConns: 100
maxIdleConns: 100
maxIdleConnsAuto: true
connMaxLifetime: 14400
postgresVersion: 1000
timescaledb: false

View File

@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS benchmarks (
branch VARCHAR(255), branch VARCHAR(255),
commit_id VARCHAR(72), commit_id VARCHAR(72),
commit_message VARCHAR(70), commit_message VARCHAR(70),
gpu_name VARCHAR(255), metadata jsonb,
created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC') created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
); );

View File

@ -1,71 +1,25 @@
import argparse from logging import Logger
import json
import logging
import os import os
import sys
from statistics import mean
from threading import Event, Thread from threading import Event, Thread
from time import perf_counter, sleep from time import perf_counter, sleep
from typing import Optional from typing import Optional
from benchmarks_entrypoint import MetricsRecorder
import gpustat import gpustat
import psutil import psutil
import psycopg2 import psycopg2
import torch import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
from psycopg2.extras import Json
from psycopg2.extensions import register_adapter
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
os.environ["TOKENIZERS_PARALLELISM"] = "1" os.environ["TOKENIZERS_PARALLELISM"] = "1"
torch.set_float32_matmul_precision("high") torch.set_float32_matmul_precision("high")
register_adapter(dict, Json)
def parse_arguments(): def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
"""
Parse command line arguments for the benchmarking CLI.
"""
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)
parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)
parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
args = parser.parse_args()
return args.branch, args.commit_id, args.commit_msg
def collect_metrics(benchmark_id, continue_metric_collection):
p = psutil.Process(os.getpid()) p = psutil.Process(os.getpid())
conn = psycopg2.connect("dbname=metrics")
cur = conn.cursor()
while not continue_metric_collection.is_set(): while not continue_metric_collection.is_set():
with p.oneshot(): with p.oneshot():
cpu_util = p.cpu_percent() cpu_util = p.cpu_percent()
@ -73,47 +27,41 @@ def collect_metrics(benchmark_id, continue_metric_collection):
gpu_stats = gpustat.GPUStatCollection.new_query() gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_util = gpu_stats[0]["utilization.gpu"] gpu_util = gpu_stats[0]["utilization.gpu"]
gpu_mem_megabytes = gpu_stats[0]["memory.used"] gpu_mem_megabytes = gpu_stats[0]["memory.used"]
cur.execute( metrics_recorder.collect_device_measurements(
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)", benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
) )
sleep(0.01) sleep(0.01)
conn.commit()
conn.close()
def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100): def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
continue_metric_collection = Event() continue_metric_collection = Event()
metrics_thread = None metrics_thread = None
model_id = "meta-llama/Llama-2-7b-hf"
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
try: try:
gpu_stats = gpustat.GPUStatCollection.new_query() gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_name = gpu_stats[0]["name"] gpu_name = gpu_stats[0]["name"]
conn = psycopg2.connect("dbname=metrics") benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
cur = conn.cursor() logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
cur.execute( metrics_thread = Thread(
"INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id", target=collect_metrics,
(branch, commit_id, commit_msg, gpu_name), args=[benchmark_id, continue_metric_collection, metrics_recorder],
) )
conn.commit()
benchmark_id = cur.fetchone()[0]
logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
metrics_thread.start() metrics_thread.start()
logger.info("started background thread to fetch device metrics") logger.info("started background thread to fetch device metrics")
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
device = "cuda" device = "cuda"
ckpt = "meta-llama/Llama-2-7b-hf"
logger.info("downloading weights") logger.info("downloading weights")
# This is to avoid counting download in model load time measurement # This is to avoid counting download in model load time measurement
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16) model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1) gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
logger.info("loading model") logger.info("loading model")
start = perf_counter() start = perf_counter()
model = AutoModelForCausalLM.from_pretrained( model = AutoModelForCausalLM.from_pretrained(
ckpt, torch_dtype=torch.float16, generation_config=gen_config model_id, torch_dtype=torch.float16, generation_config=gen_config
).eval() ).eval()
model.to(device) model.to(device)
torch.cuda.synchronize() torch.cuda.synchronize()
@ -121,7 +69,7 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge
model_load_time = end - start model_load_time = end - start
logger.info(f"loaded model in: {model_load_time}s") logger.info(f"loaded model in: {model_load_time}s")
tokenizer = AutoTokenizer.from_pretrained(ckpt) tokenizer = AutoTokenizer.from_pretrained(model_id)
prompt = "Why dogs are so cute?" prompt = "Why dogs are so cute?"
inputs = tokenizer(prompt, return_tensors="pt").to(device) inputs = tokenizer(prompt, return_tensors="pt").to(device)
@ -368,41 +316,27 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge
logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s") logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
cur.execute( metrics_recorder.collect_model_measurements(
""" benchmark_id,
INSERT INTO model_measurements ( {
benchmark_id, "model_load_time": model_load_time,
measurements "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
) VALUES (%s, %s) "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
""", "first_eager_generate_time_secs": first_eager_generate_time,
( "second_eager_generate_time_secs": second_eager_generate_time,
benchmark_id, "time_to_first_token_secs": time_to_first_token,
{ "time_to_second_token_secs": time_to_second_token,
"model_load_time": model_load_time, "time_to_third_token_secs": time_to_third_token,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time, "time_to_next_token_mean_secs": mean_time_to_next_token,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time, "first_compile_generate_time_secs": first_compile_generate_time,
"first_eager_generate_time_secs": first_eager_generate_time, "second_compile_generate_time_secs": second_compile_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time, "third_compile_generate_time_secs": third_compile_generate_time,
"time_to_first_token_secs": time_to_first_token, "fourth_compile_generate_time_secs": fourth_compile_generate_time,
"time_to_second_token_secs": time_to_second_token, },
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
),
) )
conn.commit()
conn.close()
except Exception as e: except Exception as e:
logger.error(f"Caught exception: {e}") logger.error(f"Caught exception: {e}")
continue_metric_collection.set() continue_metric_collection.set()
if metrics_thread is not None: if metrics_thread is not None:
metrics_thread.join() metrics_thread.join()
metrics_recorder.close()
if __name__ == "__main__":
branch, commit_id, commit_msg = parse_arguments()
run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20)