feat: add benchmarks_entrypoint.py (#34495)
* feat: add `benchmarks_entrypoint.py`

  Adding `benchmarks_entrypoint.py`, which will be run from the benchmarks CI. This Python script lists all Python files in the `benchmark/` folder and runs the `run_benchmark` function they contain, allowing people to add new benchmark scripts.

* feat: add `MetricsRecorder`
* feat: update dashboard
* fix: add missing arguments to `MetricsRecorder`
* feat: update dash & add datasource + `default.yml`
* fix: move responsibility to create `MetricsRecorder` in bench script
* fix: update incorrect datasource UID
* fix: incorrect variable values
* debug: benchmark entrypoint script
* refactor: update log level
* fix: update broken import
* feat: add debug log in `MetricsRecorder`
* debug: set log level to debug
* fix: set connection `autocommit` to `True`
parent 2c47618c1a
commit 9a94dfe123
.github/workflows/benchmark.yml (vendored): 2 changed lines
@@ -63,7 +63,7 @@ jobs:
            commit_id=$GITHUB_SHA
          fi
          commit_msg=$(git show -s --format=%s | cut -c1-70)
-         python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
+         python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
        env:
          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
          # Enable this to see debug logs
benchmark/README.md (new file): 49 lines
@@ -0,0 +1,49 @@
# Benchmarks

You might want to add new benchmarks to this folder; they are picked up automatically by `benchmarks_entrypoint.py` and run in the benchmarks CI.

To do so, define a Python function named `run_benchmark` in your Python file and place the file in this `benchmark/` directory.

The expected function signature is the following:

```py
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
```

## Writing metrics to the database

`MetricsRecorder` is thread-safe, in the sense of the Python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread that records device measurements while the main thread runs the model measurements without being blocked.

See [`llama.py`](./llama.py) for an example of this in practice.

```py
from benchmarks_entrypoint import MetricsRecorder
import psycopg2

def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
    benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
    # To collect device measurements
    metrics_recorder.collect_device_measurements(
        benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
    )
    # To collect your model measurements
    metrics_recorder.collect_model_measurements(
        benchmark_id,
        {
            "model_load_time": model_load_time,
            "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
            "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
            "first_eager_generate_time_secs": first_eager_generate_time,
            "second_eager_generate_time_secs": second_eager_generate_time,
            "time_to_first_token_secs": time_to_first_token,
            "time_to_second_token_secs": time_to_second_token,
            "time_to_third_token_secs": time_to_third_token,
            "time_to_next_token_mean_secs": mean_time_to_next_token,
            "first_compile_generate_time_secs": first_compile_generate_time,
            "second_compile_generate_time_secs": second_compile_generate_time,
            "third_compile_generate_time_secs": third_compile_generate_time,
            "fourth_compile_generate_time_secs": fourth_compile_generate_time,
        },
    )
```
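As a rough, self-contained sketch of the background-collection pattern described above (not part of this diff; it assumes the local `metrics` Postgres database used throughout this PR and uses `psutil`/`gpustat` the same way `llama.py` does, with placeholder `gpu_name`/`model_id` values):

```py
# Sketch: record device metrics from a worker thread while the main thread
# runs the model measurements. Assumes a Postgres database named "metrics".
from threading import Event, Thread
from time import perf_counter, sleep

import gpustat
import psutil
import psycopg2

from benchmarks_entrypoint import MetricsRecorder


def collect_device_metrics(recorder: MetricsRecorder, benchmark_id: int, stop: Event):
    process = psutil.Process()
    while not stop.is_set():
        cpu_util = process.cpu_percent()
        mem_megabytes = process.memory_info().rss / 1024**2
        gpu_stats = gpustat.GPUStatCollection.new_query()
        recorder.collect_device_measurements(
            benchmark_id,
            cpu_util,
            mem_megabytes,
            gpu_stats[0]["utilization.gpu"],
            gpu_stats[0]["memory.used"],
        )
        sleep(0.01)


def run_benchmark(logger, branch, commit_id, commit_msg, num_tokens_to_generate=100):
    recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
    # "example-gpu" / "example/model" are placeholders for real metadata values.
    benchmark_id = recorder.initialise_benchmark({"gpu_name": "example-gpu", "model_id": "example/model"})
    stop = Event()
    thread = Thread(target=collect_device_metrics, args=(recorder, benchmark_id, stop))
    thread.start()
    try:
        start = perf_counter()
        # ... run the model measurements here ...
        recorder.collect_model_measurements(benchmark_id, {"model_load_time": perf_counter() - start})
    finally:
        stop.set()
        thread.join()
        recorder.close()
```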
benchmark/benchmarks_entrypoint.py (new file): 144 lines
@@ -0,0 +1,144 @@
import argparse
import importlib.util
import logging
import os
from typing import Dict
import psycopg2
import sys

from psycopg2.extras import Json
from psycopg2.extensions import register_adapter


register_adapter(dict, Json)


class ImportModuleException(Exception):
    pass


class MetricsRecorder:
    def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
        self.conn = connection
        self.conn.autocommit = True
        self.logger = logger
        self.branch = branch
        self.commit_id = commit_id
        self.commit_msg = commit_msg

    def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
        """
        Creates a new benchmark, returns the benchmark id
        """
        # expected metadata keys: gpu_name, model_id
        with self.conn.cursor() as cur:
            cur.execute(
                "INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
                (self.branch, self.commit_id, self.commit_msg, metadata),
            )
            benchmark_id = cur.fetchone()[0]
            self.logger.debug(f"initialised benchmark #{benchmark_id}")
            return benchmark_id

    def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
        """
        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
        """
        with self.conn.cursor() as cur:
            cur.execute(
                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
            )
        self.logger.debug(
            f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
        )

    def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
        with self.conn.cursor() as cur:
            cur.execute(
                """
                INSERT INTO model_measurements (
                    benchmark_id,
                    measurements
                ) VALUES (%s, %s)
                """,
                (
                    benchmark_id,
                    measurements,
                ),
            )
        self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")

    def close(self):
        self.conn.close()


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)


def parse_arguments():
    """
    Parse command line arguments for the benchmarking CLI.
    """
    parser = argparse.ArgumentParser(description="CLI for benchmarking huggingface/transformers.")

    parser.add_argument(
        "branch",
        type=str,
        help="The branch name on which the benchmarking is performed.",
    )

    parser.add_argument(
        "commit_id",
        type=str,
        help="The commit hash on which the benchmarking is performed.",
    )

    parser.add_argument(
        "commit_msg",
        type=str,
        help="The commit message associated with the commit, truncated to 70 characters.",
    )

    args = parser.parse_args()

    return args.branch, args.commit_id, args.commit_msg


def import_from_path(module_name, file_path):
    try:
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
        spec.loader.exec_module(module)
        return module
    except Exception as e:
        raise ImportModuleException(f"failed to load python module: {e}")


if __name__ == "__main__":
    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))

    branch, commit_id, commit_msg = parse_arguments()

    for entry in os.scandir(benchmarks_folder_path):
        try:
            if not entry.name.endswith(".py"):
                continue
            if entry.path == __file__:
                continue
            logger.debug(f"loading: {entry.name}")
            module = import_from_path(entry.name.split(".")[0], entry.path)
            logger.info(f"running benchmarks in: {entry.name}")
            module.run_benchmark(logger, branch, commit_id, commit_msg)
        except ImportModuleException as e:
            logger.error(e)
        except Exception as e:
            logger.error(f"error running benchmarks for {entry.name}: {e}")
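The entrypoint imports every `*.py` file in `benchmark/` (except itself) and calls its `run_benchmark(logger, branch, commit_id, commit_msg)`. As a rough sketch of what a discovered script needs to provide (not part of this diff; the file name `benchmark/my_benchmark.py` and the timed work are hypothetical):

```py
# benchmark/my_benchmark.py -- hypothetical example picked up by benchmarks_entrypoint.py.
from logging import Logger
from time import perf_counter

import psycopg2

from benchmarks_entrypoint import MetricsRecorder


def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
    # Placeholder metadata values; a real script would read them from the environment/model.
    benchmark_id = recorder.initialise_benchmark({"gpu_name": "example-gpu", "model_id": "example/model"})

    start = perf_counter()
    sum(range(1_000_000))  # stand-in for the actual workload being measured
    recorder.collect_model_measurements(benchmark_id, {"model_load_time": perf_counter() - start})
    recorder.close()
```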
benchmark/default.yml (new file): 10 lines
@@ -0,0 +1,10 @@
apiVersion: 1

providers:
  - name: 'Transformers Benchmarks'
    orgId: 1
    type: file
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/dashboards
@@ -30,7 +30,7 @@
      "title": "Go to data",
      "tooltip": "Go to data",
      "type": "link",
-      "url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
+      "url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
    }
  ],
  "liveNow": true,
@@ -77,7 +77,7 @@
        "properties": [
          {
            "id": "custom.width",
-            "value": 196
+            "value": 202
          }
        ]
      },
@@ -101,7 +101,7 @@
        "properties": [
          {
            "id": "custom.width",
-            "value": 581
+            "value": 524
          }
        ]
      },
@@ -113,7 +113,19 @@
        "properties": [
          {
            "id": "custom.width",
-            "value": 379
+            "value": 353
          }
        ]
      },
+      {
+        "matcher": {
+          "id": "byName",
+          "options": "model_id"
+        },
+        "properties": [
+          {
+            "id": "custom.width",
+            "value": 216
+          }
+        ]
+      }
@@ -143,12 +155,14 @@
      "targets": [
        {
          "datasource": {
-            "type": "grafana-postgresql-datasource"
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -306,13 +320,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -431,13 +446,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -565,13 +581,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -686,13 +703,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -807,13 +825,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -928,13 +947,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -1062,13 +1082,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -1183,13 +1204,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -1304,13 +1326,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -1425,13 +1448,14 @@
      "targets": [
        {
          "datasource": {
+            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "bdz2yss7sxo1sc"
+            "uid": "be28nkzirtb0gd"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@@ -1480,11 +1504,7 @@
      "id": 15,
      "panels": [
        {
-          "datasource": {
-            "default": true,
-            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
-          },
+          "datasource": {},
          "fieldConfig": {
            "defaults": {
              "color": {
@@ -1528,8 +1548,7 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green",
-                    "value": null
+                    "color": "green"
                  },
                  {
                    "color": "red",
@@ -1563,8 +1582,9 @@
          "targets": [
            {
              "datasource": {
+                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "bdz2yss7sxo1sc"
+                "uid": "be28nkzirtb0gd"
              },
              "editorMode": "code",
              "format": "table",
@@ -1665,11 +1685,7 @@
          "type": "timeseries"
        },
        {
-          "datasource": {
-            "default": true,
-            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
-          },
+          "datasource": {},
          "fieldConfig": {
            "defaults": {
              "color": {
@@ -1713,8 +1729,7 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green",
-                    "value": null
+                    "color": "green"
                  },
                  {
                    "color": "red",
@@ -1748,8 +1763,9 @@
          "targets": [
            {
              "datasource": {
+                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "bdz2yss7sxo1sc"
+                "uid": "be28nkzirtb0gd"
              },
              "editorMode": "code",
              "format": "table",
@@ -1850,11 +1866,7 @@
          "type": "timeseries"
        },
        {
-          "datasource": {
-            "default": true,
-            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
-          },
+          "datasource": {},
          "fieldConfig": {
            "defaults": {
              "color": {
@@ -1898,8 +1910,7 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green",
-                    "value": null
+                    "color": "green"
                  },
                  {
                    "color": "red",
@@ -1933,8 +1944,9 @@
          "targets": [
            {
              "datasource": {
+                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "bdz2yss7sxo1sc"
+                "uid": "be28nkzirtb0gd"
              },
              "editorMode": "code",
              "format": "table",
@@ -2035,11 +2047,7 @@
          "type": "timeseries"
        },
        {
-          "datasource": {
-            "default": true,
-            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
-          },
+          "datasource": {},
          "fieldConfig": {
            "defaults": {
              "color": {
@@ -2083,8 +2091,7 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green",
-                    "value": null
+                    "color": "green"
                  },
                  {
                    "color": "red",
@@ -2118,8 +2125,9 @@
          "targets": [
            {
              "datasource": {
+                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "bdz2yss7sxo1sc"
+                "uid": "be28nkzirtb0gd"
              },
              "editorMode": "code",
              "format": "table",
@@ -2224,7 +2232,6 @@
      "type": "row"
    }
  ],
  "refresh": "",
  "schemaVersion": 39,
  "tags": [],
  "templating": {
@@ -2236,6 +2243,7 @@
        "value": "main"
      },
      "datasource": {
+        "default": true,
        "type": "grafana-postgresql-datasource",
        "uid": "be28nkzirtb0gd"
      },
@@ -2248,7 +2256,7 @@
      "name": "branch",
      "options": [],
      "query": "SELECT DISTINCT branch FROM benchmarks;",
-      "refresh": 2,
+      "refresh": 1,
      "regex": "",
      "skipUrlSync": false,
      "sort": 0,
@@ -2261,6 +2269,7 @@
        "value": "1729701492845"
      },
      "datasource": {
+        "default": true,
        "type": "grafana-postgresql-datasource",
        "uid": "be28nkzirtb0gd"
      },
@@ -2281,10 +2290,11 @@
    {
      "current": {
        "selected": false,
-        "text": "1730120430069",
-        "value": "1730120430069"
+        "text": "1730393397577",
+        "value": "1730393397577"
      },
      "datasource": {
+        "default": true,
        "type": "grafana-postgresql-datasource",
        "uid": "be28nkzirtb0gd"
      },
@@ -2312,15 +2322,16 @@
      "type": "grafana-postgresql-datasource",
      "uid": "be28nkzirtb0gd"
      },
-      "definition": "SELECT DISTINCT gpu_name FROM benchmarks;",
+      "definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
      "description": "",
      "hide": 0,
      "includeAll": false,
      "label": "GPU",
      "multi": false,
      "name": "gpu_name",
      "options": [],
-      "query": "SELECT DISTINCT gpu_name FROM benchmarks;",
-      "refresh": 2,
+      "query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
+      "refresh": 1,
      "regex": "",
      "skipUrlSync": false,
      "sort": 0,
@@ -2328,7 +2339,7 @@
    },
    {
      "current": {
-        "selected": false,
+        "selected": true,
        "text": "10",
        "value": "10"
      },
@@ -2359,6 +2370,6 @@
  "timezone": "browser",
  "title": "Transformers benchmarks",
  "uid": "fdz33iyzln9c0a",
-  "version": 4,
+  "version": 10,
  "weekStart": ""
}
benchmark/grafana_datasource.yaml (new file): 17 lines
@@ -0,0 +1,17 @@
apiVersion: 1
datasources:
  - name: grafana-postgresql-datasource
    uid: be28nkzirtb0gd
    type: postgres
    url: $GRAFANA_POSTGRES_DATASOURCE_URL
    user: $GRAFANA_POSTGRES_DATASOURCE_USER
    secureJsonData:
      password: $GRAFANA_POSTGRES_DATASOURCE_PWD
    jsonData:
      database: metrics
      maxOpenConns: 100
      maxIdleConns: 100
      maxIdleConnsAuto: true
      connMaxLifetime: 14400
      postgresVersion: 1000
      timescaledb: false
@@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS benchmarks (
  branch VARCHAR(255),
  commit_id VARCHAR(72),
  commit_message VARCHAR(70),
-  gpu_name VARCHAR(255),
+  metadata jsonb,
  created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);

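Because `gpu_name` now lives inside the `metadata` jsonb column, consumers filter on it with the `->>` operator, as the dashboard SQL above does. A minimal sketch of reading it back (not part of this diff; it assumes the local `metrics` database, and the GPU name value is a placeholder):

```py
# Sketch: querying the jsonb metadata column with psycopg2 (assumes dbname=metrics).
import psycopg2

with psycopg2.connect("dbname=metrics") as conn, conn.cursor() as cur:
    cur.execute(
        "SELECT benchmark_id, metadata->>'gpu_name', metadata->>'model_id' "
        "FROM benchmarks WHERE metadata->>'gpu_name' = %s ORDER BY benchmark_id DESC LIMIT 10",
        ("example-gpu",),  # placeholder GPU name
    )
    for benchmark_id, gpu_name, model_id in cur.fetchall():
        print(benchmark_id, gpu_name, model_id)
```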
@@ -1,71 +1,25 @@
-import argparse
-import json
-import logging
+from logging import Logger
 import os
 import sys
 from statistics import mean
 from threading import Event, Thread
 from time import perf_counter, sleep
 from typing import Optional
+from benchmarks_entrypoint import MetricsRecorder
 import gpustat
 import psutil
 import psycopg2
 import torch

 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
-from psycopg2.extras import Json
-from psycopg2.extensions import register_adapter


 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-
-handler = logging.StreamHandler(sys.stdout)
-handler.setLevel(logging.INFO)
-formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
-handler.setFormatter(formatter)
-logger.addHandler(handler)
-
 os.environ["TOKENIZERS_PARALLELISM"] = "1"
 torch.set_float32_matmul_precision("high")
-register_adapter(dict, Json)
-
-
-def parse_arguments():
-    """
-    Parse command line arguments for the benchmarking CLI.
-    """
-    parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
-
-    parser.add_argument(
-        "branch",
-        type=str,
-        help="The branch name on which the benchmarking is performed.",
-    )
-
-    parser.add_argument(
-        "commit_id",
-        type=str,
-        help="The commit hash on which the benchmarking is performed.",
-    )
-
-    parser.add_argument(
-        "commit_msg",
-        type=str,
-        help="The commit message associated with the commit, truncated to 70 characters.",
-    )
-
-    args = parser.parse_args()
-
-    return args.branch, args.commit_id, args.commit_msg


-def collect_metrics(benchmark_id, continue_metric_collection):
+def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
     p = psutil.Process(os.getpid())
-    conn = psycopg2.connect("dbname=metrics")
-    cur = conn.cursor()
     while not continue_metric_collection.is_set():
         with p.oneshot():
             cpu_util = p.cpu_percent()
@@ -73,47 +27,41 @@ def collect_metrics(benchmark_id, continue_metric_collection):
             gpu_stats = gpustat.GPUStatCollection.new_query()
             gpu_util = gpu_stats[0]["utilization.gpu"]
             gpu_mem_megabytes = gpu_stats[0]["memory.used"]
-            cur.execute(
-                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
-                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
+            metrics_recorder.collect_device_measurements(
+                benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
             )
         sleep(0.01)
-    conn.commit()
-    conn.close()


-def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
+def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
     continue_metric_collection = Event()
     metrics_thread = None
+    model_id = "meta-llama/Llama-2-7b-hf"
+    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
     try:
         gpu_stats = gpustat.GPUStatCollection.new_query()
         gpu_name = gpu_stats[0]["name"]
-        conn = psycopg2.connect("dbname=metrics")
-        cur = conn.cursor()
-        cur.execute(
-            "INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
-            (branch, commit_id, commit_msg, gpu_name),
+        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
+        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
+        metrics_thread = Thread(
+            target=collect_metrics,
+            args=[benchmark_id, continue_metric_collection, metrics_recorder],
         )
-        conn.commit()
-        benchmark_id = cur.fetchone()[0]
-        logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
-        metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
         metrics_thread.start()
         logger.info("started background thread to fetch device metrics")

         os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling

         device = "cuda"
-        ckpt = "meta-llama/Llama-2-7b-hf"

         logger.info("downloading weights")
         # This is to avoid counting download in model load time measurement
-        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
+        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
         gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
         logger.info("loading model")
         start = perf_counter()
         model = AutoModelForCausalLM.from_pretrained(
-            ckpt, torch_dtype=torch.float16, generation_config=gen_config
+            model_id, torch_dtype=torch.float16, generation_config=gen_config
         ).eval()
         model.to(device)
         torch.cuda.synchronize()
@@ -121,7 +69,7 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge
         model_load_time = end - start
         logger.info(f"loaded model in: {model_load_time}s")

-        tokenizer = AutoTokenizer.from_pretrained(ckpt)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)

         prompt = "Why dogs are so cute?"
         inputs = tokenizer(prompt, return_tensors="pt").to(device)
@@ -368,41 +316,27 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge
         logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
         logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")

-        cur.execute(
-            """
-            INSERT INTO model_measurements (
-                benchmark_id,
-                measurements
-            ) VALUES (%s, %s)
-            """,
-            (
-                benchmark_id,
-                {
-                    "model_load_time": model_load_time,
-                    "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
-                    "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
-                    "first_eager_generate_time_secs": first_eager_generate_time,
-                    "second_eager_generate_time_secs": second_eager_generate_time,
-                    "time_to_first_token_secs": time_to_first_token,
-                    "time_to_second_token_secs": time_to_second_token,
-                    "time_to_third_token_secs": time_to_third_token,
-                    "time_to_next_token_mean_secs": mean_time_to_next_token,
-                    "first_compile_generate_time_secs": first_compile_generate_time,
-                    "second_compile_generate_time_secs": second_compile_generate_time,
-                    "third_compile_generate_time_secs": third_compile_generate_time,
-                    "fourth_compile_generate_time_secs": fourth_compile_generate_time,
-                },
-            ),
+        metrics_recorder.collect_model_measurements(
+            benchmark_id,
+            {
+                "model_load_time": model_load_time,
+                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
+                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
+                "first_eager_generate_time_secs": first_eager_generate_time,
+                "second_eager_generate_time_secs": second_eager_generate_time,
+                "time_to_first_token_secs": time_to_first_token,
+                "time_to_second_token_secs": time_to_second_token,
+                "time_to_third_token_secs": time_to_third_token,
+                "time_to_next_token_mean_secs": mean_time_to_next_token,
+                "first_compile_generate_time_secs": first_compile_generate_time,
+                "second_compile_generate_time_secs": second_compile_generate_time,
+                "third_compile_generate_time_secs": third_compile_generate_time,
+                "fourth_compile_generate_time_secs": fourth_compile_generate_time,
+            },
         )
-        conn.commit()
-        conn.close()
     except Exception as e:
         logger.error(f"Caught exception: {e}")
     continue_metric_collection.set()
     if metrics_thread is not None:
         metrics_thread.join()
-
-
-if __name__ == "__main__":
-    branch, commit_id, commit_msg = parse_arguments()
-    run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20)
+    metrics_recorder.close()