mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 05:10:06 +06:00
feat: add benchmarks_entrypoint.py
(#34495)
* feat: add `benchmarks_entrypoint.py` Adding `benchmarks_entrypoint.py` file, which will be run from the benchmarks CI. This python script will list all python files from the `benchmark/` folder and run the included `run_benchmark` function, allowing people to add new benchmarks scripts. * feat: add `MetricsRecorder` * feat: update dashboard * fix: add missing arguments to `MetricsRecorder` * feat: update dash & add datasource + `default.yml` * fix: move responsibility to create `MetricsRecorder` in bench script * fix: update incorrect datasource UID * fix: incorrect variable values * debug: benchmark entrypoint script * refactor: update log level * fix: update broken import * feat: add debug log in `MetricsRecorder` * debug: set log level to debug * fix: set connection `autocommit` to `True`
This commit is contained in:
parent
2c47618c1a
commit
9a94dfe123
2
.github/workflows/benchmark.yml
vendored
2
.github/workflows/benchmark.yml
vendored
@ -63,7 +63,7 @@ jobs:
|
|||||||
commit_id=$GITHUB_SHA
|
commit_id=$GITHUB_SHA
|
||||||
fi
|
fi
|
||||||
commit_msg=$(git show -s --format=%s | cut -c1-70)
|
commit_msg=$(git show -s --format=%s | cut -c1-70)
|
||||||
python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
|
python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
|
||||||
env:
|
env:
|
||||||
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||||
# Enable this to see debug logs
|
# Enable this to see debug logs
|
||||||
|
49
benchmark/README.md
Normal file
49
benchmark/README.md
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# Benchmarks
|
||||||
|
|
||||||
|
You might want to add new benchmarks.
|
||||||
|
|
||||||
|
You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
|
||||||
|
|
||||||
|
The expected function signature is the following:
|
||||||
|
|
||||||
|
```py
|
||||||
|
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
|
||||||
|
```
|
||||||
|
|
||||||
|
## Writing metrics to the database
|
||||||
|
|
||||||
|
`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
|
||||||
|
|
||||||
|
cf [`llama.py`](./llama.py) to see an example of this in practice.
|
||||||
|
|
||||||
|
```py
|
||||||
|
from benchmarks_entrypoint import MetricsRecorder
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
|
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
|
||||||
|
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
|
||||||
|
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
|
||||||
|
# To collect device measurements
|
||||||
|
metrics_recorder.collect_device_measurements(
|
||||||
|
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
|
||||||
|
)
|
||||||
|
# To collect your model measurements
|
||||||
|
metrics_recorder.collect_model_measurements(
|
||||||
|
benchmark_id,
|
||||||
|
{
|
||||||
|
"model_load_time": model_load_time,
|
||||||
|
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
|
||||||
|
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
|
||||||
|
"first_eager_generate_time_secs": first_eager_generate_time,
|
||||||
|
"second_eager_generate_time_secs": second_eager_generate_time,
|
||||||
|
"time_to_first_token_secs": time_to_first_token,
|
||||||
|
"time_to_second_token_secs": time_to_second_token,
|
||||||
|
"time_to_third_token_secs": time_to_third_token,
|
||||||
|
"time_to_next_token_mean_secs": mean_time_to_next_token,
|
||||||
|
"first_compile_generate_time_secs": first_compile_generate_time,
|
||||||
|
"second_compile_generate_time_secs": second_compile_generate_time,
|
||||||
|
"third_compile_generate_time_secs": third_compile_generate_time,
|
||||||
|
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
```
|
144
benchmark/benchmarks_entrypoint.py
Normal file
144
benchmark/benchmarks_entrypoint.py
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
import argparse
|
||||||
|
import importlib.util
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Dict
|
||||||
|
import psycopg2
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from psycopg2.extras import Json
|
||||||
|
from psycopg2.extensions import register_adapter
|
||||||
|
|
||||||
|
|
||||||
|
register_adapter(dict, Json)
|
||||||
|
|
||||||
|
|
||||||
|
class ImportModuleException(Exception):
    """Raised when a benchmark script cannot be imported as a python module."""
|
||||||
|
|
||||||
|
|
||||||
|
class MetricsRecorder:
    """Records benchmark runs and their measurements into a PostgreSQL database.

    One instance corresponds to one (branch, commit) benchmarking session.
    The connection is switched to autocommit so every INSERT is persisted
    immediately, without an explicit commit() after each write.
    """

    def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
        self.conn = connection
        # autocommit=True: each INSERT below is its own transaction, so partial
        # results survive even if a later benchmark step crashes the process.
        self.conn.autocommit = True
        self.logger = logger
        self.branch = branch
        self.commit_id = commit_id
        self.commit_msg = commit_msg

    def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
        """
        Creates a new benchmark row (e.g. metadata = {"gpu_name": ..., "model_id": ...}),
        returns the generated benchmark id.
        """
        with self.conn.cursor() as cur:
            cur.execute(
                "INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
                (self.branch, self.commit_id, self.commit_msg, metadata),
            )
            benchmark_id = cur.fetchone()[0]
            # Fix: use the instance logger, not the module-level `logger` global
            # (the original line broke when MetricsRecorder was imported standalone).
            self.logger.debug(f"initialised benchmark #{benchmark_id}")
            return benchmark_id

    def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
        """
        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
        """
        with self.conn.cursor() as cur:
            cur.execute(
                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
            )
        self.logger.debug(
            f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
        )

    def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
        """Store arbitrary model-level measurements (timings) for a benchmark run."""
        with self.conn.cursor() as cur:
            cur.execute(
                """
                INSERT INTO model_measurements (
                    benchmark_id,
                    measurements
                ) VALUES (%s, %s)
                """,
                (
                    benchmark_id,
                    measurements,
                ),
            )
        self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level logger: INFO-level records, formatted and written to stdout.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
logger.addHandler(handler)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments():
    """
    Parse command line arguments for the benchmarking CLI.

    Returns the (branch, commit_id, commit_msg) positional arguments as a tuple.
    """
    parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")

    # Three required positional string arguments, declared data-driven.
    for arg_name, arg_help in (
        ("branch", "The branch name on which the benchmarking is performed."),
        ("commit_id", "The commit hash on which the benchmarking is performed."),
        ("commit_msg", "The commit message associated with the commit, truncated to 70 characters."),
    ):
        parser.add_argument(arg_name, type=str, help=arg_help)

    parsed = parser.parse_args()
    return parsed.branch, parsed.commit_id, parsed.commit_msg
|
||||||
|
|
||||||
|
|
||||||
|
def import_from_path(module_name, file_path):
    """
    Dynamically import the python file at `file_path` as module `module_name`.

    Returns the executed module object.

    Raises:
        ImportModuleException: if the file cannot be loaded or its top-level
            code raises during execution.
    """
    try:
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        module = importlib.util.module_from_spec(spec)
        # Register before exec so the module can be resolved during its own import.
        sys.modules[module_name] = module
        spec.loader.exec_module(module)
        return module
    except Exception as e:
        # Fix: chain with `from e` so the root cause stays in the traceback.
        raise ImportModuleException(f"failed to load python module: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Directory containing this entrypoint and all benchmark scripts.
    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))

    branch, commit_id, commit_msg = parse_arguments()

    # Run the `run_benchmark` entry point of every sibling .py script.
    # Failures are logged per-script so one broken benchmark does not stop the rest.
    for entry in os.scandir(benchmarks_folder_path):
        try:
            if not entry.name.endswith(".py"):
                continue
            if entry.path == __file__:
                # skip this entrypoint itself
                continue
            logger.debug(f"loading: {entry.name}")
            module = import_from_path(entry.name.split(".")[0], entry.path)
            # fix: "runnning" typo in the original log message
            logger.info(f"running benchmarks in: {entry.name}")
            module.run_benchmark(logger, branch, commit_id, commit_msg)
        except ImportModuleException as e:
            logger.error(e)
        except Exception as e:
            logger.error(f"error running benchmarks for {entry.name}: {e}")
|
10
benchmark/default.yml
Normal file
10
benchmark/default.yml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: 'Transformers Benchmarks'
|
||||||
|
orgId: 1
|
||||||
|
type: file
|
||||||
|
updateIntervalSeconds: 10
|
||||||
|
allowUiUpdates: true
|
||||||
|
options:
|
||||||
|
path: /etc/grafana/dashboards
|
@ -30,7 +30,7 @@
|
|||||||
"title": "Go to data",
|
"title": "Go to data",
|
||||||
"tooltip": "Go to data",
|
"tooltip": "Go to data",
|
||||||
"type": "link",
|
"type": "link",
|
||||||
"url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
|
"url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"liveNow": true,
|
"liveNow": true,
|
||||||
@ -77,7 +77,7 @@
|
|||||||
"properties": [
|
"properties": [
|
||||||
{
|
{
|
||||||
"id": "custom.width",
|
"id": "custom.width",
|
||||||
"value": 196
|
"value": 202
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -101,7 +101,7 @@
|
|||||||
"properties": [
|
"properties": [
|
||||||
{
|
{
|
||||||
"id": "custom.width",
|
"id": "custom.width",
|
||||||
"value": 581
|
"value": 524
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -113,7 +113,19 @@
|
|||||||
"properties": [
|
"properties": [
|
||||||
{
|
{
|
||||||
"id": "custom.width",
|
"id": "custom.width",
|
||||||
"value": 379
|
"value": 353
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": {
|
||||||
|
"id": "byName",
|
||||||
|
"options": "model_id"
|
||||||
|
},
|
||||||
|
"properties": [
|
||||||
|
{
|
||||||
|
"id": "custom.width",
|
||||||
|
"value": 216
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -143,12 +155,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "grafana-postgresql-datasource"
|
"default": true,
|
||||||
|
"type": "grafana-postgresql-datasource",
|
||||||
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -306,13 +320,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -431,13 +446,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -565,13 +581,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -686,13 +703,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -807,13 +825,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -928,13 +947,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -1062,13 +1082,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -1183,13 +1204,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -1304,13 +1326,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -1425,13 +1448,14 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"rawQuery": true,
|
"rawQuery": true,
|
||||||
"rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
"rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"sql": {
|
"sql": {
|
||||||
"columns": [
|
"columns": [
|
||||||
@ -1480,11 +1504,7 @@
|
|||||||
"id": 15,
|
"id": 15,
|
||||||
"panels": [
|
"panels": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {},
|
||||||
"default": true,
|
|
||||||
"type": "grafana-postgresql-datasource",
|
|
||||||
"uid": "be28nkzirtb0gd"
|
|
||||||
},
|
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"color": {
|
"color": {
|
||||||
@ -1528,8 +1548,7 @@
|
|||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{
|
{
|
||||||
"color": "green",
|
"color": "green"
|
||||||
"value": null
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"color": "red",
|
"color": "red",
|
||||||
@ -1563,8 +1582,9 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
@ -1665,11 +1685,7 @@
|
|||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {},
|
||||||
"default": true,
|
|
||||||
"type": "grafana-postgresql-datasource",
|
|
||||||
"uid": "be28nkzirtb0gd"
|
|
||||||
},
|
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"color": {
|
"color": {
|
||||||
@ -1713,8 +1729,7 @@
|
|||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{
|
{
|
||||||
"color": "green",
|
"color": "green"
|
||||||
"value": null
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"color": "red",
|
"color": "red",
|
||||||
@ -1748,8 +1763,9 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
@ -1850,11 +1866,7 @@
|
|||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {},
|
||||||
"default": true,
|
|
||||||
"type": "grafana-postgresql-datasource",
|
|
||||||
"uid": "be28nkzirtb0gd"
|
|
||||||
},
|
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"color": {
|
"color": {
|
||||||
@ -1898,8 +1910,7 @@
|
|||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{
|
{
|
||||||
"color": "green",
|
"color": "green"
|
||||||
"value": null
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"color": "red",
|
"color": "red",
|
||||||
@ -1933,8 +1944,9 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
@ -2035,11 +2047,7 @@
|
|||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {},
|
||||||
"default": true,
|
|
||||||
"type": "grafana-postgresql-datasource",
|
|
||||||
"uid": "be28nkzirtb0gd"
|
|
||||||
},
|
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"color": {
|
"color": {
|
||||||
@ -2083,8 +2091,7 @@
|
|||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{
|
{
|
||||||
"color": "green",
|
"color": "green"
|
||||||
"value": null
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"color": "red",
|
"color": "red",
|
||||||
@ -2118,8 +2125,9 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "bdz2yss7sxo1sc"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
@ -2224,7 +2232,6 @@
|
|||||||
"type": "row"
|
"type": "row"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"refresh": "",
|
|
||||||
"schemaVersion": 39,
|
"schemaVersion": 39,
|
||||||
"tags": [],
|
"tags": [],
|
||||||
"templating": {
|
"templating": {
|
||||||
@ -2236,6 +2243,7 @@
|
|||||||
"value": "main"
|
"value": "main"
|
||||||
},
|
},
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "be28nkzirtb0gd"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
@ -2248,7 +2256,7 @@
|
|||||||
"name": "branch",
|
"name": "branch",
|
||||||
"options": [],
|
"options": [],
|
||||||
"query": "SELECT DISTINCT branch FROM benchmarks;",
|
"query": "SELECT DISTINCT branch FROM benchmarks;",
|
||||||
"refresh": 2,
|
"refresh": 1,
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"skipUrlSync": false,
|
"skipUrlSync": false,
|
||||||
"sort": 0,
|
"sort": 0,
|
||||||
@ -2261,6 +2269,7 @@
|
|||||||
"value": "1729701492845"
|
"value": "1729701492845"
|
||||||
},
|
},
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "be28nkzirtb0gd"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
@ -2281,10 +2290,11 @@
|
|||||||
{
|
{
|
||||||
"current": {
|
"current": {
|
||||||
"selected": false,
|
"selected": false,
|
||||||
"text": "1730120430069",
|
"text": "1730393397577",
|
||||||
"value": "1730120430069"
|
"value": "1730393397577"
|
||||||
},
|
},
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"default": true,
|
||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "be28nkzirtb0gd"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
@ -2312,15 +2322,16 @@
|
|||||||
"type": "grafana-postgresql-datasource",
|
"type": "grafana-postgresql-datasource",
|
||||||
"uid": "be28nkzirtb0gd"
|
"uid": "be28nkzirtb0gd"
|
||||||
},
|
},
|
||||||
"definition": "SELECT DISTINCT gpu_name FROM benchmarks;",
|
"definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
|
||||||
|
"description": "",
|
||||||
"hide": 0,
|
"hide": 0,
|
||||||
"includeAll": false,
|
"includeAll": false,
|
||||||
"label": "GPU",
|
"label": "GPU",
|
||||||
"multi": false,
|
"multi": false,
|
||||||
"name": "gpu_name",
|
"name": "gpu_name",
|
||||||
"options": [],
|
"options": [],
|
||||||
"query": "SELECT DISTINCT gpu_name FROM benchmarks;",
|
"query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
|
||||||
"refresh": 2,
|
"refresh": 1,
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"skipUrlSync": false,
|
"skipUrlSync": false,
|
||||||
"sort": 0,
|
"sort": 0,
|
||||||
@ -2328,7 +2339,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"current": {
|
"current": {
|
||||||
"selected": false,
|
"selected": true,
|
||||||
"text": "10",
|
"text": "10",
|
||||||
"value": "10"
|
"value": "10"
|
||||||
},
|
},
|
||||||
@ -2359,6 +2370,6 @@
|
|||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "Transformers benchmarks",
|
"title": "Transformers benchmarks",
|
||||||
"uid": "fdz33iyzln9c0a",
|
"uid": "fdz33iyzln9c0a",
|
||||||
"version": 4,
|
"version": 10,
|
||||||
"weekStart": ""
|
"weekStart": ""
|
||||||
}
|
}
|
||||||
|
17
benchmark/grafana_datasource.yaml
Normal file
17
benchmark/grafana_datasource.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
datasources:
|
||||||
|
- name: grafana-postgresql-datasource
|
||||||
|
uid: be28nkzirtb0gd
|
||||||
|
type: postgres
|
||||||
|
url: $GRAFANA_POSTGRES_DATASOURCE_URL
|
||||||
|
user: $GRAFANA_POSTGRES_DATASOURCE_USER
|
||||||
|
secureJsonData:
|
||||||
|
password: $GRAFANA_POSTGRES_DATASOURCE_PWD
|
||||||
|
jsonData:
|
||||||
|
database: metrics
|
||||||
|
maxOpenConns: 100
|
||||||
|
maxIdleConns: 100
|
||||||
|
maxIdleConnsAuto: true
|
||||||
|
connMaxLifetime: 14400
|
||||||
|
postgresVersion: 1000
|
||||||
|
timescaledb: false
|
@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS benchmarks (
|
|||||||
branch VARCHAR(255),
|
branch VARCHAR(255),
|
||||||
commit_id VARCHAR(72),
|
commit_id VARCHAR(72),
|
||||||
commit_message VARCHAR(70),
|
commit_message VARCHAR(70),
|
||||||
gpu_name VARCHAR(255),
|
metadata jsonb,
|
||||||
created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
|
created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1,71 +1,25 @@
|
|||||||
import argparse
|
from logging import Logger
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
from statistics import mean
|
|
||||||
from threading import Event, Thread
|
from threading import Event, Thread
|
||||||
from time import perf_counter, sleep
|
from time import perf_counter, sleep
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from benchmarks_entrypoint import MetricsRecorder
|
||||||
import gpustat
|
import gpustat
|
||||||
import psutil
|
import psutil
|
||||||
import psycopg2
|
import psycopg2
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
|
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
|
||||||
from psycopg2.extras import Json
|
|
||||||
from psycopg2.extensions import register_adapter
|
|
||||||
|
|
||||||
|
|
||||||
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
|
|
||||||
handler = logging.StreamHandler(sys.stdout)
|
|
||||||
handler.setLevel(logging.INFO)
|
|
||||||
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
|
|
||||||
handler.setFormatter(formatter)
|
|
||||||
logger.addHandler(handler)
|
|
||||||
|
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "1"
|
os.environ["TOKENIZERS_PARALLELISM"] = "1"
|
||||||
torch.set_float32_matmul_precision("high")
|
torch.set_float32_matmul_precision("high")
|
||||||
register_adapter(dict, Json)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_arguments():
|
def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
|
||||||
"""
|
|
||||||
Parse command line arguments for the benchmarking CLI.
|
|
||||||
"""
|
|
||||||
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"branch",
|
|
||||||
type=str,
|
|
||||||
help="The branch name on which the benchmarking is performed.",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"commit_id",
|
|
||||||
type=str,
|
|
||||||
help="The commit hash on which the benchmarking is performed.",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"commit_msg",
|
|
||||||
type=str,
|
|
||||||
help="The commit message associated with the commit, truncated to 70 characters.",
|
|
||||||
)
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
return args.branch, args.commit_id, args.commit_msg
|
|
||||||
|
|
||||||
|
|
||||||
def collect_metrics(benchmark_id, continue_metric_collection):
|
|
||||||
p = psutil.Process(os.getpid())
|
p = psutil.Process(os.getpid())
|
||||||
conn = psycopg2.connect("dbname=metrics")
|
|
||||||
cur = conn.cursor()
|
|
||||||
while not continue_metric_collection.is_set():
|
while not continue_metric_collection.is_set():
|
||||||
with p.oneshot():
|
with p.oneshot():
|
||||||
cpu_util = p.cpu_percent()
|
cpu_util = p.cpu_percent()
|
||||||
@ -73,47 +27,41 @@ def collect_metrics(benchmark_id, continue_metric_collection):
|
|||||||
gpu_stats = gpustat.GPUStatCollection.new_query()
|
gpu_stats = gpustat.GPUStatCollection.new_query()
|
||||||
gpu_util = gpu_stats[0]["utilization.gpu"]
|
gpu_util = gpu_stats[0]["utilization.gpu"]
|
||||||
gpu_mem_megabytes = gpu_stats[0]["memory.used"]
|
gpu_mem_megabytes = gpu_stats[0]["memory.used"]
|
||||||
cur.execute(
|
metrics_recorder.collect_device_measurements(
|
||||||
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
|
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
|
||||||
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
|
|
||||||
)
|
)
|
||||||
sleep(0.01)
|
sleep(0.01)
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
|
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
|
||||||
continue_metric_collection = Event()
|
continue_metric_collection = Event()
|
||||||
metrics_thread = None
|
metrics_thread = None
|
||||||
|
model_id = "meta-llama/Llama-2-7b-hf"
|
||||||
|
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
|
||||||
try:
|
try:
|
||||||
gpu_stats = gpustat.GPUStatCollection.new_query()
|
gpu_stats = gpustat.GPUStatCollection.new_query()
|
||||||
gpu_name = gpu_stats[0]["name"]
|
gpu_name = gpu_stats[0]["name"]
|
||||||
conn = psycopg2.connect("dbname=metrics")
|
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
|
||||||
cur = conn.cursor()
|
logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
|
||||||
cur.execute(
|
metrics_thread = Thread(
|
||||||
"INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
|
target=collect_metrics,
|
||||||
(branch, commit_id, commit_msg, gpu_name),
|
args=[benchmark_id, continue_metric_collection, metrics_recorder],
|
||||||
)
|
)
|
||||||
conn.commit()
|
|
||||||
benchmark_id = cur.fetchone()[0]
|
|
||||||
logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
|
|
||||||
metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
|
|
||||||
metrics_thread.start()
|
metrics_thread.start()
|
||||||
logger.info("started background thread to fetch device metrics")
|
logger.info("started background thread to fetch device metrics")
|
||||||
|
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
|
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
|
||||||
|
|
||||||
device = "cuda"
|
device = "cuda"
|
||||||
ckpt = "meta-llama/Llama-2-7b-hf"
|
|
||||||
|
|
||||||
logger.info("downloading weights")
|
logger.info("downloading weights")
|
||||||
# This is to avoid counting download in model load time measurement
|
# This is to avoid counting download in model load time measurement
|
||||||
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
|
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
|
||||||
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
|
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
|
||||||
logger.info("loading model")
|
logger.info("loading model")
|
||||||
start = perf_counter()
|
start = perf_counter()
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
ckpt, torch_dtype=torch.float16, generation_config=gen_config
|
model_id, torch_dtype=torch.float16, generation_config=gen_config
|
||||||
).eval()
|
).eval()
|
||||||
model.to(device)
|
model.to(device)
|
||||||
torch.cuda.synchronize()
|
torch.cuda.synchronize()
|
||||||
@ -121,7 +69,7 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge
|
|||||||
model_load_time = end - start
|
model_load_time = end - start
|
||||||
logger.info(f"loaded model in: {model_load_time}s")
|
logger.info(f"loaded model in: {model_load_time}s")
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(ckpt)
|
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||||
|
|
||||||
prompt = "Why dogs are so cute?"
|
prompt = "Why dogs are so cute?"
|
||||||
inputs = tokenizer(prompt, return_tensors="pt").to(device)
|
inputs = tokenizer(prompt, return_tensors="pt").to(device)
|
||||||
@ -368,41 +316,27 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge
|
|||||||
logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
|
logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
|
||||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||||
|
|
||||||
cur.execute(
|
metrics_recorder.collect_model_measurements(
|
||||||
"""
|
benchmark_id,
|
||||||
INSERT INTO model_measurements (
|
{
|
||||||
benchmark_id,
|
"model_load_time": model_load_time,
|
||||||
measurements
|
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
|
||||||
) VALUES (%s, %s)
|
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
|
||||||
""",
|
"first_eager_generate_time_secs": first_eager_generate_time,
|
||||||
(
|
"second_eager_generate_time_secs": second_eager_generate_time,
|
||||||
benchmark_id,
|
"time_to_first_token_secs": time_to_first_token,
|
||||||
{
|
"time_to_second_token_secs": time_to_second_token,
|
||||||
"model_load_time": model_load_time,
|
"time_to_third_token_secs": time_to_third_token,
|
||||||
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
|
"time_to_next_token_mean_secs": mean_time_to_next_token,
|
||||||
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
|
"first_compile_generate_time_secs": first_compile_generate_time,
|
||||||
"first_eager_generate_time_secs": first_eager_generate_time,
|
"second_compile_generate_time_secs": second_compile_generate_time,
|
||||||
"second_eager_generate_time_secs": second_eager_generate_time,
|
"third_compile_generate_time_secs": third_compile_generate_time,
|
||||||
"time_to_first_token_secs": time_to_first_token,
|
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
|
||||||
"time_to_second_token_secs": time_to_second_token,
|
},
|
||||||
"time_to_third_token_secs": time_to_third_token,
|
|
||||||
"time_to_next_token_mean_secs": mean_time_to_next_token,
|
|
||||||
"first_compile_generate_time_secs": first_compile_generate_time,
|
|
||||||
"second_compile_generate_time_secs": second_compile_generate_time,
|
|
||||||
"third_compile_generate_time_secs": third_compile_generate_time,
|
|
||||||
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Caught exception: {e}")
|
logger.error(f"Caught exception: {e}")
|
||||||
continue_metric_collection.set()
|
continue_metric_collection.set()
|
||||||
if metrics_thread is not None:
|
if metrics_thread is not None:
|
||||||
metrics_thread.join()
|
metrics_thread.join()
|
||||||
|
metrics_recorder.close()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
branch, commit_id, commit_msg = parse_arguments()
|
|
||||||
run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user