feat: add repository field to benchmarks table (#38582)

* feat: add `repository` field to benchmarks table

* fix: remove unwanted `,`
This commit is contained in:
Luc Georges 2025-06-04 15:40:52 +02:00 committed by GitHub
parent 1285aec4cc
commit ae3733f06e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 26 additions and 12 deletions

View File

@ -64,7 +64,7 @@ jobs:
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark/benchmarks_entrypoint.py "$BRANCH_NAME" "$commit_id" "$commit_msg"
python3 benchmark/benchmarks_entrypoint.py "huggingface/transformers" "$BRANCH_NAME" "$commit_id" "$commit_msg"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Enable this to see debug logs

View File

@ -2,11 +2,11 @@ import argparse
import importlib.util
import logging
import os
from typing import Dict
import sys
from typing import Dict, Tuple
from psycopg2.extras import Json
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json
register_adapter(dict, Json)
@ -17,10 +17,13 @@ class ImportModuleException(Exception):
class MetricsRecorder:
def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
def __init__(
self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str
):
self.conn = connection
self.conn.autocommit = True
self.logger = logger
self.repository = repository
self.branch = branch
self.commit_id = commit_id
self.commit_msg = commit_msg
@ -32,8 +35,8 @@ class MetricsRecorder:
# gpu_name: str, model_id: str
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
(self.branch, self.commit_id, self.commit_msg, metadata),
"INSERT INTO benchmarks (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
(self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
)
benchmark_id = cur.fetchone()[0]
logger.debug(f"initialised benchmark #{benchmark_id}")
@ -82,12 +85,18 @@ handler.setFormatter(formatter)
logger.addHandler(handler)
def parse_arguments():
def parse_arguments() -> Tuple[str, str, str, str]:
"""
Parse command line arguments for the benchmarking CLI.
"""
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
parser.add_argument(
"repository",
type=str,
help="The repository name on which the benchmarking is performed.",
)
parser.add_argument(
"branch",
type=str,
@ -108,7 +117,7 @@ def parse_arguments():
args = parser.parse_args()
return args.branch, args.commit_id, args.commit_msg
return args.repository, args.branch, args.commit_id, args.commit_msg
def import_from_path(module_name, file_path):
@ -125,7 +134,7 @@ def import_from_path(module_name, file_path):
if __name__ == "__main__":
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
branch, commit_id, commit_msg = parse_arguments()
repository, branch, commit_id, commit_msg = parse_arguments()
for entry in os.scandir(benchmarks_folder_path):
try:
@ -136,7 +145,7 @@ if __name__ == "__main__":
logger.debug(f"loading: {entry.name}")
module = import_from_path(entry.name.split(".")[0], entry.path)
logger.info(f"running benchmarks in: {entry.name}")
module.run_benchmark(logger, branch, commit_id, commit_msg)
module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
except ImportModuleException as e:
logger.error(e)
except Exception as e:

View File

@ -1,5 +1,6 @@
CREATE TABLE IF NOT EXISTS benchmarks (
benchmark_id SERIAL PRIMARY KEY,
repository VARCHAR(255),
branch VARCHAR(255),
commit_id VARCHAR(72),
commit_message VARCHAR(70),

View File

@ -33,11 +33,15 @@ def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
sleep(0.01)
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
def run_benchmark(
logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100
):
continue_metric_collection = Event()
metrics_thread = None
model_id = "meta-llama/Llama-2-7b-hf"
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
metrics_recorder = MetricsRecorder(
psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg
)
try:
gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_name = gpu_stats[0]["name"]