From ae3733f06e2e4de45584d143ab37dcf38ef15e1c Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Wed, 4 Jun 2025 15:40:52 +0200 Subject: [PATCH] feat: add `repository` field to benchmarks table (#38582) * feat: add `repository` field to benchmarks table * fix: remove unwanted `,` --- .github/workflows/benchmark.yml | 2 +- benchmark/benchmarks_entrypoint.py | 27 ++++++++++++++++++--------- benchmark/init_db.sql | 1 + benchmark/llama.py | 8 ++++++-- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 6b5555097c0..42fa1b43877 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -64,7 +64,7 @@ jobs: commit_id=$GITHUB_SHA fi commit_msg=$(git show -s --format=%s | cut -c1-70) - python3 benchmark/benchmarks_entrypoint.py "$BRANCH_NAME" "$commit_id" "$commit_msg" + python3 benchmark/benchmarks_entrypoint.py "huggingface/transformers" "$BRANCH_NAME" "$commit_id" "$commit_msg" env: HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} # Enable this to see debug logs diff --git a/benchmark/benchmarks_entrypoint.py b/benchmark/benchmarks_entrypoint.py index 6c036fdd693..71b83254b40 100644 --- a/benchmark/benchmarks_entrypoint.py +++ b/benchmark/benchmarks_entrypoint.py @@ -2,11 +2,11 @@ import argparse import importlib.util import logging import os -from typing import Dict import sys +from typing import Dict, Tuple -from psycopg2.extras import Json from psycopg2.extensions import register_adapter +from psycopg2.extras import Json register_adapter(dict, Json) @@ -17,10 +17,13 @@ class ImportModuleException(Exception): class MetricsRecorder: - def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str): + def __init__( + self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str + ): self.conn = connection self.conn.autocommit = True self.logger = logger + self.repository = repository self.branch = branch self.commit_id = commit_id self.commit_msg = commit_msg @@ -32,8 +35,8 @@ class MetricsRecorder: # gpu_name: str, model_id: str with self.conn.cursor() as cur: cur.execute( - "INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id", - (self.branch, self.commit_id, self.commit_msg, metadata), + "INSERT INTO benchmarks (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id", + (self.repository, self.branch, self.commit_id, self.commit_msg, metadata), ) benchmark_id = cur.fetchone()[0] logger.debug(f"initialised benchmark #{benchmark_id}") @@ -82,12 +85,18 @@ handler.setFormatter(formatter) logger.addHandler(handler) -def parse_arguments(): +def parse_arguments() -> Tuple[str, str, str, str]: """ Parse command line arguments for the benchmarking CLI. """ parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.") + parser.add_argument( + "repository", + type=str, + help="The repository name on which the benchmarking is performed.", + ) + parser.add_argument( "branch", type=str, @@ -108,7 +117,7 @@ def parse_arguments(): args = parser.parse_args() - return args.branch, args.commit_id, args.commit_msg + return args.repository, args.branch, args.commit_id, args.commit_msg def import_from_path(module_name, file_path): @@ -125,7 +134,7 @@ def import_from_path(module_name, file_path): if __name__ == "__main__": benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__)) - branch, commit_id, commit_msg = parse_arguments() + repository, branch, commit_id, commit_msg = parse_arguments() for entry in os.scandir(benchmarks_folder_path): try: @@ -136,7 +145,7 @@ if __name__ == "__main__": logger.debug(f"loading: {entry.name}") module = import_from_path(entry.name.split(".")[0], entry.path) logger.info(f"running benchmarks in: {entry.name}") - module.run_benchmark(logger, branch, commit_id, commit_msg) + module.run_benchmark(logger, repository, branch, commit_id, commit_msg) except ImportModuleException as e: logger.error(e) except Exception as e: diff --git a/benchmark/init_db.sql b/benchmark/init_db.sql index a7864c4af18..9a575177d72 100644 --- a/benchmark/init_db.sql +++ b/benchmark/init_db.sql @@ -1,5 +1,6 @@ CREATE TABLE IF NOT EXISTS benchmarks ( benchmark_id SERIAL PRIMARY KEY, + repository VARCHAR(255), branch VARCHAR(255), commit_id VARCHAR(72), commit_message VARCHAR(70), diff --git a/benchmark/llama.py b/benchmark/llama.py index 6a477de7fbf..bc60454e0af 100644 --- a/benchmark/llama.py +++ b/benchmark/llama.py @@ -33,11 +33,15 @@ def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder): sleep(0.01) -def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100): +def run_benchmark( + logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100 +): continue_metric_collection = Event() metrics_thread = None model_id = "meta-llama/Llama-2-7b-hf" - metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg) + metrics_recorder = MetricsRecorder( + psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg + ) try: gpu_stats = gpustat.GPUStatCollection.new_query() gpu_name = gpu_stats[0]["name"]