Benchmark GitHub Actions workflow (#31163)

* benchmark workflow

* benchmark workflow

* benchmark workflow

* benchmark workflow

* build

* build

* build

* build

* build

* build

* build

* build

* build

* build

* build

* build

* build

* build

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar 2024-06-05 10:39:00 +02:00 committed by GitHub
parent 63fb253df0
commit 03ea160937
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 65 additions and 2 deletions

42
.github/workflows/benchmark.yml vendored Normal file
View File

@ -0,0 +1,42 @@
# Runs the transformers benchmark suite on a self-hosted GPU runner.
# Triggered nightly by cron, and reusable from other workflows via
# `workflow_call` (the "merged to main" step relies on the caller's event).
name: Self-hosted runner (benchmark)

on:
  schedule:
    - cron: "17 2 * * *"
  workflow_call:

env:
  HF_HOME: /mnt/cache
  # Quoted: env values should be strings — an unquoted `true` is a YAML
  # boolean, while the consumer reads the literal text of the variable.
  TF_FORCE_GPU_ALLOW_GROWTH: "true"

jobs:
  benchmark:
    name: Benchmark
    runs-on: [single-gpu, nvidia-gpu, a10, ci]
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
        run: |
          git fetch && git checkout ${{ github.sha }}
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
      - name: Benchmark (daily)
        if: github.event_name == 'schedule'
        working-directory: /transformers
        run: |
          # The version specifier must be quoted: an unquoted `>=0.2.0` is
          # parsed by the shell as output redirection to a file named
          # `=0.2.0`, silently dropping the version constraint.
          python3 -m pip install "optimum-benchmark>=0.2.0"
          HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
      - name: Benchmark (merged to main event)
        # `push` never triggers this workflow directly; when invoked via
        # `workflow_call`, `github.event_name` reflects the caller's event.
        if: github.event_name == 'push' && github.ref_name == 'main'
        working-directory: /transformers
        run: |
          python3 -m pip install "optimum-benchmark>=0.2.0"
          HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results_merge_event --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun

View File

@ -133,3 +133,8 @@ jobs:
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true
benchmark:
name: Benchmark workflow
uses: ./.github/workflows/benchmark.yml
secrets: inherit

View File

@ -32,6 +32,8 @@ from pathlib import Path
from git import Repo
from huggingface_hub import HfApi
from optimum_benchmark import Benchmark
from optimum_benchmark_wrapper import main
@ -143,7 +145,6 @@ def summarize(run_dir, metrics, expand_metrics=False):
with open(os.path.join(report_dir, "summary.json"), "w") as fp:
json.dump(summary, fp, indent=4)
# TODO: upload to Hub
return summaries
@ -191,7 +192,6 @@ def combine_summaries(summaries):
with open(os.path.join(exp_run_dir, "summary.json"), "w") as fp:
json.dump(combined, fp, indent=4)
# TODO: upload to Hub
print(json.dumps(combined, indent=4))
return combined
@ -216,6 +216,11 @@ if __name__ == "__main__":
help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.",
)
parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.")
parser.add_argument("--repo_id", type=str, default=None, help="The repository to which the file will be uploaded.")
parser.add_argument("--path_in_repo", type=str, default=None, help="Relative filepath in the repo.")
parser.add_argument("--token", type=str, default=None, help="A valid user access token (string).")
args, optimum_benchmark_args = parser.parse_known_args()
repo = Repo(PATH_TO_REPO)
@ -308,3 +313,14 @@ if __name__ == "__main__":
json.dump(run_summaries, fp, indent=4)
combined_summary = combine_summaries(run_summaries)
if args.repo_id is not None and args.path_in_repo is not None:
# Upload to Hub
api = HfApi()
api.upload_folder(
folder_path=exp_run_dir,
path_in_repo=args.path_in_repo,
repo_id=args.repo_id,
repo_type="dataset",
token=args.token,
)