From fce1fcfe717b0e8bee12e8a51944227b57f2f63a Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 17 Oct 2024 16:11:52 +0200
Subject: [PATCH] Ping team members for new failed tests in daily CI (#34171)

* ping

* fix

* fix

* fix

* remove runner

* update members

---------

Co-authored-by: ydshieh
---
 .../workflows/check_failed_model_tests.yml | 129 ++++++++++++
 .github/workflows/self-scheduled.yml       |  10 +
 utils/check_bad_commit.py                  | 188 ++++++++++++++++++
 utils/get_previous_daily_ci.py             |  12 ++
 utils/notification_service.py              |  27 ++-
 utils/process_bad_commit_report.py         |  77 +++++++
 6 files changed, 442 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/check_failed_model_tests.yml
 create mode 100644 utils/check_bad_commit.py
 create mode 100644 utils/process_bad_commit_report.py

diff --git a/.github/workflows/check_failed_model_tests.yml b/.github/workflows/check_failed_model_tests.yml
new file mode 100644
index 00000000000..f229765994d
--- /dev/null
+++ b/.github/workflows/check_failed_model_tests.yml
@@ -0,0 +1,129 @@
+name: Process failed tests
+
+on:
+  workflow_call:
+    inputs:
+      docker:
+        required: true
+        type: string
+      start_sha:
+        required: true
+        type: string
+
+
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: yes
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  RUN_SLOW: yes
+  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
+  # This token is created under the bot `hf-transformers-bot`.
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
+  CUDA_VISIBLE_DEVICES: 0,1
+
+
+jobs:
+  run_models_gpu:
+    name: " "
+    runs-on:
+      group: aws-g4dn-2xlarge-cache
+    container:
+      image: ${{ inputs.docker }}
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: ci_results_run_models_gpu
+          path: /transformers/ci_results_run_models_gpu
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Get target commit
+        working-directory: /transformers/utils
+        run: |
+          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV
+
+      - name: Checkout to `start_sha`
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ inputs.start_sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Check failed tests
+        working-directory: /transformers
+        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
+
+      - name: Show results
+        working-directory: /transformers
+        run: |
+          ls -l new_model_failures_with_bad_commit.json
+          cat new_model_failures_with_bad_commit.json
+
+      - name: Checkout back
+        working-directory: /transformers
+        run: |
+          git checkout ${{ inputs.start_sha }}
+
+      - name: Process report
+        shell: bash
+        working-directory: /transformers
+        env:
+          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
+        run: |
+          python3 utils/process_bad_commit_report.py
+
+      - name: Process report
+        shell: bash
+        working-directory: /transformers
+        env:
+          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
+        run: |
+          {
+            echo 'REPORT_TEXT<<EOF'
+            python3 utils/process_bad_commit_report.py
+            echo EOF
+          } >> "$GITHUB_ENV"
+
+      - name: Send processed report
+        if: ${{ env.REPORT_TEXT != '' }}
+        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
+        with:
+          # Slack channel id, channel name, or user id to post message.
+          # See also: https://api.slack.com/methods/chat.postMessage#channels
+          channel-id: '#transformers-ci-feedback-tests'
+          # For posting a rich message using Block Kit
+          payload: |
+            {
+              "blocks": [
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "${{ env.REPORT_TEXT }}"
+                  }
+                }
+              ]
+            }
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 1a6f4a48543..353fb59843e 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -562,3 +562,13 @@ jobs:
       ci_event: ${{ inputs.ci_event }}
     secrets: inherit
+
+  check_new_model_failures:
+    if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
+    name: Check new model failures
+    needs: send_results
+    uses: ./.github/workflows/check_failed_model_tests.yml
+    with:
+      docker: ${{ inputs.docker }}
+      start_sha: ${{ github.sha }}
+    secrets: inherit
\ No newline at end of file
diff --git a/utils/check_bad_commit.py b/utils/check_bad_commit.py
new file mode 100644
index 00000000000..091ed5c4a42
--- /dev/null
+++ b/utils/check_bad_commit.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import json
+import os
+import re
+import subprocess
+
+import requests
+
+
+def create_script(target_test):
+    """Create a python script to be run by `git bisect run` to determine if `target_test` passes or fails.
+    If a test is not found in a commit, the script exits with code `0` (i.e. `Success`).
+
+    Args:
+        target_test (`str`): The test to check.
+
+    Returns:
+        `str`: The script to be run by `git bisect run`.
+    """
+
+    script = f"""
+import os
+import subprocess
+
+result = subprocess.run(
+    ["python3", "-m", "pytest", "-v", f"{target_test}"],
+    capture_output = True,
+    text=True,
+)
+print(result.stdout)
+
+if len(result.stderr) > 0:
+    if "ERROR: not found: " in result.stderr:
+        print("test not found in this commit")
+        exit(0)
+    else:
+        print(f"pytest failed to run: {{result.stderr}}")
+        exit(-1)
+elif f"{target_test} FAILED" in result.stdout:
+    print("test failed")
+    exit(2)
+
+exit(0)
+"""
+
+    with open("target_script.py", "w") as fp:
+        fp.write(script.strip())
+
+
+def find_bad_commit(target_test, start_commit, end_commit):
+    """Find (backward) the earliest commit between `start_commit` and `end_commit` at which `target_test` fails.
+
+    Args:
+        target_test (`str`): The test to check.
+        start_commit (`str`): The latest commit.
+        end_commit (`str`): The earliest commit.
+
+    Returns:
+        `str`: The earliest commit at which `target_test` fails.
+    """
+
+    create_script(target_test=target_test)
+
+    bash = f"""
+git bisect reset
+git bisect start {start_commit} {end_commit}
+git bisect run python3 target_script.py
+"""
+
+    with open("run_git_bisect.sh", "w") as fp:
+        fp.write(bash.strip())
+
+    result = subprocess.run(
+        ["bash", "run_git_bisect.sh"],
+        capture_output=True,
+        text=True,
+    )
+    print(result.stdout)
+
+    if "error: bisect run failed" in result.stderr:
+        index = result.stderr.find("error: bisect run failed")
+        bash_error = result.stderr[index:]
+
+        error_msg = f"Error when running git bisect:\nbash error: {bash_error}"
+
+        pattern = "pytest failed to run: .+"
+        pytest_errors = re.findall(pattern, result.stdout)
+        if len(pytest_errors) > 0:
+            pytest_error = pytest_errors[0]
+            index = pytest_error.find("pytest failed to run: ")
+            index += len("pytest failed to run: ")
+            pytest_error = pytest_error[index:]
+            error_msg += f"pytest error: {pytest_error}"
+
+        raise ValueError(error_msg)
+
+    pattern = r"(.+) is the first bad commit"
+    commits = re.findall(pattern, result.stdout)
+
+    bad_commit = None
+    if len(commits) > 0:
+        bad_commit = commits[0]
+
+    print(f"Between `start_commit` {start_commit} and `end_commit` {end_commit}")
+    print(f"bad_commit: {bad_commit}\n")
+
+    return bad_commit
+
+
+def get_commit_info(commit):
+    """Get information for a commit via `api.github.com`."""
+    pr_number = None
+    author = None
+    merged_author = None
+
+    url = f"https://api.github.com/repos/huggingface/transformers/commits/{commit}/pulls"
+    pr_info_for_commit = requests.get(url).json()
+
+    if len(pr_info_for_commit) > 0:
+        pr_number = pr_info_for_commit[0]["number"]
+
+        url = f"https://api.github.com/repos/huggingface/transformers/pulls/{pr_number}"
+        pr_for_commit = requests.get(url).json()
+        author = pr_for_commit["user"]["login"]
+        merged_author = pr_for_commit["merged_by"]["login"]
+
+    if author is None:
+        url = f"https://api.github.com/repos/huggingface/transformers/commits/{commit}"
+        commit_info = requests.get(url).json()
+        author = commit_info["author"]["login"]
+
+    return {"commit": commit, "pr_number": pr_number, "author": author, "merged_by": merged_author}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--start_commit", type=str, required=True, help="The latest commit hash to check.")
+    parser.add_argument("--end_commit", type=str, required=True, help="The earliest commit hash to check.")
+    parser.add_argument("--test", type=str, help="The test to check.")
+    parser.add_argument("--file", type=str, help="The report file.")
+    parser.add_argument("--output_file", type=str, required=True, help="The path of the output file.")
+    args = parser.parse_args()
+
+    print(f"start_commit: {args.start_commit}")
+    print(f"end_commit: {args.end_commit}")
+
+    if len({args.test is None, args.file is None}) != 2:
+        raise ValueError("Exactly one argument `test` or `file` must be specified.")
+
+    if args.test is not None:
+        commit = find_bad_commit(target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit)
+        with open(args.output_file, "w", encoding="UTF-8") as fp:
+            fp.write(f"{args.test}\n{commit}")
+    elif os.path.isfile(args.file):
+        with open(args.file, "r", encoding="UTF-8") as fp:
+            reports = json.load(fp)
+
+        for model in reports:
+            # TODO: make this script able to deal with both `single-gpu` and `multi-gpu` via a new argument.
+            reports[model].pop("multi-gpu", None)
+            failed_tests = reports[model]["single-gpu"]
+
+            failed_tests_with_bad_commits = []
+            for test in failed_tests:
+                commit = find_bad_commit(target_test=test, start_commit=args.start_commit, end_commit=args.end_commit)
+                info = {"test": test, "commit": commit}
+                info.update(get_commit_info(commit))
+                failed_tests_with_bad_commits.append(info)
+            reports[model]["single-gpu"] = failed_tests_with_bad_commits
+
+        with open(args.output_file, "w", encoding="UTF-8") as fp:
+            json.dump(reports, fp, ensure_ascii=False, indent=4)
diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py
index 975c6f33982..efd7d24a752 100644
--- a/utils/get_previous_daily_ci.py
+++ b/utils/get_previous_daily_ci.py
@@ -41,6 +41,18 @@ def get_last_daily_ci_runs(token):
     return workflow_run_id
 
 
+def get_last_daily_ci_run_commit(token):
+    """Get the commit sha of the last completed scheduled daily CI workflow run."""
+    workflow_runs = get_daily_ci_runs(token)
+    head_sha = None
+    for workflow_run in workflow_runs:
+        if workflow_run["status"] == "completed":
+            head_sha = workflow_run["head_sha"]
+            break
+
+    return head_sha
+
+
 def get_last_daily_ci_artifacts(artifact_names, output_dir, token):
     """Get the artifacts of last completed workflow run id of the scheduled (daily) CI."""
     workflow_run_id = get_last_daily_ci_runs(token)
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 26eb2973213..629b7933378 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -539,11 +539,36 @@ class Message:
         )
         url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt"
 
+        # extra processing to save to json format
+        new_failed_tests = {}
+        for line in failure_text.split():
+            if "https://github.com/huggingface/transformers/actions/runs" in line:
+                pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
+                items = re.findall(pattern, line)
+            elif "tests/models/" in line:
+                model = line.split("/")[2]
+                new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
+                for url, device in items:
+                    new_failed_tests[model][f"{device}-gpu"].append(line)
+
+        file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+        with open(file_path, "w", encoding="UTF-8") as fp:
+            json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)
+
+        # upload results to Hub dataset
+        file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+        _ = api.upload_file(
+            path_or_fileobj=file_path,
+            path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json",
+            repo_id="hf-internal-testing/transformers_daily_ci",
+            repo_type="dataset",
+            token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+        )
+
         block = {
             "type": "section",
             "text": {
                 "type": "plain_text",
-                "text": "bonjour",
+                "text": " ",
             },
             "accessory": {
                 "type": "button",
diff --git a/utils/process_bad_commit_report.py b/utils/process_bad_commit_report.py
new file mode 100644
index 00000000000..f61f1b10664
--- /dev/null
+++ b/utils/process_bad_commit_report.py
@@ -0,0 +1,77 @@
+"""An internal script to process `new_model_failures_with_bad_commit.json` produced by `utils/check_bad_commit.py`.
+
+This is used by `.github/workflows/check_failed_model_tests.yml` to produce a slack report of the following form
+
+```
+<{url}|New failed tests>
+{
+    "GH_ydshieh": {
+        "vit": 1
+    }
+}
+```
+"""
+
+import datetime
+import json
+import os
+from collections import Counter
+from copy import deepcopy
+
+from huggingface_hub import HfApi
+
+
+if __name__ == "__main__":
+    api = HfApi()
+
+    with open("new_model_failures_with_bad_commit.json") as fp:
+        data = json.load(fp)
+
+    # TODO: extend
+    team_members = ["ydshieh", "zucchini-nlp", "ArthurZucker", "gante", "LysandreJik", "molbap", "qubvel"]
+
+    # Counting the number of failures grouped by authors
+    new_data = {}
+    for model, model_result in data.items():
+        for device, failed_tests in model_result.items():
+            for failed_test in failed_tests:
+                author = failed_test["author"]
+
+                if author not in team_members:
+                    author = failed_test["merged_by"]
+
+                if author not in new_data:
+                    new_data[author] = Counter()
+                new_data[author].update([model])
+    for author in new_data:
+        new_data[author] = dict(new_data[author])
+
+    # Group by author
+    new_data_full = {author: deepcopy(data) for author in new_data}
+    for author, _data in new_data_full.items():
+        for model, model_result in _data.items():
+            for device, failed_tests in model_result.items():
+                failed_tests = [x for x in failed_tests if x["author"] == author or x["merged_by"] == author]
+                model_result[device] = failed_tests
+
+    # Upload to Hub and get the url
+    with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
+        json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
+    commit_info = api.upload_file(
+        path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
+        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
+        repo_id="hf-internal-testing/transformers_daily_ci",
+        repo_type="dataset",
+        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+    )
+    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
+
+    # Add `GH_` prefix as keyword mention
+    output = {}
+    for author, item in new_data.items():
+        author = f"GH_{author}"
+        output[author] = item
+
f"<{url}|New failed tests>\\n\\n" + report += json.dumps(output, indent=4).replace('"', '\\"').replace("\n", "\\n") + print(report)