Ping team members for new failed tests in daily CI (#34171)

* ping * fix * fix * fix * remove runner * update members --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-07-31 02:02:21 +06:00 · 2024-10-17 16:11:52 +02:00 · 2024-10-17 16:11:52 +02:00 · fce1fcfe71
commit fce1fcfe71
parent aa3e35ac67
6 changed files with 442 additions and 1 deletions
--- a/.github/workflows/check_failed_model_tests.yml
+++ b/.github/workflows/check_failed_model_tests.yml
@ -0,0 +1,129 @@
+name: Process failed tests
+
+on:
+  workflow_call:
+    inputs:
+      # Docker image used as the container of the job below.
+      docker:
+        required: true
+        type: string
+      # Commit that is checked out before the check and passed to `utils/check_bad_commit.py` as `--start_commit`.
+      start_sha:
+        required: true
+        type: string
+
+
+env:
+  HF_HOME: /mnt/cache
+  # Values are quoted so that each variable is exactly the string written here, whatever scalar typing the YAML
+  # loader applies to bare `yes` / `true` / numbers.
+  TRANSFORMERS_IS_CI: "yes"
+  OMP_NUM_THREADS: "8"
+  MKL_NUM_THREADS: "8"
+  RUN_SLOW: "yes"
+  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
+  # This token is created under the bot `hf-transformers-bot`.
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: "true"
+  RUN_PT_TF_CROSS_TESTS: "1"
+  CUDA_VISIBLE_DEVICES: "0,1"
+
+
+jobs:
+  # Single job: checks the new model test failures against the commit range and sends the processed report to Slack.
+  run_models_gpu:
+    name: " "
+    runs-on:
+      group: aws-g4dn-2xlarge-cache
+    container:
+      image: ${{ inputs.docker }}
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # The "Check failed tests" step reads `new_model_failures.json` from this artifact directory.
+      - uses: actions/download-artifact@v4
+        with:
+          name: ci_results_run_models_gpu
+          path: /transformers/ci_results_run_models_gpu
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Get target commit
+        working-directory: /transformers/utils
+        env:
+          # Hand the token to the script through the environment instead of expanding the secret into the shell
+          # command text.
+          TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+        run: |
+          # `END_SHA` is used as `--end_commit` by the "Check failed tests" step.
+          echo "END_SHA=$(python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> "$GITHUB_ENV"
+
+      - name: Checkout to `start_sha`
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ inputs.start_sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      # For each test listed in `new_model_failures.json`, look for the first bad commit between `start_sha`
+      # and `END_SHA` (exported by the "Get target commit" step).
+      - name: Check failed tests
+        working-directory: /transformers
+        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
+
+      - name: Show results
+        working-directory: /transformers
+        run: |
+          ls -l new_model_failures_with_bad_commit.json
+          cat new_model_failures_with_bad_commit.json
+
+      # `utils/check_bad_commit.py` runs `git bisect`; go back to `start_sha` before the report scripts run.
+      - name: Checkout back
+        working-directory: /transformers
+        run: |
+          git checkout ${{ inputs.start_sha }}
+
+      - name: Process report
+        shell: bash
+        working-directory: /transformers
+        env:
+          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
+        run: |
+          # Run the script only once: each run uploads the grouped report to the Hub, so a second run made just to
+          # capture the output would repeat the upload.
+          report="$(python3 utils/process_bad_commit_report.py)"
+          # Keep the report visible in the job log.
+          printf '%s\n' "$report"
+          # Export it as the multi-line environment variable `REPORT_TEXT` for the Slack step.
+          {
+            echo 'REPORT_TEXT<<EOF'
+            printf '%s\n' "$report"
+            echo EOF
+          } >> "$GITHUB_ENV"
+
+      # `REPORT_TEXT` is written to `$GITHUB_ENV` by the "Process report" step. `utils/process_bad_commit_report.py`
+      # prints it with `"` and newlines already backslash-escaped, so it can be placed inside the JSON string below.
+      - name: Send processed report
+        if: ${{ env.REPORT_TEXT != '' }}
+        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
+        with:
+          # Slack channel id, channel name, or user id to post message.
+          # See also: https://api.slack.com/methods/chat.postMessage#channels
+          channel-id: '#transformers-ci-feedback-tests'
+          # For posting a rich message using Block Kit
+          payload: |
+            {
+              "blocks": [
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "${{ env.REPORT_TEXT }}"
+                  }
+                }
+              ]
+            }
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@ -562,3 +562,13 @@ jobs:
      ci_event: ${{ inputs.ci_event }}

    secrets: inherit
+
+  # Calls the reusable workflow `check_failed_model_tests.yml`, only for the `Daily CI` event of the
+  # `run_models_gpu` job and only when `send_results` succeeded.
+  check_new_model_failures:
+    if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
+    name: Check new model failures
+    needs: send_results
+    uses: ./.github/workflows/check_failed_model_tests.yml
+    with:
+      docker: ${{ inputs.docker }}
+      start_sha: ${{ github.sha }}
+    secrets: inherit
--- a/utils/check_bad_commit.py
+++ b/utils/check_bad_commit.py
@ -0,0 +1,188 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import json
+import os
+import re
+import subprocess
+
+import requests
+
+
+def create_script(target_test):
+    """Write `target_script.py`, a python script to be run by `git bisect run` to determine if `target_test` passes
+    or fails.
+
+    The generated script runs `target_test` with pytest and exits with:
+        - `0` if pytest's output has no `FAILED` entry for `target_test`, or if the test is not found in a commit
+          (i.e. `Success`),
+        - `2` if the test failed,
+        - `-1` if pytest wrote anything else to stderr.
+
+    Args:
+        target_test (`str`): The test to check.
+
+    Returns:
+        `None`: The script is not returned; it is written to `target_script.py` in the current working directory.
+    """
+
+    # `{target_test}` is substituted here; `{{result.stderr}}` is brace-escaped so that it is only evaluated when
+    # the generated script runs.
+    script = f"""
+import os
+import subprocess
+
+result = subprocess.run(
+    ["python3", "-m", "pytest", "-v", f"{target_test}"],
+    capture_output = True,
+    text=True,
+)
+print(result.stdout)
+
+if len(result.stderr) > 0:
+    if "ERROR: not found: " in result.stderr:
+        print("test not found in this commit")
+        exit(0)
+    else:
+        print(f"pytest failed to run: {{result.stderr}}")
+        exit(-1)
+elif f"{target_test} FAILED" in result.stdout:
+    print("test failed")
+    exit(2)
+
+exit(0)
+"""
+
+    with open("target_script.py", "w") as fp:
+        fp.write(script.strip())
+
+
+def find_bad_commit(target_test, start_commit, end_commit):
+    """Find (backward) the earliest commit between `start_commit` and `end_commit` at which `target_test` fails.
+
+    Args:
+        target_test (`str`): The test to check.
+        start_commit (`str`): The latest commit.
+        end_commit (`str`): The earliest commit.
+
+    Returns:
+        `str`: The earliest commit at which `target_test` fails.
+    """
+
+    create_script(target_test=target_test)
+
+    bash = f"""
+git bisect reset
+git bisect start {start_commit} {end_commit}
+git bisect run python3 target_script.py
+"""
+
+    with open("run_git_bisect.sh", "w") as fp:
+        fp.write(bash.strip())
+
+    result = subprocess.run(
+        ["bash", "run_git_bisect.sh"],
+        capture_output=True,
+        text=True,
+    )
+    print(result.stdout)
+
+    if "error: bisect run failed" in result.stderr:
+        index = result.stderr.find("error: bisect run failed")
+        bash_error = result.stderr[index:]
+
+        error_msg = f"Error when running git bisect:\nbash error: {bash_error}"
+
+        pattern = "pytest failed to run: .+"
+        pytest_errors = re.findall(pattern, result.stdout)
+        if len(pytest_errors) > 0:
+            pytest_error = pytest_errors[0]
+            index = pytest_error.find("pytest failed to run: ")
+            index += len("pytest failed to run: ")
+            pytest_error = pytest_error[index:]
+            error_msg += f"pytest error: {pytest_error}"
+
+        raise ValueError(error_msg)
+
+    pattern = r"(.+) is the first bad commit"
+    commits = re.findall(pattern, result.stdout)
+
+    bad_commit = None
+    if len(commits) > 0:
+        bad_commit = commits[0]
+
+    print(f"Between `start_commit` {start_commit} and `end_commit` {end_commit}")
+    print(f"bad_commit: {bad_commit}\n")
+
+    return bad_commit
+
+
+def get_commit_info(commit):
+    """Get information for a commit via `api.github.com`."""
+    pr_number = None
+    author = None
+    merged_author = None
+
+    url = f"https://api.github.com/repos/huggingface/transformers/commits/{commit}/pulls"
+    pr_info_for_commit = requests.get(url).json()
+
+    if len(pr_info_for_commit) > 0:
+        pr_number = pr_info_for_commit[0]["number"]
+
+        url = f"https://api.github.com/repos/huggingface/transformers/pulls/{pr_number}"
+        pr_for_commit = requests.get(url).json()
+        author = pr_for_commit["user"]["login"]
+        merged_author = pr_for_commit["merged_by"]["login"]
+
+    if author is None:
+        url = f"https://api.github.com/repos/huggingface/transformers/commits/{commit}"
+        commit_info = requests.get(url).json()
+        author = commit_info["author"]["login"]
+
+    return {"commit": commit, "pr_number": pr_number, "author": author, "merged_by": merged_author}
+
+
+# Command line entry point: check a single test (`--test`) or every test of a report file (`--file`).
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--start_commit", type=str, required=True, help="The latest commit hash to check.")
+    parser.add_argument("--end_commit", type=str, required=True, help="The earliest commit hash to check.")
+    parser.add_argument("--test", type=str, help="The test to check.")
+    parser.add_argument("--file", type=str, help="The report file.")
+    parser.add_argument("--output_file", type=str, required=True, help="The path of the output file.")
+    args = parser.parse_args()
+
+    print(f"start_commit: {args.start_commit}")
+    print(f"end_commit: {args.end_commit}")
+
+    # The set has 2 elements only when exactly one of `--test` / `--file` is given.
+    if len({args.test is None, args.file is None}) != 2:
+        raise ValueError("Exactly one argument `test` or `file` must be specified.")
+
+    if args.test is not None:
+        # Single test: the output file is plain text (the test, then the commit found).
+        commit = find_bad_commit(target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit)
+        with open(args.output_file, "w", encoding="UTF-8") as fp:
+            fp.write(f"{args.test}\n{commit}")
+    # NOTE: when `--file` is not an existing file, nothing is written to `--output_file`.
+    elif os.path.isfile(args.file):
+        with open(args.file, "r", encoding="UTF-8") as fp:
+            reports = json.load(fp)
+
+        for model in reports:
+            # TODO: make this script able to deal with both `single-gpu` and `multi-gpu` via a new argument.
+            reports[model].pop("multi-gpu", None)
+            failed_tests = reports[model]["single-gpu"]
+
+            # Replace each test name by a dictionary with the test, the commit found and the commit information.
+            failed_tests_with_bad_commits = []
+            for test in failed_tests:
+                commit = find_bad_commit(target_test=test, start_commit=args.start_commit, end_commit=args.end_commit)
+                info = {"test": test, "commit": commit}
+                info.update(get_commit_info(commit))
+                failed_tests_with_bad_commits.append(info)
+            reports[model]["single-gpu"] = failed_tests_with_bad_commits
+
+        with open(args.output_file, "w", encoding="UTF-8") as fp:
+            json.dump(reports, fp, ensure_ascii=False, indent=4)
--- a/utils/get_previous_daily_ci.py
+++ b/utils/get_previous_daily_ci.py
@ -41,6 +41,18 @@ def get_last_daily_ci_runs(token):
    return workflow_run_id


+def get_last_daily_ci_run_commit(token):
+    """Get the commit sha of the last completed scheduled daily CI workflow run."""
+    workflow_runs = get_daily_ci_runs(token)
+    head_sha = None
+    for workflow_run in workflow_runs:
+        if workflow_run["status"] == "completed":
+            head_sha = workflow_run["head_sha"]
+            break
+
+    return head_sha
+
+
 def get_last_daily_ci_artifacts(artifact_names, output_dir, token):
    """Get the artifacts of last completed workflow run id of the scheduled (daily) CI."""
    workflow_run_id = get_last_daily_ci_runs(token)
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@ -539,11 +539,36 @@ class Message:
            )
            url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt"

+            # extra processing to save to json format
+            new_failed_tests = {}
+            for line in failure_text.split():
+                if "https://github.com/huggingface/transformers/actions/runs" in line:
+                    pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
+                    items = re.findall(pattern, line)
+                elif "tests/models/" in line:
+                    model = line.split("/")[2]
+                    new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
+                    for url, device in items:
+                        new_failed_tests[model][f"{device}-gpu"].append(line)
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+            with open(file_path, "w", encoding="UTF-8") as fp:
+                json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)
+
+            # upload results to Hub dataset
+            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+            _ = api.upload_file(
+                path_or_fileobj=file_path,
+                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json",
+                repo_id="hf-internal-testing/transformers_daily_ci",
+                repo_type="dataset",
+                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+            )
+
            block = {
                "type": "section",
                "text": {
                    "type": "plain_text",
-                    "text": "bonjour",
+                    "text": " ",
                },
                "accessory": {
                    "type": "button",
--- a/utils/process_bad_commit_report.py
+++ b/utils/process_bad_commit_report.py
@ -0,0 +1,77 @@
+"""An internal script to process `new_model_failures_with_bad_commit.json` produced by `utils/check_bad_commit.py`.
+
+This is used by `.github/workflows/check_failed_model_tests.yml` to produce a slack report of the following form
+
+```
+<{url}|New failed tests>
+{
+   "GH_ydshieh": {
+       "vit": 1
+   }
+}
+```
+"""
+
+import datetime
+import json
+import os
+from collections import Counter
+from copy import deepcopy
+
+from huggingface_hub import HfApi
+
+
+if __name__ == "__main__":
+    api = HfApi()
+
+    with open("new_model_failures_with_bad_commit.json") as fp:
+        data = json.load(fp)
+
+    # TODO: extend
+    team_members = ["ydshieh", "zucchini-nlp", "ArthurZucker", "gante", "LysandreJik", "molbap", "qubvel"]
+
+    # Counting the number of failures grouped by authors
+    new_data = {}
+    for model, model_result in data.items():
+        for device, failed_tests in model_result.items():
+            for failed_test in failed_tests:
+                author = failed_test["author"]
+
+                if author not in team_members:
+                    author = failed_test["merged_by"]
+
+                if author not in new_data:
+                    new_data[author] = Counter()
+                new_data[author].update([model])
+    for author in new_data:
+        new_data[author] = dict(new_data[author])
+
+    # Group by author
+    new_data_full = {author: deepcopy(data) for author in new_data}
+    for author, _data in new_data_full.items():
+        for model, model_result in _data.items():
+            for device, failed_tests in model_result.items():
+                failed_tests = [x for x in failed_tests if x["author"] == author or x["merged_by"] == author]
+                model_result[device] = failed_tests
+
+    # Upload to Hub and get the url
+    with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
+        json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
+    commit_info = api.upload_file(
+        path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
+        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
+        repo_id="hf-internal-testing/transformers_daily_ci",
+        repo_type="dataset",
+        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+    )
+    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
+
+    # Add `GH_` prefix as keyword mention
+    output = {}
+    for author, item in new_data.items():
+        author = f"GH_{author}"
+        output[author] = item
+
+    report = f"<{url}|New failed tests>\\n\\n"
+    report += json.dumps(output, indent=4).replace('"', '\\"').replace("\n", "\\n")
+    print(report)