CI reporting improvements (#38230)

update

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Yih-Dar 2025-05-20 19:34:58 +02:00 committed by GitHub
parent cb513e35f9
commit feec294dea
8 changed files with 375 additions and 155 deletions

View File

@@ -39,55 +39,100 @@ jobs:
           name: ci_results_run_models_gpu
           path: /transformers/ci_results_run_models_gpu
+      - name: Check file
+        working-directory: /transformers
+        run: |
+          if [ -f ci_results_run_models_gpu/new_model_failures.json ]; then
+            echo "`ci_results_run_models_gpu/new_model_failures.json` exists, continue ..."
+            echo "process=true" >> $GITHUB_ENV
+          else
+            echo "`ci_results_run_models_gpu/new_model_failures.json` doesn't exist, abort."
+            echo "process=false" >> $GITHUB_ENV
+          fi
+      - uses: actions/download-artifact@v4
+        if: ${{ env.process == 'true' }}
+        with:
+          pattern: setup_values*
+          path: setup_values
+          merge-multiple: true
+      - name: Prepare some setup values
+        if: ${{ env.process == 'true' }}
+        run: |
+          if [ -f setup_values/prev_workflow_run_id.txt ]; then
+            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
+          else
+            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
+          fi
+          if [ -f setup_values/other_workflow_run_id.txt ]; then
+            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
+          else
+            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
+          fi
       - name: Update clone
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: git fetch && git checkout ${{ github.sha }}
       - name: Get target commit
         working-directory: /transformers/utils
+        if: ${{ env.process == 'true' }}
         run: |
-          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV
+          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV
       - name: Checkout to `start_sha`
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: git fetch && git checkout ${{ inputs.start_sha }}
       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
       - name: NVIDIA-SMI
+        if: ${{ env.process == 'true' }}
         run: |
           nvidia-smi
       - name: Environment
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: |
           python3 utils/print_env.py
       - name: Show installed libraries and their versions
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: pip freeze
       - name: Check failed tests
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
       - name: Show results
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: |
           ls -l new_model_failures_with_bad_commit.json
           cat new_model_failures_with_bad_commit.json
       - name: Checkout back
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         run: |
           git checkout ${{ inputs.start_sha }}
       - name: Process report
         shell: bash
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         env:
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
           TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
         run: |
           python3 utils/process_bad_commit_report.py
@@ -95,7 +140,9 @@ jobs:
       - name: Process report
         shell: bash
         working-directory: /transformers
+        if: ${{ env.process == 'true' }}
         env:
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
           TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
         run: |
           {
@@ -105,7 +152,7 @@ jobs:
           } >> "$GITHUB_ENV"
       - name: Send processed report
-        if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
+        if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
         uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
         with:
           # Slack channel id, channel name, or user id to post message.

View File

@@ -8,8 +8,43 @@ on:
   push:
     branches:
       - run_scheduled_ci*
+  workflow_dispatch:
+    inputs:
+      prev_workflow_run_id:
+        description: 'previous workflow run id to compare'
+        type: string
+        required: false
+        default: ""
+      other_workflow_run_id:
+        description: 'other workflow run id to compare'
+        type: string
+        required: false
+        default: ""
+
+# Used for `push` to easily modiffy the target workflow runs to compare against
+env:
+  prev_workflow_run_id: ""
+  other_workflow_run_id: ""
+
 jobs:
+  setup:
+    name: Setup
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Setup
+        run: |
+          mkdir "setup_values"
+          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
+          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: setup_values
+          path: setup_values
+
   model-ci:
     name: Model CI
     uses: ./.github/workflows/self-scheduled.yml
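
The new `workflow_dispatch` inputs above make the comparison targets selectable by hand. A minimal sketch (not part of this commit) of kicking off such a run through the GitHub REST API's workflow-dispatch endpoint; the workflow file name and the `GH_TOKEN` variable are assumptions:

```python
import os

import requests

REPO = "huggingface/transformers"
WORKFLOW_FILE = "self-scheduled-caller.yml"  # assumed file name of the caller workflow


def dispatch_scheduled_ci(prev_run_id="", other_run_id="", ref="main"):
    """Start a workflow_dispatch run with explicit comparison targets."""
    url = f"https://api.github.com/repos/{REPO}/actions/workflows/{WORKFLOW_FILE}/dispatches"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GH_TOKEN']}",  # assumed token with `actions: write` scope
    }
    payload = {
        "ref": ref,
        "inputs": {
            "prev_workflow_run_id": prev_run_id,
            "other_workflow_run_id": other_run_id,
        },
    }
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    response.raise_for_status()  # the endpoint answers 204 No Content on success


if __name__ == "__main__":
    dispatch_scheduled_ci(prev_run_id="1234567890")  # placeholder run id
```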

View File

@@ -39,6 +39,21 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/download-artifact@v4
+      - name: Prepare some setup values
+        run: |
+          if [ -f setup_values/prev_workflow_run_id.txt ]; then
+            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
+          else
+            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
+          fi
+          if [ -f setup_values/other_workflow_run_id.txt ]; then
+            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
+          else
+            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
+          fi
       - name: Send message to Slack
         if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
         env:
@@ -50,7 +65,6 @@ jobs:
           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
           CI_EVENT: ${{ inputs.ci_event }}
           CI_SHA: ${{ github.sha }}
-          CI_WORKFLOW_REF: ${{ github.workflow_ref }}
           CI_TEST_JOB: ${{ inputs.job }}
           SETUP_STATUS: ${{ inputs.setup_status }}
         # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
@@ -58,7 +72,6 @@ jobs:
         # For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
         # empty string, and the called script still get one argument (which is the emtpy string).
         run: |
-          sudo apt-get install -y curl
           pip install huggingface_hub
           pip install slack_sdk
           pip show slack_sdk
@@ -86,7 +99,6 @@ jobs:
         # We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
         # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
         run: |
-          sudo apt-get install -y curl
           pip install huggingface_hub
           pip install slack_sdk
           pip show slack_sdk

View File

@@ -144,6 +144,7 @@ def get_commit_info(commit):
     url = f"https://api.github.com/repos/huggingface/transformers/pulls/{pr_number}"
     pr_for_commit = requests.get(url).json()
     author = pr_for_commit["user"]["login"]
-    merged_author = pr_for_commit["merged_by"]["login"]
+    if pr_for_commit["merged_by"] is not None:
+        merged_author = pr_for_commit["merged_by"]["login"]

     if author is None:
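
The added guard above handles PRs whose `merged_by` field is null in the GitHub API response (for example, a PR that was closed without being merged). A tiny standalone sketch of the same pattern, using a made-up payload:

```python
# `merged_by` can be null in the GitHub API payload (e.g. a PR closed without merging),
# so indexing into it unconditionally would raise a TypeError.
pr_for_commit = {"user": {"login": "some-author"}, "merged_by": None}  # made-up payload

author = pr_for_commit["user"]["login"]
merged_author = None
if pr_for_commit["merged_by"] is not None:
    merged_author = pr_for_commit["merged_by"]["login"]

print(author, merged_author)  # some-author None
```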

View File

@@ -5,7 +5,7 @@ import requests
 from get_ci_error_statistics import download_artifact, get_artifacts_links


-def get_daily_ci_runs(token, num_runs=7):
+def get_daily_ci_runs(token, num_runs=7, workflow_id=None):
     """Get the workflow runs of the scheduled (daily) CI.

     This only selects the runs triggered by the `schedule` event on the `main` branch.
@@ -18,7 +18,13 @@ def get_daily_ci_runs(token, num_runs=7):
     # From a given workflow run (where we have workflow run id), we can get the workflow id by going to
     # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}
     # and check the `workflow_id` key.
-    workflow_id = "90575235"
+    if not workflow_id:
+        workflow_run_id = os.environ["GITHUB_RUN_ID"]
+        workflow_run = requests.get(
+            f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}", headers=headers
+        ).json()
+        workflow_id = workflow_run["workflow_id"]
+
     url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs"
     # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results
@@ -29,33 +35,64 @@ def get_daily_ci_runs(token, num_runs=7):
     return result["workflow_runs"]


-def get_last_daily_ci_runs(token):
+def get_last_daily_ci_run(token, workflow_run_id=None, workflow_id=None, commit_sha=None):
     """Get the last completed workflow run id of the scheduled (daily) CI."""
-    workflow_runs = get_daily_ci_runs(token)
-    workflow_run_id = None
-    for workflow_run in workflow_runs:
-        if workflow_run["status"] == "completed":
-            workflow_run_id = workflow_run["id"]
-            break
+    headers = None
+    if token is not None:
+        headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}
+
+    workflow_run = None
+    if workflow_run_id is not None and workflow_run_id != "":
+        workflow_run = requests.get(
+            f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}", headers=headers
+        ).json()
+        return workflow_run
+
+    workflow_runs = get_daily_ci_runs(token, workflow_id=workflow_id)
+    for run in workflow_runs:
+        if commit_sha in [None, ""] and run["status"] == "completed":
+            workflow_run = run
+            break
+        # if `commit_sha` is specified, and `workflow_run["head_sha"]` matches it, return it.
+        elif commit_sha not in [None, ""] and run["head_sha"] == commit_sha:
+            workflow_run = run
+            break
+
+    return workflow_run
+
+
+def get_last_daily_ci_workflow_run_id(token, workflow_run_id=None, workflow_id=None, commit_sha=None):
+    """Get the last completed workflow run id of the scheduled (daily) CI."""
+    if workflow_run_id is not None and workflow_run_id != "":
+        return workflow_run_id
+
+    workflow_run = get_last_daily_ci_run(token, workflow_id=workflow_id, commit_sha=commit_sha)
+    workflow_run_id = None
+    if workflow_run is not None:
+        workflow_run_id = workflow_run["id"]

     return workflow_run_id


-def get_last_daily_ci_run_commit(token):
+def get_last_daily_ci_run_commit(token, workflow_run_id=None, workflow_id=None, commit_sha=None):
     """Get the commit sha of the last completed scheduled daily CI workflow run."""
-    workflow_runs = get_daily_ci_runs(token)
-    head_sha = None
-    for workflow_run in workflow_runs:
-        if workflow_run["status"] == "completed":
-            head_sha = workflow_run["head_sha"]
-            break
-
-    return head_sha
+    workflow_run = get_last_daily_ci_run(
+        token, workflow_run_id=workflow_run_id, workflow_id=workflow_id, commit_sha=commit_sha
+    )
+    workflow_run_head_sha = None
+    if workflow_run is not None:
+        workflow_run_head_sha = workflow_run["head_sha"]
+
+    return workflow_run_head_sha


-def get_last_daily_ci_artifacts(artifact_names, output_dir, token):
+def get_last_daily_ci_artifacts(
+    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
+):
     """Get the artifacts of last completed workflow run id of the scheduled (daily) CI."""
-    workflow_run_id = get_last_daily_ci_runs(token)
+    workflow_run_id = get_last_daily_ci_workflow_run_id(
+        token, workflow_run_id=workflow_run_id, workflow_id=workflow_id, commit_sha=commit_sha
+    )
     if workflow_run_id is not None:
         artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token)
         for artifact_name in artifact_names:
@@ -66,9 +103,18 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token):
             )


-def get_last_daily_ci_reports(artifact_names, output_dir, token):
+def get_last_daily_ci_reports(
+    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
+):
     """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI."""
-    get_last_daily_ci_artifacts(artifact_names, output_dir, token)
+    get_last_daily_ci_artifacts(
+        artifact_names,
+        output_dir,
+        token,
+        workflow_run_id=workflow_run_id,
+        workflow_id=workflow_id,
+        commit_sha=commit_sha,
+    )

     results = {}
     for artifact_name in artifact_names:
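
Taken together, the refactored helpers resolve a workflow run by explicit run id first, then fall back to the latest completed run of a workflow, optionally pinned to a commit SHA. A rough usage sketch (not from this commit), assuming `utils/` is importable and `ACCESS_REPO_INFO_TOKEN` is set:

```python
import os

from get_previous_daily_ci import (
    get_last_daily_ci_reports,
    get_last_daily_ci_run,
    get_last_daily_ci_run_commit,
)

token = os.environ["ACCESS_REPO_INFO_TOKEN"]

# 1. An explicit run id wins: the run is fetched directly from the API.
run = get_last_daily_ci_run(token, workflow_run_id="1234567890")  # placeholder id

# 2. Without a run id, fall back to the latest completed run of a given workflow,
#    optionally pinned to a commit SHA.
run = get_last_daily_ci_run(token, workflow_id="90575235", commit_sha=None)

# 3. The convenience wrappers reuse the same resolution logic.
commit = get_last_daily_ci_run_commit(token, workflow_id="90575235")
reports = get_last_daily_ci_reports(
    artifact_names=["ci_results_run_models_gpu"],
    output_dir="previous_reports",
    token=token,
    workflow_id="90575235",
)
print(commit, sorted(reports))
```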

View File

@@ -14,7 +14,6 @@
 import ast
 import collections
-import datetime
 import functools
 import json
 import operator
@@ -26,7 +25,7 @@ from typing import Any, Dict, List, Optional, Union
 import requests
 from get_ci_error_statistics import get_jobs
-from get_previous_daily_ci import get_last_daily_ci_reports
+from get_previous_daily_ci import get_last_daily_ci_reports, get_last_daily_ci_run, get_last_daily_ci_workflow_run_id
 from huggingface_hub import HfApi
 from slack_sdk import WebClient
@@ -109,6 +108,7 @@ class Message:
         additional_results: Dict,
         selected_warnings: Optional[List] = None,
         prev_ci_artifacts=None,
+        other_ci_artifacts=None,
     ):
         self.title = title
         self.ci_title = ci_title
@@ -159,6 +159,7 @@ class Message:
         self.selected_warnings = selected_warnings

         self.prev_ci_artifacts = prev_ci_artifacts
+        self.other_ci_artifacts = other_ci_artifacts

     @property
     def time(self) -> str:
@@ -515,28 +516,39 @@ class Message:
             if len(self.selected_warnings) > 0:
                 blocks.append(self.warnings)

-        new_failure_blocks = self.get_new_model_failure_blocks(with_header=False)
+        for idx, (prev_workflow_run_id, prev_ci_artifacts) in enumerate(
+            [self.prev_ci_artifacts] + self.other_ci_artifacts
+        ):
+            if idx == 0:
+                # This is the truncated version to show on slack. For now.
+                new_failure_blocks = self.get_new_model_failure_blocks(
+                    prev_ci_artifacts=prev_ci_artifacts, with_header=False
+                )
                 if len(new_failure_blocks) > 0:
                     blocks.extend(new_failure_blocks)

-        # To save the list of new model failures
-        extra_blocks = self.get_new_model_failure_blocks(to_truncate=False)
+            # To save the list of new model failures and uploaed to hub repositories
+            extra_blocks = self.get_new_model_failure_blocks(prev_ci_artifacts=prev_ci_artifacts, to_truncate=False)
             if extra_blocks:
+                filename = "new_model_failures"
+                if idx > 0:
+                    filename = f"{filename}_against_{prev_workflow_run_id}"
+
                 failure_text = extra_blocks[-1]["text"]["text"]
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
+                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.txt")
                 with open(file_path, "w", encoding="UTF-8") as fp:
                     fp.write(failure_text)

                 # upload results to Hub dataset
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
+                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.txt")
                 commit_info = api.upload_file(
                     path_or_fileobj=file_path,
-                    path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt",
+                    path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{filename}.txt",
                     repo_id="hf-internal-testing/transformers_daily_ci",
                     repo_type="dataset",
                     token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
                 )
-            url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt"
+                url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/{filename}.txt"

                 # extra processing to save to json format
                 new_failed_tests = {}
@@ -553,20 +565,21 @@ class Message:
                         new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
                     for url, device in items:
                         new_failed_tests[model][f"{device}-gpu"].append(line)
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.json")
                 with open(file_path, "w", encoding="UTF-8") as fp:
                     json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)

                 # upload results to Hub dataset
-            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
+                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.json")
                 _ = api.upload_file(
                     path_or_fileobj=file_path,
-                    path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json",
+                    path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{filename}.json",
                     repo_id="hf-internal-testing/transformers_daily_ci",
                     repo_type="dataset",
                     token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
                 )

+        if idx == 0:
             block = {
                 "type": "section",
                 "text": {
@@ -700,18 +713,18 @@ class Message:
             {"type": "section", "text": {"type": "mrkdwn", "text": failure_text}},
         ]

-    def get_new_model_failure_blocks(self, with_header=True, to_truncate=True):
-        if self.prev_ci_artifacts is None:
+    def get_new_model_failure_blocks(self, prev_ci_artifacts, with_header=True, to_truncate=True):
+        if prev_ci_artifacts is None:
             return []

         sorted_dict = sorted(self.model_results.items(), key=lambda t: t[0])

         prev_model_results = {}
         if (
-            f"ci_results_{job_name}" in self.prev_ci_artifacts
-            and "model_results.json" in self.prev_ci_artifacts[f"ci_results_{job_name}"]
+            f"ci_results_{job_name}" in prev_ci_artifacts
+            and "model_results.json" in prev_ci_artifacts[f"ci_results_{job_name}"]
         ):
-            prev_model_results = json.loads(self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"])
+            prev_model_results = json.loads(prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"])

         all_failure_lines = {}
         for job, job_result in sorted_dict:
@@ -812,20 +825,6 @@ class Message:
                 time.sleep(1)

-        blocks = self.get_new_model_failure_blocks()
-        if blocks:
-            print("Sending the following reply")
-            print(json.dumps({"blocks": blocks}))
-
-            client.chat_postMessage(
-                channel=SLACK_REPORT_CHANNEL_ID,
-                text="Results for new failures",
-                blocks=blocks,
-                thread_ts=self.thread_ts["ts"],
-            )
-
-            time.sleep(1)

 def retrieve_artifact(artifact_path: str, gpu: Optional[str]):
     if gpu not in [None, "single", "multi"]:
@@ -1168,6 +1167,23 @@ if __name__ == "__main__":
         "run_torch_cuda_extensions_gpu": "DeepSpeed",
     }

+    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
+    report_repo_subfolder = ""
+    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
+        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
+        report_repo_subfolder = f"runs/{report_repo_subfolder}"
+
+    workflow_run = get_last_daily_ci_run(
+        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
+    )
+    workflow_run_created_time = workflow_run["created_at"]
+    workflow_id = workflow_run["workflow_id"]
+
+    report_repo_folder = workflow_run_created_time.split("T")[0]
+
+    if report_repo_subfolder:
+        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
+
     # Remove some entries in `additional_files` if they are not concerned.
     test_name = None
     if job_name in job_to_test_map:
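
The block above derives the upload folder from the run's `created_at` date, adding a `runs/<run_number>-<run_id>` subfolder when the trigger is not `schedule`. A standalone sketch of the same path logic with hypothetical environment values:

```python
import os

# hypothetical values; in CI these come from GitHub Actions and from the run's `created_at`
os.environ.setdefault("GITHUB_EVENT_NAME", "workflow_dispatch")
os.environ.setdefault("GITHUB_RUN_NUMBER", "57")
os.environ.setdefault("GITHUB_RUN_ID", "1234567890")
workflow_run_created_time = "2025-05-20T17:34:58Z"

report_repo_subfolder = ""
if os.getenv("GITHUB_EVENT_NAME") != "schedule":
    report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
    report_repo_subfolder = f"runs/{report_repo_subfolder}"

report_repo_folder = workflow_run_created_time.split("T")[0]
if report_repo_subfolder:
    report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

print(report_repo_folder)  # 2025-05-20/runs/57-1234567890
```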
@@ -1241,8 +1257,9 @@ if __name__ == "__main__":
     if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
         os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))

-    target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main"
-    is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow
+    nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
+    is_nvidia_daily_ci_workflow = os.environ.get("GITHUB_WORKFLOW_REF").startswith(nvidia_daily_ci_workflow)
+    is_scheduled_ci_run = os.environ.get("GITHUB_EVENT_NAME") == "schedule"

     # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
     # results.
@@ -1250,11 +1267,9 @@ if __name__ == "__main__":
         with open(f"ci_results_{job_name}/model_results.json", "w", encoding="UTF-8") as fp:
             json.dump(model_results, fp, indent=4, ensure_ascii=False)

-        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-        if is_scheduled_ci_run:
         api.upload_file(
             path_or_fileobj=f"ci_results_{job_name}/model_results.json",
-            path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
+            path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/model_results.json",
             repo_id="hf-internal-testing/transformers_daily_ci",
             repo_type="dataset",
             token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
@@ -1272,11 +1287,9 @@ if __name__ == "__main__":
         with open(f"ci_results_{job_name}/model_job_links.json", "w", encoding="UTF-8") as fp:
             json.dump(model_job_links, fp, indent=4, ensure_ascii=False)

-        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-        if is_scheduled_ci_run:
         api.upload_file(
             path_or_fileobj=f"ci_results_{job_name}/model_job_links.json",
-            path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_job_links.json",
+            path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/model_job_links.json",
             repo_id="hf-internal-testing/transformers_daily_ci",
             repo_type="dataset",
             token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
@@ -1294,26 +1307,57 @@ if __name__ == "__main__":
         with open(f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", "w", encoding="UTF-8") as fp:
             json.dump(job_result, fp, indent=4, ensure_ascii=False)

-        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-        if is_scheduled_ci_run:
         api.upload_file(
             path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
-            path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
+            path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
             repo_id="hf-internal-testing/transformers_daily_ci",
             repo_type="dataset",
             token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
         )

-    prev_ci_artifacts = None
+    prev_workflow_run_id = None
+    other_workflow_run_ids = []
+
     if is_scheduled_ci_run:
+        # TODO: remove `if job_name == "run_models_gpu"`
         if job_name == "run_models_gpu":
-            # Get the last previously completed CI's failure tables
+            prev_workflow_run_id = get_last_daily_ci_workflow_run_id(
+                token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=workflow_id
+            )
+            # For a scheduled run that is not the Nvidia's scheduled daily CI, add Nvidia's scheduled daily CI run as a target to compare.
+            if not is_nvidia_daily_ci_workflow:
+                # The id of the workflow `.github/workflows/self-scheduled-caller.yml` (not of a workflow run of it).
+                other_workflow_id = "90575235"
+                # We need to get the Nvidia's scheduled daily CI run that match the current run (i.e. run with the same commit SHA)
+                other_workflow_run_id = get_last_daily_ci_workflow_run_id(
+                    token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=other_workflow_id, commit_sha=ci_sha
+                )
+                other_workflow_run_ids.append(other_workflow_run_id)
+    else:
+        prev_workflow_run_id = os.environ["PREV_WORKFLOW_RUN_ID"]
+        other_workflow_run_id = os.environ["OTHER_WORKFLOW_RUN_ID"]
+        other_workflow_run_ids.append(other_workflow_run_id)
+
+    prev_ci_artifacts = (None, None)
+    other_ci_artifacts = []
+
+    for idx, target_workflow_run_id in enumerate([prev_workflow_run_id] + other_workflow_run_ids):
+        if target_workflow_run_id is None or target_workflow_run_id == "":
+            continue
+        else:
             artifact_names = [f"ci_results_{job_name}"]
             output_dir = os.path.join(os.getcwd(), "previous_reports")
             os.makedirs(output_dir, exist_ok=True)
-            prev_ci_artifacts = get_last_daily_ci_reports(
-                artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"]
+            ci_artifacts = get_last_daily_ci_reports(
+                artifact_names=artifact_names,
+                output_dir=output_dir,
+                token=os.environ["ACCESS_REPO_INFO_TOKEN"],
+                workflow_run_id=target_workflow_run_id,
             )
+            if idx == 0:
+                prev_ci_artifacts = (target_workflow_run_id, ci_artifacts)
+            else:
+                other_ci_artifacts.append((target_workflow_run_id, ci_artifacts))

     job_to_test_map.update(
         {
@@ -1335,6 +1379,7 @@ if __name__ == "__main__":
         additional_results,
         selected_warnings=selected_warnings,
         prev_ci_artifacts=prev_ci_artifacts,
+        other_ci_artifacts=other_ci_artifacts,
     )

     # send report only if there is any failure (for push CI)
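
In the hunk above, a scheduled run resolves its comparison targets from workflow ids (plus the Nvidia daily CI run at the same commit when relevant), while a manual run reads `PREV_WORKFLOW_RUN_ID` and `OTHER_WORKFLOW_RUN_ID` from the environment; empty ids are skipped. A simplified sketch of that selection, with hypothetical inputs:

```python
def collect_target_run_ids(is_scheduled, resolved_prev_id=None, env=None):
    """Assemble the comparison targets and drop empty ids, mirroring the `continue` above."""
    env = env or {}
    if is_scheduled:
        prev_workflow_run_id = resolved_prev_id
        other_workflow_run_ids = []  # optionally the Nvidia daily CI run at the same commit
    else:
        prev_workflow_run_id = env.get("PREV_WORKFLOW_RUN_ID", "")
        other_workflow_run_ids = [env.get("OTHER_WORKFLOW_RUN_ID", "")]
    return [run_id for run_id in [prev_workflow_run_id] + other_workflow_run_ids if run_id]


print(collect_target_run_ids(False, env={"PREV_WORKFLOW_RUN_ID": "111", "OTHER_WORKFLOW_RUN_ID": ""}))
# ['111']
```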

View File

@@ -13,7 +13,6 @@
 # limitations under the License.

 import ast
-import datetime
 import json
 import os
 import sys
@@ -21,6 +20,7 @@ import time
 from typing import Dict

 from get_ci_error_statistics import get_jobs
+from get_previous_daily_ci import get_last_daily_ci_run
 from huggingface_hub import HfApi
 from notification_service import (
     Message,
@@ -246,20 +246,38 @@ if __name__ == "__main__":
     )

     job_name = os.getenv("CI_TEST_JOB")

+    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
+    report_repo_subfolder = ""
+    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
+        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
+        report_repo_subfolder = f"runs/{report_repo_subfolder}"
+
+    workflow_run = get_last_daily_ci_run(
+        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
+    )
+    workflow_run_created_time = workflow_run["created_at"]
+    workflow_id = workflow_run["workflow_id"]
+
+    report_repo_folder = workflow_run_created_time.split("T")[0]
+
+    if report_repo_subfolder:
+        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
+
     if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
         os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))

+    nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
+    is_nvidia_daily_ci_workflow = os.environ.get("GITHUB_WORKFLOW_REF").startswith(nvidia_daily_ci_workflow)
+    is_scheduled_ci_run = os.environ.get("GITHUB_EVENT_NAME") == "schedule"
+
     with open(f"ci_results_{job_name}/quantization_results.json", "w", encoding="UTF-8") as fp:
         json.dump(quantization_results, fp, indent=4, ensure_ascii=False)

-    target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main"
-    is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow
-
     # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-    if is_scheduled_ci_run:
     api.upload_file(
         path_or_fileobj=f"ci_results_{job_name}/quantization_results.json",
-        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/quantization_results.json",
+        path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/quantization_results.json",
         repo_id="hf-internal-testing/transformers_daily_ci",
         repo_type="dataset",
         token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
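
The gating now relies on variables GitHub Actions sets automatically (`GITHUB_EVENT_NAME`, `GITHUB_WORKFLOW_REF`) rather than the removed `CI_WORKFLOW_REF`. A small sketch of the two flags under assumed values:

```python
import os

# assumed values; GitHub Actions provides these automatically in CI
os.environ.setdefault("GITHUB_EVENT_NAME", "schedule")
os.environ.setdefault(
    "GITHUB_WORKFLOW_REF",
    "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main",
)

nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
is_nvidia_daily_ci_workflow = os.environ["GITHUB_WORKFLOW_REF"].startswith(nvidia_daily_ci_workflow)
is_scheduled_ci_run = os.environ["GITHUB_EVENT_NAME"] == "schedule"

print(is_nvidia_daily_ci_workflow, is_scheduled_ci_run)  # True True
```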

View File

@@ -12,12 +12,12 @@ This is used by `.github/workflows/check_failed_model_tests.yml` to produce a sl
 ```
 """

-import datetime
 import json
 import os
 from collections import Counter
 from copy import deepcopy

+from get_previous_daily_ci import get_last_daily_ci_run
 from huggingface_hub import HfApi
@@ -76,16 +76,32 @@ if __name__ == "__main__":
         new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}

     # Upload to Hub and get the url
+
+    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
+    report_repo_subfolder = ""
+    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
+        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
+        report_repo_subfolder = f"runs/{report_repo_subfolder}"
+
+    workflow_run = get_last_daily_ci_run(
+        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
+    )
+    workflow_run_created_time = workflow_run["created_at"]
+
+    report_repo_folder = workflow_run_created_time.split("T")[0]
+
+    if report_repo_subfolder:
+        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
+
     with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
         json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
     commit_info = api.upload_file(
         path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
-        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
+        path_in_repo=f"{report_repo_folder}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
         repo_id="hf-internal-testing/transformers_daily_ci",
         repo_type="dataset",
         token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
     )
-    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
+    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{report_repo_folder}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"

     # Add `GH_` prefix as keyword mention
     output = {}