Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-03 12:50:06 +06:00

CI reporting improvements (#38230)

update

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

parent cb513e35f9
commit feec294dea

.github/workflows/check_failed_model_tests.yml (vendored, 51 changed lines)
@@ -39,55 +39,100 @@ jobs:
          name: ci_results_run_models_gpu
          path: /transformers/ci_results_run_models_gpu

      - name: Check file
        working-directory: /transformers
        run: |
          if [ -f ci_results_run_models_gpu/new_model_failures.json ]; then
            echo "`ci_results_run_models_gpu/new_model_failures.json` exists, continue ..."
            echo "process=true" >> $GITHUB_ENV
          else
            echo "`ci_results_run_models_gpu/new_model_failures.json` doesn't exist, abort."
            echo "process=false" >> $GITHUB_ENV
          fi

      - uses: actions/download-artifact@v4
        if: ${{ env.process == 'true' }}
        with:
          pattern: setup_values*
          path: setup_values
          merge-multiple: true

      - name: Prepare some setup values
        if: ${{ env.process == 'true' }}
        run: |
          if [ -f setup_values/prev_workflow_run_id.txt ]; then
            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

          if [ -f setup_values/other_workflow_run_id.txt ]; then
            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

      - name: Update clone
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ github.sha }}

      - name: Get target commit
        working-directory: /transformers/utils
        if: ${{ env.process == 'true' }}
        run: |
          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV
          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV

      - name: Checkout to `start_sha`
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ inputs.start_sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: NVIDIA-SMI
        if: ${{ env.process == 'true' }}
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: pip freeze

      - name: Check failed tests
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json

      - name: Show results
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          ls -l new_model_failures_with_bad_commit.json
          cat new_model_failures_with_bad_commit.json

      - name: Checkout back
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          git checkout ${{ inputs.start_sha }}

      - name: Process report
        shell: bash
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          python3 utils/process_bad_commit_report.py
@@ -95,7 +140,9 @@ jobs:
      - name: Process report
        shell: bash
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          {
@@ -105,7 +152,7 @@ jobs:
          } >> "$GITHUB_ENV"

      - name: Send processed report
        if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
        if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        with:
          # Slack channel id, channel name, or user id to post message.
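Note on the updated "Get target commit" step: END_SHA now honors a previous run id supplied through the setup_values artifact. A minimal sketch of the same call outside the workflow (assumed to run from `utils/` with a valid GitHub token in `TOKEN`; behavior follows the `get_previous_daily_ci` changes further down in this commit):

```python
# Sketch only: mirrors the one-liner in the "Get target commit" step.
import os

from get_previous_daily_ci import get_last_daily_ci_run_commit

token = os.environ["TOKEN"]
# When PREV_WORKFLOW_RUN_ID is empty (the default), the helper falls back to the
# latest completed scheduled run; otherwise that exact run's head commit is used.
commit = get_last_daily_ci_run_commit(
    token=token,
    workflow_run_id=os.environ.get("PREV_WORKFLOW_RUN_ID", ""),
)
print(commit)
```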
.github/workflows/self-scheduled-caller.yml (vendored, 35 changed lines)
@@ -8,8 +8,43 @@ on:
  push:
    branches:
      - run_scheduled_ci*
  workflow_dispatch:
    inputs:
      prev_workflow_run_id:
        description: 'previous workflow run id to compare'
        type: string
        required: false
        default: ""
      other_workflow_run_id:
        description: 'other workflow run id to compare'
        type: string
        required: false
        default: ""


# Used for `push` to easily modify the target workflow runs to compare against
env:
  prev_workflow_run_id: ""
  other_workflow_run_id: ""


jobs:
  setup:
    name: Setup
    runs-on: ubuntu-22.04
    steps:
      - name: Setup
        run: |
          mkdir "setup_values"
          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: setup_values
          path: setup_values

  model-ci:
    name: Model CI
    uses: ./.github/workflows/self-scheduled.yml
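The setup job writes the two comparison targets into small text files so later jobs can pick them up as an artifact; a `workflow_dispatch` input takes precedence over the hard-coded `env` default. A rough sketch of that precedence rule, using a hypothetical helper name:

```python
# Hypothetical helper (not in the commit) illustrating the `inputs.x || env.x`
# expression the setup job relies on: a non-empty dispatch input wins, otherwise
# the value hard-coded in the workflow's `env` block is used.
def resolve_run_id(dispatch_input: str, env_default: str) -> str:
    return dispatch_input or env_default

assert resolve_run_id("", "") == ""                 # nothing to compare against
assert resolve_run_id("", "12345") == "12345"       # push run with an env override
assert resolve_run_id("67890", "12345") == "67890"  # manual dispatch wins
```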
.github/workflows/slack-report.yml (vendored, 18 changed lines)
@@ -39,6 +39,21 @@ jobs:

      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4

      - name: Prepare some setup values
        run: |
          if [ -f setup_values/prev_workflow_run_id.txt ]; then
            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

          if [ -f setup_values/other_workflow_run_id.txt ]; then
            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

      - name: Send message to Slack
        if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
        env:
@@ -50,7 +65,6 @@ jobs:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: ${{ inputs.ci_event }}
          CI_SHA: ${{ github.sha }}
          CI_WORKFLOW_REF: ${{ github.workflow_ref }}
          CI_TEST_JOB: ${{ inputs.job }}
          SETUP_STATUS: ${{ inputs.setup_status }}
        # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
@@ -58,7 +72,6 @@ jobs:
        # For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
        # empty string, and the called script still gets one argument (which is the empty string).
        run: |
          sudo apt-get install -y curl
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
@@ -86,7 +99,6 @@ jobs:
        # We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
        # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
        run: |
          sudo apt-get install -y curl
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
@@ -144,7 +144,8 @@ def get_commit_info(commit):
        url = f"https://api.github.com/repos/huggingface/transformers/pulls/{pr_number}"
        pr_for_commit = requests.get(url).json()
        author = pr_for_commit["user"]["login"]
        merged_author = pr_for_commit["merged_by"]["login"]
        if pr_for_commit["merged_by"] is not None:
            merged_author = pr_for_commit["merged_by"]["login"]

    if author is None:
        url = f"https://api.github.com/repos/huggingface/transformers/commits/{commit}"
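The added guard avoids indexing into `merged_by` when the GitHub API returns null for it (a PR that was never merged). A standalone sketch with made-up sample data:

```python
# Sample payload shape only; `merged_by` is None for an unmerged PR, so the old
# unconditional lookup would raise a TypeError.
pr_for_commit = {"user": {"login": "some-author"}, "merged_by": None}

merged_author = None
if pr_for_commit["merged_by"] is not None:
    merged_author = pr_for_commit["merged_by"]["login"]

print(merged_author)  # None instead of a crash
```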
@@ -5,7 +5,7 @@ import requests
from get_ci_error_statistics import download_artifact, get_artifacts_links


def get_daily_ci_runs(token, num_runs=7):
def get_daily_ci_runs(token, num_runs=7, workflow_id=None):
    """Get the workflow runs of the scheduled (daily) CI.

    This only selects the runs triggered by the `schedule` event on the `main` branch.
@@ -18,7 +18,13 @@ def get_daily_ci_runs(token, num_runs=7):
    # From a given workflow run (where we have workflow run id), we can get the workflow id by going to
    # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}
    # and check the `workflow_id` key.
    workflow_id = "90575235"

    if not workflow_id:
        workflow_run_id = os.environ["GITHUB_RUN_ID"]
        workflow_run = requests.get(
            f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}", headers=headers
        ).json()
        workflow_id = workflow_run["workflow_id"]

    url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs"
    # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results
@@ -29,33 +35,64 @@ def get_daily_ci_runs(token, num_runs=7):
    return result["workflow_runs"]


def get_last_daily_ci_runs(token):
def get_last_daily_ci_run(token, workflow_run_id=None, workflow_id=None, commit_sha=None):
    """Get the last completed workflow run id of the scheduled (daily) CI."""
    workflow_runs = get_daily_ci_runs(token)
    workflow_run_id = None
    for workflow_run in workflow_runs:
        if workflow_run["status"] == "completed":
            workflow_run_id = workflow_run["id"]
    headers = None
    if token is not None:
        headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}

    workflow_run = None
    if workflow_run_id is not None and workflow_run_id != "":
        workflow_run = requests.get(
            f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}", headers=headers
        ).json()
        return workflow_run

    workflow_runs = get_daily_ci_runs(token, workflow_id=workflow_id)
    for run in workflow_runs:
        if commit_sha in [None, ""] and run["status"] == "completed":
            workflow_run = run
            break
        # if `commit_sha` is specified, and `workflow_run["head_sha"]` matches it, return it.
        elif commit_sha not in [None, ""] and run["head_sha"] == commit_sha:
            workflow_run = run
            break

    return workflow_run


def get_last_daily_ci_workflow_run_id(token, workflow_run_id=None, workflow_id=None, commit_sha=None):
    """Get the last completed workflow run id of the scheduled (daily) CI."""
    if workflow_run_id is not None and workflow_run_id != "":
        return workflow_run_id

    workflow_run = get_last_daily_ci_run(token, workflow_id=workflow_id, commit_sha=commit_sha)
    workflow_run_id = None
    if workflow_run is not None:
        workflow_run_id = workflow_run["id"]

    return workflow_run_id


def get_last_daily_ci_run_commit(token):
def get_last_daily_ci_run_commit(token, workflow_run_id=None, workflow_id=None, commit_sha=None):
    """Get the commit sha of the last completed scheduled daily CI workflow run."""
    workflow_runs = get_daily_ci_runs(token)
    head_sha = None
    for workflow_run in workflow_runs:
        if workflow_run["status"] == "completed":
            head_sha = workflow_run["head_sha"]
            break
    workflow_run = get_last_daily_ci_run(
        token, workflow_run_id=workflow_run_id, workflow_id=workflow_id, commit_sha=commit_sha
    )
    workflow_run_head_sha = None
    if workflow_run is not None:
        workflow_run_head_sha = workflow_run["head_sha"]

    return head_sha
    return workflow_run_head_sha


def get_last_daily_ci_artifacts(artifact_names, output_dir, token):
def get_last_daily_ci_artifacts(
    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
):
    """Get the artifacts of last completed workflow run id of the scheduled (daily) CI."""
    workflow_run_id = get_last_daily_ci_runs(token)
    workflow_run_id = get_last_daily_ci_workflow_run_id(
        token, workflow_run_id=workflow_run_id, workflow_id=workflow_id, commit_sha=commit_sha
    )
    if workflow_run_id is not None:
        artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token)
        for artifact_name in artifact_names:
@@ -66,9 +103,18 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token):
            )


def get_last_daily_ci_reports(artifact_names, output_dir, token):
def get_last_daily_ci_reports(
    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
):
    """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI."""
    get_last_daily_ci_artifacts(artifact_names, output_dir, token)
    get_last_daily_ci_artifacts(
        artifact_names,
        output_dir,
        token,
        workflow_run_id=workflow_run_id,
        workflow_id=workflow_id,
        commit_sha=commit_sha,
    )

    results = {}
    for artifact_name in artifact_names:
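A minimal usage sketch of the refactored helpers (the ids shown are placeholders, and the calls hit the GitHub API, so a valid token is required):

```python
import os

from get_previous_daily_ci import get_last_daily_ci_run, get_last_daily_ci_workflow_run_id

token = os.environ["ACCESS_REPO_INFO_TOKEN"]

# 1. An explicit run id short-circuits the lookup and is returned as-is.
run_id = get_last_daily_ci_workflow_run_id(token, workflow_run_id="1234567890")

# 2. Without a run id, the latest completed run of the given workflow is used.
latest_run = get_last_daily_ci_run(token, workflow_id="90575235")

# 3. A commit SHA selects the run whose head commit matches it instead.
matching_run = get_last_daily_ci_run(token, workflow_id="90575235", commit_sha="abc123")
```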
@@ -14,7 +14,6 @@

import ast
import collections
import datetime
import functools
import json
import operator
@@ -26,7 +25,7 @@ from typing import Any, Dict, List, Optional, Union

import requests
from get_ci_error_statistics import get_jobs
from get_previous_daily_ci import get_last_daily_ci_reports
from get_previous_daily_ci import get_last_daily_ci_reports, get_last_daily_ci_run, get_last_daily_ci_workflow_run_id
from huggingface_hub import HfApi
from slack_sdk import WebClient
@@ -109,6 +108,7 @@ class Message:
        additional_results: Dict,
        selected_warnings: Optional[List] = None,
        prev_ci_artifacts=None,
        other_ci_artifacts=None,
    ):
        self.title = title
        self.ci_title = ci_title
@@ -159,6 +159,7 @@ class Message:
        self.selected_warnings = selected_warnings

        self.prev_ci_artifacts = prev_ci_artifacts
        self.other_ci_artifacts = other_ci_artifacts

    @property
    def time(self) -> str:
@@ -515,71 +516,83 @@ class Message:
        if len(self.selected_warnings) > 0:
            blocks.append(self.warnings)

        new_failure_blocks = self.get_new_model_failure_blocks(with_header=False)
        if len(new_failure_blocks) > 0:
            blocks.extend(new_failure_blocks)
        for idx, (prev_workflow_run_id, prev_ci_artifacts) in enumerate(
            [self.prev_ci_artifacts] + self.other_ci_artifacts
        ):
            if idx == 0:
                # This is the truncated version to show on slack. For now.
                new_failure_blocks = self.get_new_model_failure_blocks(
                    prev_ci_artifacts=prev_ci_artifacts, with_header=False
                )
                if len(new_failure_blocks) > 0:
                    blocks.extend(new_failure_blocks)

        # To save the list of new model failures
        extra_blocks = self.get_new_model_failure_blocks(to_truncate=False)
        if extra_blocks:
            failure_text = extra_blocks[-1]["text"]["text"]
            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
            with open(file_path, "w", encoding="UTF-8") as fp:
                fp.write(failure_text)
            # To save the list of new model failures and uploaded to hub repositories
            extra_blocks = self.get_new_model_failure_blocks(prev_ci_artifacts=prev_ci_artifacts, to_truncate=False)
            if extra_blocks:
                filename = "new_model_failures"
                if idx > 0:
                    filename = f"{filename}_against_{prev_workflow_run_id}"

            # upload results to Hub dataset
            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.txt")
            commit_info = api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt",
                repo_id="hf-internal-testing/transformers_daily_ci",
                repo_type="dataset",
                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
            )
            url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt"
                failure_text = extra_blocks[-1]["text"]["text"]
                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.txt")
                with open(file_path, "w", encoding="UTF-8") as fp:
                    fp.write(failure_text)

            # extra processing to save to json format
            new_failed_tests = {}
            for line in failure_text.split():
                if "https://github.com/huggingface/transformers/actions/runs" in line:
                    pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
                    items = re.findall(pattern, line)
                elif "tests/" in line:
                    if "tests/models/" in line:
                        model = line.split("/")[2]
                    else:
                        model = line.split("/")[1]
                    if model not in new_failed_tests:
                        new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
                    for url, device in items:
                        new_failed_tests[model][f"{device}-gpu"].append(line)
            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
            with open(file_path, "w", encoding="UTF-8") as fp:
                json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)
                # upload results to Hub dataset
                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.txt")
                commit_info = api.upload_file(
                    path_or_fileobj=file_path,
                    path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{filename}.txt",
                    repo_id="hf-internal-testing/transformers_daily_ci",
                    repo_type="dataset",
                    token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
                )
                url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/{filename}.txt"

            # upload results to Hub dataset
            file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/new_model_failures.json")
            _ = api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json",
                repo_id="hf-internal-testing/transformers_daily_ci",
                repo_type="dataset",
                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
            )
                # extra processing to save to json format
                new_failed_tests = {}
                for line in failure_text.split():
                    if "https://github.com/huggingface/transformers/actions/runs" in line:
                        pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
                        items = re.findall(pattern, line)
                    elif "tests/" in line:
                        if "tests/models/" in line:
                            model = line.split("/")[2]
                        else:
                            model = line.split("/")[1]
                        if model not in new_failed_tests:
                            new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
                        for url, device in items:
                            new_failed_tests[model][f"{device}-gpu"].append(line)
                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.json")
                with open(file_path, "w", encoding="UTF-8") as fp:
                    json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)

            block = {
                "type": "section",
                "text": {
                    "type": "plain_text",
                    "text": " ",
                },
                "accessory": {
                    "type": "button",
                    "text": {"type": "plain_text", "text": "Check New model failures"},
                    "url": url,
                },
            }
            blocks.append(block)
                # upload results to Hub dataset
                file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.json")
                _ = api.upload_file(
                    path_or_fileobj=file_path,
                    path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{filename}.json",
                    repo_id="hf-internal-testing/transformers_daily_ci",
                    repo_type="dataset",
                    token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
                )

                if idx == 0:
                    block = {
                        "type": "section",
                        "text": {
                            "type": "plain_text",
                            "text": " ",
                        },
                        "accessory": {
                            "type": "button",
                            "text": {"type": "plain_text", "text": "Check New model failures"},
                            "url": url,
                        },
                    }
                    blocks.append(block)

        return json.dumps(blocks)
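For reference, a standalone sketch of the JSON post-processing above, run on a fabricated `failure_text` sample; the exact `<link|device>` formatting of the real report text is an assumption here:

```python
import json
import re

# Fabricated sample: a Slack-style job link token followed by a failing test path.
failure_text = (
    "<https://github.com/huggingface/transformers/actions/runs/111/job/222|single> "
    "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward"
)

new_failed_tests = {}
items = []
for token in failure_text.split():
    if "https://github.com/huggingface/transformers/actions/runs" in token:
        pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
        items = re.findall(pattern, token)
    elif "tests/" in token:
        model = token.split("/")[2] if "tests/models/" in token else token.split("/")[1]
        new_failed_tests.setdefault(model, {"single-gpu": [], "multi-gpu": []})
        for _url, device in items:
            new_failed_tests[model][f"{device}-gpu"].append(token)

print(json.dumps(new_failed_tests, indent=2))
# -> {"bert": {"single-gpu": ["tests/models/bert/test_modeling_bert.py::..."], "multi-gpu": []}}
```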
@@ -700,18 +713,18 @@ class Message:
            {"type": "section", "text": {"type": "mrkdwn", "text": failure_text}},
        ]

    def get_new_model_failure_blocks(self, with_header=True, to_truncate=True):
        if self.prev_ci_artifacts is None:
    def get_new_model_failure_blocks(self, prev_ci_artifacts, with_header=True, to_truncate=True):
        if prev_ci_artifacts is None:
            return []

        sorted_dict = sorted(self.model_results.items(), key=lambda t: t[0])

        prev_model_results = {}
        if (
            f"ci_results_{job_name}" in self.prev_ci_artifacts
            and "model_results.json" in self.prev_ci_artifacts[f"ci_results_{job_name}"]
            f"ci_results_{job_name}" in prev_ci_artifacts
            and "model_results.json" in prev_ci_artifacts[f"ci_results_{job_name}"]
        ):
            prev_model_results = json.loads(self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"])
            prev_model_results = json.loads(prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"])

        all_failure_lines = {}
        for job, job_result in sorted_dict:
@@ -812,20 +825,6 @@ class Message:

            time.sleep(1)

        blocks = self.get_new_model_failure_blocks()
        if blocks:
            print("Sending the following reply")
            print(json.dumps({"blocks": blocks}))

            client.chat_postMessage(
                channel=SLACK_REPORT_CHANNEL_ID,
                text="Results for new failures",
                blocks=blocks,
                thread_ts=self.thread_ts["ts"],
            )

            time.sleep(1)


def retrieve_artifact(artifact_path: str, gpu: Optional[str]):
    if gpu not in [None, "single", "multi"]:
@@ -1168,6 +1167,23 @@ if __name__ == "__main__":
        "run_torch_cuda_extensions_gpu": "DeepSpeed",
    }

    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]
    workflow_id = workflow_run["workflow_id"]

    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    # Remove some entries in `additional_files` if they are not concerned.
    test_name = None
    if job_name in job_to_test_map:
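A small sketch of the destination-folder logic above, with placeholder values standing in for the GitHub environment and the API lookup; the upload calls that follow then place reports under `{report_repo_folder}/ci_results_{job_name}/` in the hf-internal-testing/transformers_daily_ci dataset:

```python
# Placeholder values only.
run_created_at = "2025-05-20T02:05:11Z"   # workflow_run["created_at"]
event_name = "workflow_dispatch"          # GITHUB_EVENT_NAME
run_number, run_id = "123", "987654321"   # GITHUB_RUN_NUMBER, GITHUB_RUN_ID

report_repo_subfolder = ""
if event_name != "schedule":
    report_repo_subfolder = f"runs/{run_number}-{run_id}"

report_repo_folder = run_created_at.split("T")[0]
if report_repo_subfolder:
    report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

print(report_repo_folder)
# scheduled run      -> "2025-05-20"
# manually triggered -> "2025-05-20/runs/123-987654321"
```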
@@ -1241,8 +1257,9 @@ if __name__ == "__main__":
    if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
        os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))

    target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main"
    is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow
    nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
    is_nvidia_daily_ci_workflow = os.environ.get("GITHUB_WORKFLOW_REF").startswith(nvidia_daily_ci_workflow)
    is_scheduled_ci_run = os.environ.get("GITHUB_EVENT_NAME") == "schedule"

    # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
    # results.
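A quick illustration of the two new flags, using one real and one hypothetical GITHUB_WORKFLOW_REF value (the variable has the form owner/repo/path@ref):

```python
nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"

# (workflow_ref, event_name) pairs; the second path is a hypothetical other caller.
examples = [
    ("huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main", "schedule"),
    ("huggingface/transformers/.github/workflows/some-other-caller.yml@refs/heads/main", "workflow_dispatch"),
]
for workflow_ref, event_name in examples:
    is_nvidia_daily_ci_workflow = workflow_ref.startswith(nvidia_daily_ci_workflow)
    is_scheduled_ci_run = event_name == "schedule"
    print(is_nvidia_daily_ci_workflow, is_scheduled_ci_run)
# True True   -> the Nvidia daily CI itself, on its cron trigger
# False False -> another caller workflow, triggered manually
```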
@@ -1250,15 +1267,13 @@ if __name__ == "__main__":
        with open(f"ci_results_{job_name}/model_results.json", "w", encoding="UTF-8") as fp:
            json.dump(model_results, fp, indent=4, ensure_ascii=False)

        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
        if is_scheduled_ci_run:
            api.upload_file(
                path_or_fileobj=f"ci_results_{job_name}/model_results.json",
                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json",
                repo_id="hf-internal-testing/transformers_daily_ci",
                repo_type="dataset",
                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
            )
        api.upload_file(
            path_or_fileobj=f"ci_results_{job_name}/model_results.json",
            path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/model_results.json",
            repo_id="hf-internal-testing/transformers_daily_ci",
            repo_type="dataset",
            token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
        )

        # Let's create a file contain job --> job link
        model_job_links = {}
@@ -1272,15 +1287,13 @@ if __name__ == "__main__":
        with open(f"ci_results_{job_name}/model_job_links.json", "w", encoding="UTF-8") as fp:
            json.dump(model_job_links, fp, indent=4, ensure_ascii=False)

        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
        if is_scheduled_ci_run:
            api.upload_file(
                path_or_fileobj=f"ci_results_{job_name}/model_job_links.json",
                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_job_links.json",
                repo_id="hf-internal-testing/transformers_daily_ci",
                repo_type="dataset",
                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
            )
        api.upload_file(
            path_or_fileobj=f"ci_results_{job_name}/model_job_links.json",
            path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/model_job_links.json",
            repo_id="hf-internal-testing/transformers_daily_ci",
            repo_type="dataset",
            token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
        )

    # Must have the same keys as in `additional_results`.
    # The values are used as the file names where to save the corresponding CI job results.
@@ -1294,26 +1307,57 @@ if __name__ == "__main__":
        with open(f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", "w", encoding="UTF-8") as fp:
            json.dump(job_result, fp, indent=4, ensure_ascii=False)

        # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
        if is_scheduled_ci_run:
            api.upload_file(
                path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
                path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
                repo_id="hf-internal-testing/transformers_daily_ci",
                repo_type="dataset",
                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
            )
        api.upload_file(
            path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
            path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
            repo_id="hf-internal-testing/transformers_daily_ci",
            repo_type="dataset",
            token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
        )

    prev_workflow_run_id = None
    other_workflow_run_ids = []

    prev_ci_artifacts = None
    if is_scheduled_ci_run:
        # TODO: remove `if job_name == "run_models_gpu"`
        if job_name == "run_models_gpu":
            # Get the last previously completed CI's failure tables
            prev_workflow_run_id = get_last_daily_ci_workflow_run_id(
                token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=workflow_id
            )
            # For a scheduled run that is not the Nvidia's scheduled daily CI, add Nvidia's scheduled daily CI run as a target to compare.
            if not is_nvidia_daily_ci_workflow:
                # The id of the workflow `.github/workflows/self-scheduled-caller.yml` (not of a workflow run of it).
                other_workflow_id = "90575235"
                # We need to get the Nvidia's scheduled daily CI run that match the current run (i.e. run with the same commit SHA)
                other_workflow_run_id = get_last_daily_ci_workflow_run_id(
                    token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=other_workflow_id, commit_sha=ci_sha
                )
                other_workflow_run_ids.append(other_workflow_run_id)
    else:
        prev_workflow_run_id = os.environ["PREV_WORKFLOW_RUN_ID"]
        other_workflow_run_id = os.environ["OTHER_WORKFLOW_RUN_ID"]
        other_workflow_run_ids.append(other_workflow_run_id)

    prev_ci_artifacts = (None, None)
    other_ci_artifacts = []

    for idx, target_workflow_run_id in enumerate([prev_workflow_run_id] + other_workflow_run_ids):
        if target_workflow_run_id is None or target_workflow_run_id == "":
            continue
        else:
            artifact_names = [f"ci_results_{job_name}"]
            output_dir = os.path.join(os.getcwd(), "previous_reports")
            os.makedirs(output_dir, exist_ok=True)
            prev_ci_artifacts = get_last_daily_ci_reports(
                artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"]
            ci_artifacts = get_last_daily_ci_reports(
                artifact_names=artifact_names,
                output_dir=output_dir,
                token=os.environ["ACCESS_REPO_INFO_TOKEN"],
                workflow_run_id=target_workflow_run_id,
            )
            if idx == 0:
                prev_ci_artifacts = (target_workflow_run_id, ci_artifacts)
            else:
                other_ci_artifacts.append((target_workflow_run_id, ci_artifacts))

    job_to_test_map.update(
        {
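Illustrative shape (values made up) of what this loop hands to `Message`: `prev_ci_artifacts` is a single (run_id, artifacts) tuple and `other_ci_artifacts` is a list of such tuples, where the artifacts dict maps an artifact name to its files' contents:

```python
# Made-up example values; the JSON strings stand in for the serialized reports.
prev_ci_artifacts = (
    "14987654321",
    {"ci_results_run_models_gpu": {"model_results.json": "{}"}},
)
other_ci_artifacts = [
    (
        "15012345678",
        {"ci_results_run_models_gpu": {"model_results.json": "{}"}},
    ),
]
```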
@@ -1335,6 +1379,7 @@ if __name__ == "__main__":
        additional_results,
        selected_warnings=selected_warnings,
        prev_ci_artifacts=prev_ci_artifacts,
        other_ci_artifacts=other_ci_artifacts,
    )

    # send report only if there is any failure (for push CI)
@@ -13,7 +13,6 @@
# limitations under the License.

import ast
import datetime
import json
import os
import sys
@@ -21,6 +20,7 @@ import time
from typing import Dict

from get_ci_error_statistics import get_jobs
from get_previous_daily_ci import get_last_daily_ci_run
from huggingface_hub import HfApi
from notification_service import (
    Message,
@@ -246,24 +246,42 @@ if __name__ == "__main__":
    )

    job_name = os.getenv("CI_TEST_JOB")

    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]
    workflow_id = workflow_run["workflow_id"]

    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
        os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))

    nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
    is_nvidia_daily_ci_workflow = os.environ.get("GITHUB_WORKFLOW_REF").startswith(nvidia_daily_ci_workflow)
    is_scheduled_ci_run = os.environ.get("GITHUB_EVENT_NAME") == "schedule"

    with open(f"ci_results_{job_name}/quantization_results.json", "w", encoding="UTF-8") as fp:
        json.dump(quantization_results, fp, indent=4, ensure_ascii=False)

    target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main"
    is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow

    # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
    if is_scheduled_ci_run:
        api.upload_file(
            path_or_fileobj=f"ci_results_{job_name}/quantization_results.json",
            path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/quantization_results.json",
            repo_id="hf-internal-testing/transformers_daily_ci",
            repo_type="dataset",
            token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
        )
    api.upload_file(
        path_or_fileobj=f"ci_results_{job_name}/quantization_results.json",
        path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/quantization_results.json",
        repo_id="hf-internal-testing/transformers_daily_ci",
        repo_type="dataset",
        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
    )

    message = QuantizationMessage(
        title,
@@ -12,12 +12,12 @@ This is used by `.github/workflows/check_failed_model_tests.yml` to produce a slack
```
"""

import datetime
import json
import os
from collections import Counter
from copy import deepcopy

from get_previous_daily_ci import get_last_daily_ci_run
from huggingface_hub import HfApi


@@ -76,16 +76,32 @@ if __name__ == "__main__":
        new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}

    # Upload to Hub and get the url
    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]

    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
        json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
    commit_info = api.upload_file(
        path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
        path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
        path_in_repo=f"{report_repo_folder}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
        repo_id="hf-internal-testing/transformers_daily_ci",
        repo_type="dataset",
        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
    )
    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
    url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{report_repo_folder}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"

    # Add `GH_` prefix as keyword mention
    output = {}