mirror of https://github.com/huggingface/transformers.git (synced 2025-07-30 17:52:35 +06:00)
A script to download artifacts and perform CI error statistics (#18865)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent 9196f48b95
commit 4e29b3f884
utils/get_ci_error_statistics.py (new file, 154 lines)
@@ -0,0 +1,154 @@
import argparse
import json
import math
import os
import subprocess
import time
import zipfile
from collections import Counter

import requests

def get_artifacts_links(workflow_run_id):
    """Get all artifact links from a workflow run."""
    url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/artifacts?per_page=100"
    result = requests.get(url).json()
    artifacts = {}

    try:
        artifacts.update({artifact["name"]: artifact["archive_download_url"] for artifact in result["artifacts"]})
        # The first request already fetched 100 artifacts; page through whatever remains.
        pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)

        for i in range(pages_to_iterate_over):
            result = requests.get(url + f"&page={i + 2}").json()
            artifacts.update({artifact["name"]: artifact["archive_download_url"] for artifact in result["artifacts"]})

        return artifacts
    except Exception as e:
        print("Unknown error, could not fetch links.", e)

    return {}

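For context, the artifacts endpoint returns JSON with a `total_count` field plus up to `per_page` entries per call, which is what the pagination arithmetic above relies on. A small sketch with made-up numbers, not part of the committed file:

import math

# Hypothetical payload from .../artifacts?per_page=100 — the values are invented:
sample_result = {"total_count": 250, "artifacts": []}
# 100 artifacts came with the first request, so two more pages remain (?page=2 and ?page=3):
assert math.ceil((sample_result["total_count"] - 100) / 100) == 2
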
def download_artifact(artifact_name, artifact_url, output_dir, token):
    """Download a GitHub Action artifact from a URL.

    The URL is of the form `https://api.github.com/repos/huggingface/transformers/actions/artifacts/{ARTIFACT_ID}/zip`,
    but it can't be used to download directly. We need to get a redirect URL first.
    See https://docs.github.com/en/rest/actions/artifacts#download-an-artifact
    """
    # Get the redirect URL first: `curl -v` prints response headers (including `< Location: ...`)
    # to stderr, which is merged into stdout here.
    cmd = f'curl -v -H "Accept: application/vnd.github+json" -H "Authorization: token {token}" {artifact_url}'
    output = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    o = output.stdout.decode("utf-8")
    lines = o.splitlines()

    for line in lines:
        if line.startswith("< Location: "):
            redirect_url = line[len("< Location: ") :]
            r = requests.get(redirect_url, allow_redirects=True)
            p = os.path.join(output_dir, f"{artifact_name}.zip")
            with open(p, "wb") as fp:
                fp.write(r.content)
            break

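The curl subprocess above exists only to read the `Location` response header. As an aside, the same download can be sketched with `requests` alone, assuming (per the GitHub docs linked in the docstring) that the endpoint answers with a 302 redirect to a pre-signed URL; `requests` follows the redirect itself and drops the `Authorization` header on the cross-host hop, which is fine because the pre-signed URL needs no auth. A hypothetical curl-free variant, not the committed implementation:

import os
import requests

def download_artifact_without_curl(artifact_name, artifact_url, output_dir, token):
    # Hypothetical alternative; assumes a 302 redirect to a pre-signed storage URL.
    headers = {"Accept": "application/vnd.github+json", "Authorization": f"token {token}"}
    r = requests.get(artifact_url, headers=headers, allow_redirects=True)
    r.raise_for_status()
    with open(os.path.join(output_dir, f"{artifact_name}.zip"), "wb") as fp:
        fp.write(r.content)
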
def get_errors_from_single_artifact(artifact_zip_path):
    """Extract errors from a downloaded artifact (in .zip format)."""
    errors = []
    failed_tests = []

    with zipfile.ZipFile(artifact_zip_path) as z:
        for filename in z.namelist():
            if not os.path.isdir(filename):
                # read the two report files we care about
                if filename in ["failures_line.txt", "summary_short.txt"]:
                    with z.open(filename) as f:
                        for line in f:
                            line = line.decode("UTF-8").strip()
                            if filename == "failures_line.txt":
                                try:
                                    # `error_line` is the place where `error` occurs
                                    error_line = line[: line.index(": ")]
                                    error = line[line.index(": ") + len(": ") :]
                                    errors.append([error_line, error])
                                except Exception:
                                    # skip unrelated lines (no ": " separator)
                                    pass
                            elif filename == "summary_short.txt" and line.startswith("FAILED "):
                                # `test` is the test method that failed
                                test = line[len("FAILED ") :]
                                failed_tests.append(test)

    if len(errors) != len(failed_tests):
        raise ValueError(
            f"`errors` and `failed_tests` should have the same number of elements. Got {len(errors)} for `errors` "
            f"and {len(failed_tests)} for `failed_tests` instead. The test reports in {artifact_zip_path} have a"
            " problem."
        )

    return errors, failed_tests

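To make the slicing above concrete: each `failures_line.txt` line is split at its first `": "` into the failure location and the error message. A worked example on a made-up line, not part of the committed file:

line = "tests/models/bert/test_modeling_bert.py:42: ValueError: shape mismatch"  # hypothetical
error_line = line[: line.index(": ")]
error = line[line.index(": ") + len(": ") :]
assert error_line == "tests/models/bert/test_modeling_bert.py:42"
assert error == "ValueError: shape mismatch"
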
def get_all_errors(artifact_dir):
    """Extract errors from all artifact files."""
    errors = []
    failed_tests = []

    paths = [os.path.join(artifact_dir, p) for p in os.listdir(artifact_dir) if p.endswith(".zip")]

    for p in paths:
        _errors, _failed_tests = get_errors_from_single_artifact(p)
        errors.extend(_errors)
        failed_tests.extend(_failed_tests)

    return errors, failed_tests

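Because the length check in `get_errors_from_single_artifact` keeps the two lists index-aligned, `errors` and `failed_tests` can be zipped to attribute each error to its test. A sketch with invented data, not part of the committed file:

errors = [["tests/a.py:10", "ValueError: bad shape"]]  # hypothetical
failed_tests = ["tests/a.py::TestA::test_shapes"]      # hypothetical
for (error_line, error), test in zip(errors, failed_tests):
    print(f"{test} failed at {error_line}: {error}")
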
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument(
        "--workflow_run_id", default=None, type=str, required=True, help="A GitHub Actions workflow run id."
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="Where to store the downloaded artifacts and other result files.",
    )
    parser.add_argument(
        "--token", default=None, type=str, required=True, help="A token that has actions:read permission."
    )
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    artifacts = get_artifacts_links(args.workflow_run_id)
    with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp:
        json.dump(artifacts, fp, ensure_ascii=False, indent=4)

    for name, url in artifacts.items():
        download_artifact(name, url, args.output_dir, args.token)
        # Be gentle to GitHub
        time.sleep(1)

    errors, failed_tests = get_all_errors(args.output_dir)

    # Count identical error messages to surface the most frequent CI failures
    counter = Counter()
    counter.update([e[1] for e in errors])

    # print the top 30 most common test errors
    most_common = counter.most_common(30)
    for item in most_common:
        print(item)

    with open(os.path.join(args.output_dir, "errors.json"), "w", encoding="UTF-8") as fp:
        json.dump(errors, fp, ensure_ascii=False, indent=4)

    with open(os.path.join(args.output_dir, "failed_tests.json"), "w", encoding="UTF-8") as fp:
        json.dump(failed_tests, fp, ensure_ascii=False, indent=4)
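A typical invocation (the run id and token are placeholders):

python utils/get_ci_error_statistics.py \
    --workflow_run_id <WORKFLOW_RUN_ID> \
    --output_dir ci_error_stats \
    --token <TOKEN_WITH_ACTIONS_READ>

The script writes artifacts.json, errors.json, and failed_tests.json into --output_dir and prints the 30 most frequent error messages.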