mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 21:00:08 +06:00
Refactor doctest (#30210)
* fix * update * fix * update * fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
b3595cf02b
commit
b6b6daf2b7
81
.github/workflows/doctest_job.yml
vendored
Normal file
81
.github/workflows/doctest_job.yml
vendored
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# Reusable workflow (triggered via `workflow_call`) that runs the doctests.
# One job is spawned per entry of the `split_keys` matrix; each job looks up
# its own list of files in `job_splits` and runs pytest's doctest mode on them.
name: Doctest job

on:
  workflow_call:
    inputs:
      # JSON string. It is decoded with `fromJson` and indexed by the current
      # matrix key to obtain the files to test (see the "Get doctest files" step).
      job_splits:
        required: true
        type: string
      # JSON string. It is decoded with `fromJson` into the list used as the job matrix.
      split_keys:
        required: true
        type: string

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  RUN_SLOW: yes
  OMP_NUM_THREADS: 16
  MKL_NUM_THREADS: 16
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true

jobs:
  run_doctests:
    # NOTE(review): the job name is a single space on purpose in the original;
    # presumably to keep the per-matrix job labels short in the UI - confirm.
    name: " "
    strategy:
      fail-fast: false
      matrix:
        split_keys: ${{ fromJson(inputs.split_keys) }}
    runs-on: [single-gpu, nvidia-gpu, t4, ci]
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[flax]

      - name: GPU visibility
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        run: pip freeze

      # Write the files assigned to this matrix entry to `doc_tests.txt`;
      # the "Run doctests" step passes its content to pytest.
      - name: Get doctest files
        working-directory: /transformers
        run: |
          echo "${{ toJson(fromJson(inputs.job_splits)[matrix.split_keys]) }}" > doc_tests.txt
          cat doc_tests.txt

      # Replace every "/" in the matrix key by "_" and export the result as the
      # `split_keys` environment variable, which later steps use to build the
      # report directory and artifact names.
      - name: Set `split_keys`
        shell: bash
        run: |
          echo "${{ matrix.split_keys }}"
          split_keys=${{ matrix.split_keys }}
          split_keys=${split_keys//'/'/'_'}
          # Print the sanitized value (the original echoed the literal text "split_keys").
          echo "$split_keys"
          echo "split_keys=$split_keys" >> "$GITHUB_ENV"

      - name: Run doctests
        working-directory: /transformers
        run: |
          cat doc_tests.txt
          python3 -m pytest -v --make-reports doc_tests_gpu_${{ env.split_keys }} --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/doc_tests_gpu_${{ env.split_keys }}/failures_short.txt

      - name: "Test suite reports artifacts: doc_tests_gpu_test_reports_${{ env.split_keys }}"
        if: ${{ always() }}
        uses: actions/upload-artifact@v3
        with:
          name: doc_tests_gpu_test_reports_${{ env.split_keys }}
          path: /transformers/reports/doc_tests_gpu_${{ env.split_keys }}
|
86
.github/workflows/doctests.yml
vendored
86
.github/workflows/doctests.yml
vendored
@ -3,81 +3,85 @@ name: Doctests
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- doctest*
|
- run_doctest*
|
||||||
repository_dispatch:
|
repository_dispatch:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: "17 2 * * *"
|
- cron: "17 2 * * *"
|
||||||
|
|
||||||
|
|
||||||
env:
|
env:
|
||||||
HF_HOME: /mnt/cache
|
NUM_SLICES: 3
|
||||||
TRANSFORMERS_IS_CI: yes
|
|
||||||
RUN_SLOW: yes
|
|
||||||
OMP_NUM_THREADS: 16
|
|
||||||
MKL_NUM_THREADS: 16
|
|
||||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
|
||||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_doctests:
|
setup:
|
||||||
|
name: Setup
|
||||||
runs-on: [single-gpu, nvidia-gpu, t4, ci]
|
runs-on: [single-gpu, nvidia-gpu, t4, ci]
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
|
outputs:
|
||||||
|
job_splits: ${{ steps.set-matrix.outputs.job_splits }}
|
||||||
|
split_keys: ${{ steps.set-matrix.outputs.split_keys }}
|
||||||
steps:
|
steps:
|
||||||
- name: uninstall transformers (installed during docker image build)
|
- name: Update clone
|
||||||
run: python3 -m pip uninstall -y transformers
|
working-directory: /transformers
|
||||||
|
|
||||||
- uses: actions/checkout@v3
|
|
||||||
- name: NVIDIA-SMI
|
|
||||||
run: |
|
run: |
|
||||||
nvidia-smi
|
git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
- name: Install transformers in edit mode
|
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||||
run: python3 -m pip install -e .[flax]
|
working-directory: /transformers
|
||||||
|
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||||
- name: GPU visibility
|
|
||||||
run: |
|
|
||||||
python3 utils/print_env.py
|
|
||||||
|
|
||||||
- name: Show installed libraries and their versions
|
- name: Show installed libraries and their versions
|
||||||
|
working-directory: /transformers
|
||||||
run: pip freeze
|
run: pip freeze
|
||||||
|
|
||||||
- name: Get doctest files
|
- name: Check values for matrix
|
||||||
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
$(python3 -c 'from utils.tests_fetcher import get_all_doctest_files; to_test = get_all_doctest_files(); to_test = " ".join(to_test); fp = open("doc_tests.txt", "w"); fp.write(to_test); fp.close()')
|
python3 utils/split_doctest_jobs.py
|
||||||
|
python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }}
|
||||||
|
|
||||||
- name: Run doctests
|
- id: set-matrix
|
||||||
|
working-directory: /transformers
|
||||||
|
name: Set values for matrix
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
|
echo "job_splits=$(python3 utils/split_doctest_jobs.py)" >> $GITHUB_OUTPUT
|
||||||
|
echo "split_keys=$(python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Failure short reports
|
call_doctest_job:
|
||||||
if: ${{ failure() }}
|
name: "Call doctest jobs"
|
||||||
continue-on-error: true
|
needs: setup
|
||||||
run: cat reports/doc_tests_gpu/failures_short.txt
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
- name: Test suite reports artifacts
|
matrix:
|
||||||
if: ${{ always() }}
|
split_keys: ${{ fromJson(needs.setup.outputs.split_keys) }}
|
||||||
uses: actions/upload-artifact@v3
|
uses: ./.github/workflows/doctest_job.yml
|
||||||
with:
|
with:
|
||||||
name: doc_tests_gpu_test_reports
|
job_splits: ${{ needs.setup.outputs.job_splits }}
|
||||||
path: reports/doc_tests_gpu
|
split_keys: ${{ toJson(matrix.split_keys) }}
|
||||||
|
secrets: inherit
|
||||||
|
|
||||||
send_results:
|
send_results:
|
||||||
name: Send results to webhook
|
name: Send results to webhook
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
if: always()
|
if: always()
|
||||||
needs: [run_doctests]
|
needs: [call_doctest_job]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- uses: actions/download-artifact@v3
|
- uses: actions/download-artifact@v3
|
||||||
- name: Send message to Slack
|
- name: Send message to Slack
|
||||||
env:
|
env:
|
||||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
# Use `CI_SLACK_CHANNEL_DUMMY_TESTS` when doing experimentation
|
||||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
SLACK_REPORT_CHANNEL: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
||||||
run: |
|
run: |
|
||||||
pip install slack_sdk
|
pip install slack_sdk
|
||||||
python utils/notification_service_doc_tests.py
|
python utils/notification_service_doc_tests.py
|
||||||
|
|
||||||
|
- name: "Upload results"
|
||||||
|
if: ${{ always() }}
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: doc_test_results
|
||||||
|
path: doc_test_results
|
@ -12,16 +12,13 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import collections
|
|
||||||
import json
|
import json
|
||||||
import math
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from fnmatch import fnmatch
|
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
import requests
|
from get_ci_error_statistics import get_jobs
|
||||||
from slack_sdk import WebClient
|
from slack_sdk import WebClient
|
||||||
|
|
||||||
|
|
||||||
@ -66,9 +63,8 @@ class Message:
|
|||||||
def __init__(self, title: str, doc_test_results: Dict):
|
def __init__(self, title: str, doc_test_results: Dict):
|
||||||
self.title = title
|
self.title = title
|
||||||
|
|
||||||
self._time_spent = doc_test_results["time_spent"].split(",")[0]
|
self.n_success = sum(job_result["n_success"] for job_result in doc_test_results.values())
|
||||||
self.n_success = doc_test_results["success"]
|
self.n_failures = sum(job_result["n_failures"] for job_result in doc_test_results.values())
|
||||||
self.n_failures = doc_test_results["failures"]
|
|
||||||
self.n_tests = self.n_success + self.n_failures
|
self.n_tests = self.n_success + self.n_failures
|
||||||
|
|
||||||
# Failures and success of the modeling tests
|
# Failures and success of the modeling tests
|
||||||
@ -76,7 +72,8 @@ class Message:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def time(self) -> str:
|
def time(self) -> str:
|
||||||
time_spent = [self._time_spent]
|
all_results = [*self.doc_test_results.values()]
|
||||||
|
time_spent = [r["time_spent"].split(", ")[0] for r in all_results if len(r["time_spent"])]
|
||||||
total_secs = 0
|
total_secs = 0
|
||||||
|
|
||||||
for time in time_spent:
|
for time in time_spent:
|
||||||
@ -205,7 +202,7 @@ class Message:
|
|||||||
print(json.dumps({"blocks": json.loads(payload)}))
|
print(json.dumps({"blocks": json.loads(payload)}))
|
||||||
|
|
||||||
client.chat_postMessage(
|
client.chat_postMessage(
|
||||||
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
|
channel=SLACK_REPORT_CHANNEL_ID,
|
||||||
text="There was an issue running the tests.",
|
text="There was an issue running the tests.",
|
||||||
blocks=payload,
|
blocks=payload,
|
||||||
)
|
)
|
||||||
@ -217,7 +214,7 @@ class Message:
|
|||||||
text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed."
|
text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed."
|
||||||
|
|
||||||
self.thread_ts = client.chat_postMessage(
|
self.thread_ts = client.chat_postMessage(
|
||||||
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
|
channel=SLACK_REPORT_CHANNEL_ID,
|
||||||
blocks=self.payload,
|
blocks=self.payload,
|
||||||
text=text,
|
text=text,
|
||||||
)
|
)
|
||||||
@ -248,7 +245,7 @@ class Message:
|
|||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
{"type": "header", "text": {"type": "plain_text", "text": title.upper(), "emoji": True}},
|
{"type": "header", "text": {"type": "plain_text", "text": title, "emoji": True}},
|
||||||
content,
|
content,
|
||||||
{"type": "section", "text": {"type": "mrkdwn", "text": failure_text}},
|
{"type": "section", "text": {"type": "mrkdwn", "text": failure_text}},
|
||||||
]
|
]
|
||||||
@ -257,24 +254,19 @@ class Message:
|
|||||||
if self.thread_ts is None:
|
if self.thread_ts is None:
|
||||||
raise ValueError("Can only post reply if a post has been made.")
|
raise ValueError("Can only post reply if a post has been made.")
|
||||||
|
|
||||||
job_link = self.doc_test_results.pop("job_link")
|
|
||||||
self.doc_test_results.pop("failures")
|
|
||||||
self.doc_test_results.pop("success")
|
|
||||||
self.doc_test_results.pop("time_spent")
|
|
||||||
|
|
||||||
sorted_dict = sorted(self.doc_test_results.items(), key=lambda t: t[0])
|
sorted_dict = sorted(self.doc_test_results.items(), key=lambda t: t[0])
|
||||||
for job, job_result in sorted_dict:
|
for job_name, job_result in sorted_dict:
|
||||||
if len(job_result["failures"]):
|
if len(job_result["failures"]) > 0:
|
||||||
text = f"*Num failures* :{len(job_result['failed'])} \n"
|
text = f"*Num failures* :{len(job_result['failed'])} \n"
|
||||||
failures = job_result["failures"]
|
failures = job_result["failures"]
|
||||||
blocks = self.get_reply_blocks(job, job_link, failures, text=text)
|
blocks = self.get_reply_blocks(job_name, job_result["job_link"], failures, text=text)
|
||||||
|
|
||||||
print("Sending the following reply")
|
print("Sending the following reply")
|
||||||
print(json.dumps({"blocks": blocks}))
|
print(json.dumps({"blocks": blocks}))
|
||||||
|
|
||||||
client.chat_postMessage(
|
client.chat_postMessage(
|
||||||
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
|
channel=SLACK_REPORT_CHANNEL_ID,
|
||||||
text=f"Results for {job}",
|
text=f"Results for {job_name}",
|
||||||
blocks=blocks,
|
blocks=blocks,
|
||||||
thread_ts=self.thread_ts["ts"],
|
thread_ts=self.thread_ts["ts"],
|
||||||
)
|
)
|
||||||
@ -282,27 +274,6 @@ class Message:
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
def get_job_links():
|
|
||||||
run_id = os.environ["GITHUB_RUN_ID"]
|
|
||||||
url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}/jobs?per_page=100"
|
|
||||||
result = requests.get(url).json()
|
|
||||||
jobs = {}
|
|
||||||
|
|
||||||
try:
|
|
||||||
jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})
|
|
||||||
pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)
|
|
||||||
|
|
||||||
for i in range(pages_to_iterate_over):
|
|
||||||
result = requests.get(url + f"&page={i + 2}").json()
|
|
||||||
jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})
|
|
||||||
|
|
||||||
return jobs
|
|
||||||
except Exception as e:
|
|
||||||
print("Unknown error, could not fetch links.", e)
|
|
||||||
|
|
||||||
return {}
|
|
||||||
|
|
||||||
|
|
||||||
def retrieve_artifact(name: str):
|
def retrieve_artifact(name: str):
|
||||||
_artifact = {}
|
_artifact = {}
|
||||||
|
|
||||||
@ -344,37 +315,50 @@ def retrieve_available_artifacts():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
github_actions_job_links = get_job_links()
|
SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"]
|
||||||
available_artifacts = retrieve_available_artifacts()
|
|
||||||
|
|
||||||
docs = collections.OrderedDict(
|
github_actions_jobs = get_jobs(
|
||||||
[
|
workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"]
|
||||||
("*.py", "API Examples"),
|
|
||||||
("*.md", "MD Examples"),
|
|
||||||
]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# This dict will contain all the information relative to each doc test category:
|
artifact_name_to_job_map = {}
|
||||||
|
for job in github_actions_jobs:
|
||||||
|
for step in job["steps"]:
|
||||||
|
if step["name"].startswith("Test suite reports artifacts: "):
|
||||||
|
artifact_name = step["name"][len("Test suite reports artifacts: ") :]
|
||||||
|
artifact_name_to_job_map[artifact_name] = job
|
||||||
|
break
|
||||||
|
|
||||||
|
available_artifacts = retrieve_available_artifacts()
|
||||||
|
|
||||||
|
doc_test_results = {}
|
||||||
|
# `artifact_key` is the artifact path
|
||||||
|
for artifact_key, artifact_obj in available_artifacts.items():
|
||||||
|
artifact_path = artifact_obj.paths[0]
|
||||||
|
if not artifact_path["path"].startswith("doc_tests_gpu_test_reports_"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# change "_" back to "/" (to show the job name as path)
|
||||||
|
job_name = artifact_path["path"].replace("doc_tests_gpu_test_reports_", "").replace("_", "/")
|
||||||
|
|
||||||
|
# This dict (for each job) will contain all the information relative to each doc test job, in particular:
|
||||||
# - failed: list of failed tests
|
# - failed: list of failed tests
|
||||||
# - failures: dict in the format 'test': 'error_message'
|
# - failures: dict in the format 'test': 'error_message'
|
||||||
doc_test_results = {
|
job_result = {}
|
||||||
v: {
|
doc_test_results[job_name] = job_result
|
||||||
"failed": [],
|
|
||||||
"failures": {},
|
|
||||||
}
|
|
||||||
for v in docs.values()
|
|
||||||
}
|
|
||||||
|
|
||||||
# Link to the GitHub Action job
|
job = artifact_name_to_job_map[artifact_path["path"]]
|
||||||
doc_test_results["job_link"] = github_actions_job_links.get("run_doctests")
|
job_result["job_link"] = job["html_url"]
|
||||||
|
job_result["category"] = "Python Examples" if job_name.startswith("src/") else "MD Examples"
|
||||||
|
|
||||||
artifact_path = available_artifacts["doc_tests_gpu_test_reports"].paths[0]
|
artifact = retrieve_artifact(artifact_path["path"])
|
||||||
artifact = retrieve_artifact(artifact_path["name"])
|
|
||||||
if "stats" in artifact:
|
if "stats" in artifact:
|
||||||
failed, success, time_spent = handle_test_results(artifact["stats"])
|
failed, success, time_spent = handle_test_results(artifact["stats"])
|
||||||
doc_test_results["failures"] = failed
|
job_result["n_failures"] = failed
|
||||||
doc_test_results["success"] = success
|
job_result["n_success"] = success
|
||||||
doc_test_results["time_spent"] = time_spent[1:-1] + ", "
|
job_result["time_spent"] = time_spent[1:-1] + ", "
|
||||||
|
job_result["failed"] = []
|
||||||
|
job_result["failures"] = {}
|
||||||
|
|
||||||
all_failures = extract_first_line_failure(artifact["failures_short"])
|
all_failures = extract_first_line_failure(artifact["failures_short"])
|
||||||
for line in artifact["summary_short"].split("\n"):
|
for line in artifact["summary_short"].split("\n"):
|
||||||
@ -387,14 +371,14 @@ if __name__ == "__main__":
|
|||||||
else:
|
else:
|
||||||
file_path, test = line, line
|
file_path, test = line, line
|
||||||
|
|
||||||
for file_regex in docs.keys():
|
job_result["failed"].append(test)
|
||||||
if fnmatch(file_path, file_regex):
|
|
||||||
category = docs[file_regex]
|
|
||||||
doc_test_results[category]["failed"].append(test)
|
|
||||||
|
|
||||||
failure = all_failures[test] if test in all_failures else "N/A"
|
failure = all_failures[test] if test in all_failures else "N/A"
|
||||||
doc_test_results[category]["failures"][test] = failure
|
job_result["failures"][test] = failure
|
||||||
break
|
|
||||||
|
# Save and to be uploaded as artifact
|
||||||
|
os.makedirs("doc_test_results", exist_ok=True)
|
||||||
|
with open("doc_test_results/doc_test_results.json", "w", encoding="UTF-8") as fp:
|
||||||
|
json.dump(doc_test_results, fp, ensure_ascii=False, indent=4)
|
||||||
|
|
||||||
message = Message("🤗 Results of the doc tests.", doc_test_results)
|
message = Message("🤗 Results of the doc tests.", doc_test_results)
|
||||||
message.post()
|
message.post()
|
||||||
|
91
utils/split_doctest_jobs.py
Normal file
91
utils/split_doctest_jobs.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
This script is used to get the files against which we will run doc testing.
|
||||||
|
This uses `tests_fetcher.get_all_doctest_files` then groups the test files by their directory paths.
|
||||||
|
|
||||||
|
The files in `docs/source/en/model_doc` or `docs/source/en/tasks` are **NOT** grouped together with other files in the
|
||||||
|
same directory: the objective is to run doctest against them in independent GitHub Actions jobs.
|
||||||
|
|
||||||
|
Assume we are under `transformers` root directory:
|
||||||
|
To get a map (dictionary) between directory (or file) paths and the corresponding files
|
||||||
|
```bash
|
||||||
|
python utils/split_doctest_jobs.py
|
||||||
|
```
|
||||||
|
or to get a list of lists of directory (or file) paths
|
||||||
|
```bash
|
||||||
|
python utils/split_doctest_jobs.py --only_return_keys --num_splits 4
|
||||||
|
```
|
||||||
|
(this is used to allow GitHub Actions to generate more than 256 jobs using matrix)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from tests_fetcher import get_all_doctest_files
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command line interface: choose between printing the full map or only the split keys.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--only_return_keys",
        action="store_true",
        help="if to only return the keys (which is a list of list of files' directory or file paths).",
    )
    cli.add_argument(
        "--num_splits",
        type=int,
        default=1,
        help="the number of splits into which the (flat) list of direcotry/file paths will be split. This has effect only if `only_return_keys` is `True`.",
    )
    args = cli.parse_args()

    # Group every doctest file under its parent directory (joined with "/").
    files_by_dir = defaultdict(list)
    for doctest_file in get_all_doctest_files():
        parent_dir = "/".join(Path(doctest_file).parents[0].parts)
        files_by_dir[parent_dir].append(doctest_file)

    # Files located directly in these directories each get their own entry
    # (keyed by the file path) instead of being grouped by directory.
    ungrouped_dirs = ["docs/source/en/model_doc", "docs/source/en/tasks"]

    entries = {}
    for parent_dir, members in files_by_dir.items():
        if parent_dir in ungrouped_dirs:
            for member in members:
                entries[member] = member
        else:
            # One entry per directory: its files, sorted and separated by spaces.
            entries[parent_dir] = " ".join(sorted(members))

    # Rebuild the map with its keys in sorted order.
    sorted_file_dirs = sorted(entries)
    test_collection_map = {key: entries[key] for key in sorted_file_dirs}

    # Cut the sorted keys into `num_splits` consecutive chunks; the first
    # `remainder` chunks receive one extra key.
    num_jobs = len(test_collection_map)
    base_size = num_jobs // args.num_splits
    remainder = num_jobs % args.num_splits

    file_directory_splits = []
    cursor = 0
    for split_idx in range(args.num_splits):
        chunk_size = (base_size + 1) if split_idx < remainder else base_size
        file_directory_splits.append(sorted_file_dirs[cursor : cursor + chunk_size])
        cursor += chunk_size

    if not args.only_return_keys:
        print(test_collection_map)
    else:
        print(file_directory_splits)
|
@ -502,7 +502,10 @@ def get_all_doctest_files() -> List[str]:
|
|||||||
"""
|
"""
|
||||||
py_files = [str(x.relative_to(PATH_TO_REPO)) for x in PATH_TO_REPO.glob("**/*.py")]
|
py_files = [str(x.relative_to(PATH_TO_REPO)) for x in PATH_TO_REPO.glob("**/*.py")]
|
||||||
md_files = [str(x.relative_to(PATH_TO_REPO)) for x in PATH_TO_REPO.glob("**/*.md")]
|
md_files = [str(x.relative_to(PATH_TO_REPO)) for x in PATH_TO_REPO.glob("**/*.md")]
|
||||||
|
|
||||||
test_files_to_run = py_files + md_files
|
test_files_to_run = py_files + md_files
|
||||||
|
# change to use "/" as path separator
|
||||||
|
test_files_to_run = ["/".join(Path(x).parts) for x in test_files_to_run]
|
||||||
|
|
||||||
# only include files in `src` or `docs/source/en/`
|
# only include files in `src` or `docs/source/en/`
|
||||||
test_files_to_run = [x for x in test_files_to_run if x.startswith(("src/", "docs/source/en/"))]
|
test_files_to_run = [x for x in test_files_to_run if x.startswith(("src/", "docs/source/en/"))]
|
||||||
|
Loading…
Reference in New Issue
Block a user