From fdaef3368b7495f6d3f26739fece0ee370fa7ce6 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Fri, 4 Aug 2023 16:24:04 +0200 Subject: [PATCH] Document toc check and doctest check scripts (#25319) * Clean doc toc check and make doctest list better * Add to Makefile --- Makefile | 1 + utils/check_doc_toc.py | 38 +++++++++++++++++++++++++- utils/check_doctest_list.py | 53 +++++++++++++++++++++++++++++++------ 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 25ab889148b..2c2f3786f7c 100644 --- a/Makefile +++ b/Makefile @@ -80,6 +80,7 @@ fix-copies: python utils/check_copies.py --fix_and_overwrite python utils/check_table.py --fix_and_overwrite python utils/check_dummies.py --fix_and_overwrite + python utils/check_doctest_list.py --fix_and_overwrite python utils/check_task_guides.py --fix_and_overwrite # Run tests for the library diff --git a/utils/check_doc_toc.py b/utils/check_doc_toc.py index a01804284c3..83c6be47953 100644 --- a/utils/check_doc_toc.py +++ b/utils/check_doc_toc.py @@ -12,6 +12,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This script is responsible for cleaning the model section of the table of content by removing duplicates and sorting +the entries in alphabetical order. + +Usage (from the root of the repo): + +Check that the table of content is properly sorted (used in `make quality`): + +```bash +python utils/check_doc_toc.py +``` + +Auto-sort the table of content if it is not properly sorted (used in `make style`): + +```bash +python utils/check_doc_toc.py --fix_and_overwrite +``` +""" + import argparse from collections import defaultdict @@ -24,7 +43,15 @@ PATH_TO_TOC = "docs/source/en/_toctree.yml" def clean_model_doc_toc(model_doc): """ - Cleans the table of content of the model documentation by removing duplicates and sorting models alphabetically. + Cleans a section of the table of content of the model documentation (one specific modality) by removing duplicates + and sorting models alphabetically. + + Args: + model_doc (`List[dict]`): + The list of dictionaries extracted from the `_toctree.yml` file for this specific modality. + + Returns: + `List[dict]`: List of dictionaries like the input, but cleaned up and sorted. """ counts = defaultdict(int) for doc in model_doc: @@ -51,6 +78,14 @@ def clean_model_doc_toc(model_doc): def check_model_doc(overwrite=False): + """ + Check that the content of the table of content in `_toctree.yml` is clean (no duplicates and sorted for the model + API doc) and potentially auto-cleans it. + + Args: + overwrite (`bool`, *optional*, defaults to `False`): + Whether to just check if the TOC is clean or to auto-clean it (when `overwrite=True`). + """ with open(PATH_TO_TOC, encoding="utf-8") as f: content = yaml.safe_load(f.read()) @@ -67,6 +102,7 @@ def check_model_doc(overwrite=False): model_doc = api_doc[model_idx]["sections"] + # Extract the modalities and clean them one by one. modalities_docs = [(idx, section) for idx, section in enumerate(model_doc) if "sections" in section] diff = False for idx, modality_doc in modalities_docs: diff --git a/utils/check_doctest_list.py b/utils/check_doctest_list.py index 13ef8c52d59..3815a2bda0b 100644 --- a/utils/check_doctest_list.py +++ b/utils/check_doctest_list.py @@ -12,28 +12,65 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This script is responsible for cleaning the list of doctests by making sure the entries all exist and are in +alphabetical order. +Usage (from the root of the repo): + +Check that the doctest list is properly sorted and all files exist (used in `make repo-consistency`): + +```bash +python utils/check_doctest_list.py +``` + +Auto-sort the doctest list if it is not properly sorted (used in `make fix-copies`): + +```bash +python utils/check_doctest_list.py --fix_and_overwrite +``` +""" +import argparse import os # All paths are set with the intent you should run this script from the root of the repo with the command # python utils/check_doctest_list.py REPO_PATH = "." +DOCTEST_FILE_PATHS = ["documentation_tests.txt", "slow_documentation_tests.txt"] -if __name__ == "__main__": - doctest_file_path = os.path.join(REPO_PATH, "utils/documentation_tests.txt") +def clean_doctest_list(doctest_file, overwrite=False): non_existent_paths = [] all_paths = [] - with open(doctest_file_path) as fp: - for line in fp: + with open(doctest_file, "r", encoding="utf-8") as f: + for line in f: line = line.strip() path = os.path.join(REPO_PATH, line) if not (os.path.isfile(path) or os.path.isdir(path)): non_existent_paths.append(line) - all_paths.append(path) + all_paths.append(line) + if len(non_existent_paths) > 0: - non_existent_paths = "\n".join(non_existent_paths) + non_existent_paths = "\n".join([f"- {f}" for f in non_existent_paths]) raise ValueError(f"`utils/documentation_tests.txt` contains non-existent paths:\n{non_existent_paths}") - if all_paths != sorted(all_paths): - raise ValueError("Files in `utils/documentation_tests.txt` are not in alphabetical order.") + + sorted_paths = sorted(all_paths) + if all_paths != sorted_paths: + if not overwrite: + raise ValueError( + f"Files in `{doctest_file}` are not in alphabetical order, run `make fix-copies` to fix " + "this automatically." + ) + with open(doctest_file, "w", encoding="utf-8") as f: + f.write("\n".join(sorted_paths) + "\n") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.") + args = parser.parse_args() + + for doctest_file in DOCTEST_FILE_PATHS: + doctest_file = os.path.join(REPO_PATH, "utils", doctest_file) + clean_doctest_list(doctest_file, args.fix_and_overwrite)