Document toc check and doctest check scripts (#25319)

* Clean doc toc check and make doctest list better

* Add to Makefile
This commit is contained in:
Sylvain Gugger 2023-08-04 16:24:04 +02:00 committed by GitHub
parent ce6d153a53
commit fdaef3368b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 83 additions and 9 deletions

View File

@ -80,6 +80,7 @@ fix-copies:
python utils/check_copies.py --fix_and_overwrite python utils/check_copies.py --fix_and_overwrite
python utils/check_table.py --fix_and_overwrite python utils/check_table.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite python utils/check_dummies.py --fix_and_overwrite
python utils/check_doctest_list.py --fix_and_overwrite
python utils/check_task_guides.py --fix_and_overwrite python utils/check_task_guides.py --fix_and_overwrite
# Run tests for the library # Run tests for the library

View File

@ -12,6 +12,25 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
This script is responsible for cleaning the model section of the table of contents by removing duplicates and sorting
the entries in alphabetical order.
Usage (from the root of the repo):
Check that the table of contents is properly sorted (used in `make quality`):
```bash
python utils/check_doc_toc.py
```
Auto-sort the table of contents if it is not properly sorted (used in `make style`):
```bash
python utils/check_doc_toc.py --fix_and_overwrite
```
"""
import argparse import argparse
from collections import defaultdict from collections import defaultdict
@ -24,7 +43,15 @@ PATH_TO_TOC = "docs/source/en/_toctree.yml"
def clean_model_doc_toc(model_doc): def clean_model_doc_toc(model_doc):
""" """
Cleans the table of content of the model documentation by removing duplicates and sorting models alphabetically. Cleans a section of the table of content of the model documentation (one specific modality) by removing duplicates
and sorting models alphabetically.
Args:
model_doc (`List[dict]`):
The list of dictionaries extracted from the `_toctree.yml` file for this specific modality.
Returns:
`List[dict]`: List of dictionaries like the input, but cleaned up and sorted.
""" """
counts = defaultdict(int) counts = defaultdict(int)
for doc in model_doc: for doc in model_doc:
@ -51,6 +78,14 @@ def clean_model_doc_toc(model_doc):
def check_model_doc(overwrite=False): def check_model_doc(overwrite=False):
"""
Check that the content of the table of content in `_toctree.yml` is clean (no duplicates and sorted for the model
API doc) and potentially auto-cleans it.
Args:
overwrite (`bool`, *optional*, defaults to `False`):
Whether to just check if the TOC is clean or to auto-clean it (when `overwrite=True`).
"""
with open(PATH_TO_TOC, encoding="utf-8") as f: with open(PATH_TO_TOC, encoding="utf-8") as f:
content = yaml.safe_load(f.read()) content = yaml.safe_load(f.read())
@ -67,6 +102,7 @@ def check_model_doc(overwrite=False):
model_doc = api_doc[model_idx]["sections"] model_doc = api_doc[model_idx]["sections"]
# Extract the modalities and clean them one by one.
modalities_docs = [(idx, section) for idx, section in enumerate(model_doc) if "sections" in section] modalities_docs = [(idx, section) for idx, section in enumerate(model_doc) if "sections" in section]
diff = False diff = False
for idx, modality_doc in modalities_docs: for idx, modality_doc in modalities_docs:

View File

@ -12,28 +12,65 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
This script is responsible for cleaning the list of doctests by making sure the entries all exist and are in
alphabetical order.
Usage (from the root of the repo):
Check that the doctest list is properly sorted and all files exist (used in `make repo-consistency`):
```bash
python utils/check_doctest_list.py
```
Auto-sort the doctest list if it is not properly sorted (used in `make fix-copies`):
```bash
python utils/check_doctest_list.py --fix_and_overwrite
```
"""
import argparse
import os import os
# All paths are set with the intent you should run this script from the root of the repo with the command # All paths are set with the intent you should run this script from the root of the repo with the command
# python utils/check_doctest_list.py # python utils/check_doctest_list.py
REPO_PATH = "." REPO_PATH = "."
DOCTEST_FILE_PATHS = ["documentation_tests.txt", "slow_documentation_tests.txt"]
def clean_doctest_list(doctest_file: str, overwrite: bool = False) -> None:
    """
    Check that a doctest list file only references existing paths and is sorted alphabetically.

    Args:
        doctest_file (`str`):
            Path to the text file listing the doctest targets, one path per line. Each entry is resolved relative
            to `REPO_PATH`.
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether to rewrite the file with its entries sorted (when `overwrite=True`) or to raise an error when
            the entries are not in alphabetical order.

    Raises:
        ValueError: If an entry is neither an existing file nor an existing directory, or if the entries are not
            sorted and `overwrite=False`.
    """
    non_existent_paths = []
    all_paths = []
    with open(doctest_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            path = os.path.join(REPO_PATH, line)
            if not (os.path.isfile(path) or os.path.isdir(path)):
                non_existent_paths.append(line)
            # Keep the entry exactly as written in the file (not the joined path) so that the sorted list can be
            # written back unchanged.
            all_paths.append(line)

    if non_existent_paths:
        missing = "\n".join([f"- {entry}" for entry in non_existent_paths])
        # Report the file actually being checked: this function runs on several doctest lists.
        raise ValueError(f"`{doctest_file}` contains non-existent paths:\n{missing}")

    sorted_paths = sorted(all_paths)
    if all_paths != sorted_paths:
        if not overwrite:
            raise ValueError(
                f"Files in `{doctest_file}` are not in alphabetical order, run `make fix-copies` to fix "
                "this automatically."
            )
        with open(doctest_file, "w", encoding="utf-8") as f:
            f.write("\n".join(sorted_paths) + "\n")
if __name__ == "__main__":
    # Command-line entry point: check (or, with `--fix_and_overwrite`, fix) every doctest list in `utils/`.
    cli = argparse.ArgumentParser()
    cli.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    options = cli.parse_args()

    for list_name in DOCTEST_FILE_PATHS:
        clean_doctest_list(os.path.join(REPO_PATH, "utils", list_name), options.fix_and_overwrite)