diff --git a/.circleci/config.yml b/.circleci/config.yml index ae6742563f6..c51141efa98 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -862,6 +862,7 @@ jobs: - run: python utils/sort_auto_mappings.py --check_only - run: flake8 examples tests src utils - run: doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source + - run: python utils/check_doc_toc.py check_repository_consistency: working_directory: ~/transformers diff --git a/Makefile b/Makefile index f0abc15de8e..6c6200cfe72 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ quality: python utils/sort_auto_mappings.py --check_only flake8 $(check_dirs) doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source + python utils/check_doc_toc.py # Format source code automatically and check is there are any problems left that need manual fixing @@ -58,6 +59,7 @@ extra_style_checks: python utils/custom_init_isort.py python utils/sort_auto_mappings.py doc-builder style src/transformers docs/source --max_len 119 --path_to_docs docs/source + python utils/check_doc_toc.py --fix_and_overwrite # this target runs checks on all files and potentially modifies some of them diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 847dfd34fe0..a8cd1e35a5a 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -22,7 +22,7 @@ title: Tutorials - sections: - local: fast_tokenizers - title: "Use tokenizers from 🤗 Tokenizers" + title: Use tokenizers from 🤗 Tokenizers - local: create_a_model title: Create a custom architecture - local: custom_models @@ -94,15 +94,15 @@ - local: debugging title: Debugging - local: notebooks - title: "🤗 Transformers Notebooks" + title: 🤗 Transformers Notebooks - local: community title: Community - local: contributing title: How to contribute to transformers? - local: add_new_model - title: "How to add a model to 🤗 Transformers?" 
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
from collections import defaultdict


PATH_TO_TOC = "docs/source/en/_toctree.yml"


def clean_model_doc_toc(model_doc):
    """
    Cleans the table of contents of the model documentation by removing duplicates and sorting models alphabetically.

    Args:
        model_doc (`List[dict]`):
            The entries of the "Models" section. Each entry is a dictionary with (at least) a `"local"` and a
            `"title"` key.

    Returns:
        `List[dict]`: A new list where each `"local"` value appears only once, sorted by case-insensitive title. The
        input list is not modified.

    Raises:
        `ValueError`: If the same `"local"` value is listed several times with different titles.
    """
    # Group every title by its `local` key in a single pass. Dicts preserve insertion order, so keys come back in
    # order of first appearance.
    titles_by_local = defaultdict(list)
    for doc in model_doc:
        titles_by_local[doc["local"]].append(doc["title"])

    new_doc = []
    for local, titles in titles_by_local.items():
        if len(titles) == 1:
            continue
        if len(set(titles)) > 1:
            raise ValueError(
                f"{local} is present several times in the documentation table of content at "
                "`docs/source/en/_toctree.yml` with different *Title* values. Choose one of those and remove the "
                "others."
            )
        # Same `local` and same title listed several times: keep a single entry.
        new_doc.append({"local": local, "title": titles[0]})

    # Entries that were never duplicated are kept as they are.
    new_doc.extend(doc for doc in model_doc if len(titles_by_local[doc["local"]]) == 1)

    # `sorted` is stable, so entries whose titles only differ by case keep the relative order built above.
    return sorted(new_doc, key=lambda doc: doc["title"].lower())


def _find_section(entries, title):
    """
    Returns the index of the first element of `entries` whose `"title"` is `title`.

    Raises a `ValueError` naming the missing section instead of running past the end of the list.
    """
    for idx, entry in enumerate(entries):
        if entry.get("title") == title:
            return idx
    raise ValueError(f"Could not find a section titled {title!r} in `{PATH_TO_TOC}`.")


def check_model_doc(overwrite=False):
    """
    Checks that the "Models" part of the table of contents at `PATH_TO_TOC` has no duplicates and is sorted.

    Args:
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether to rewrite the table of contents with the cleaned "Models" section instead of raising an error.

    Raises:
        `ValueError`: If the "API" or "Models" section cannot be found, if a model is listed several times with
        different titles, or if the section needs cleaning and `overwrite` is `False`.
    """
    # Imported here so that `clean_model_doc_toc` stays importable in environments without PyYAML.
    import yaml

    with open(PATH_TO_TOC, encoding="utf-8") as f:
        # An empty file loads as `None`; treat it as an empty table of contents.
        content = yaml.safe_load(f) or []

    # Get to the API doc, then to the model doc inside it.
    api_doc = content[_find_section(content, "API")]["sections"]
    model_idx = _find_section(api_doc, "Models")

    old_model_doc = api_doc[model_idx]["sections"]
    new_model_doc = clean_model_doc_toc(old_model_doc)

    if old_model_doc == new_model_doc:
        return

    if not overwrite:
        raise ValueError(
            "The model doc part of the table of content is not properly sorted, run `make style` to fix this."
        )

    # `api_doc` is the very object stored inside `content`, so updating it updates what gets dumped.
    api_doc[model_idx]["sections"] = new_model_doc
    with open(PATH_TO_TOC, "w", encoding="utf-8") as f:
        f.write(yaml.dump(content, allow_unicode=True))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    args = parser.parse_args()

    check_model_doc(args.fix_and_overwrite)