mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 12:50:06 +06:00
Document toc check and doctest check scripts (#25319)
* Clean doc toc check and make doctest list better
* Add to Makefile
This commit is contained in:
parent
ce6d153a53
commit
fdaef3368b
1
Makefile
1
Makefile
@ -80,6 +80,7 @@ fix-copies:
|
||||
python utils/check_copies.py --fix_and_overwrite
|
||||
python utils/check_table.py --fix_and_overwrite
|
||||
python utils/check_dummies.py --fix_and_overwrite
|
||||
python utils/check_doctest_list.py --fix_and_overwrite
|
||||
python utils/check_task_guides.py --fix_and_overwrite
|
||||
|
||||
# Run tests for the library
|
||||
|
@ -12,6 +12,25 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This script is responsible for cleaning the model section of the table of contents by removing duplicates and sorting
|
||||
the entries in alphabetical order.
|
||||
|
||||
Usage (from the root of the repo):
|
||||
|
||||
Check that the table of contents is properly sorted (used in `make quality`):
|
||||
|
||||
```bash
|
||||
python utils/check_doc_toc.py
|
||||
```
|
||||
|
||||
Auto-sort the table of contents if it is not properly sorted (used in `make style`):
|
||||
|
||||
```bash
|
||||
python utils/check_doc_toc.py --fix_and_overwrite
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
@ -24,7 +43,15 @@ PATH_TO_TOC = "docs/source/en/_toctree.yml"
|
||||
|
||||
def clean_model_doc_toc(model_doc):
|
||||
"""
|
||||
Cleans the table of content of the model documentation by removing duplicates and sorting models alphabetically.
|
||||
Cleans a section of the table of contents of the model documentation (one specific modality) by removing duplicates
|
||||
and sorting models alphabetically.
|
||||
|
||||
Args:
|
||||
model_doc (`List[dict]`):
|
||||
The list of dictionaries extracted from the `_toctree.yml` file for this specific modality.
|
||||
|
||||
Returns:
|
||||
`List[dict]`: List of dictionaries like the input, but cleaned up and sorted.
|
||||
"""
|
||||
counts = defaultdict(int)
|
||||
for doc in model_doc:
|
||||
@ -51,6 +78,14 @@ def clean_model_doc_toc(model_doc):
|
||||
|
||||
|
||||
def check_model_doc(overwrite=False):
|
||||
"""
|
||||
Check that the table of contents in `_toctree.yml` is clean (no duplicates and sorted for the model
|
||||
API doc) and potentially auto-cleans it.
|
||||
|
||||
Args:
|
||||
overwrite (`bool`, *optional*, defaults to `False`):
|
||||
Whether to just check if the TOC is clean or to auto-clean it (when `overwrite=True`).
|
||||
"""
|
||||
with open(PATH_TO_TOC, encoding="utf-8") as f:
|
||||
content = yaml.safe_load(f.read())
|
||||
|
||||
@ -67,6 +102,7 @@ def check_model_doc(overwrite=False):
|
||||
|
||||
model_doc = api_doc[model_idx]["sections"]
|
||||
|
||||
# Extract the modalities and clean them one by one.
|
||||
modalities_docs = [(idx, section) for idx, section in enumerate(model_doc) if "sections" in section]
|
||||
diff = False
|
||||
for idx, modality_doc in modalities_docs:
|
||||
|
@ -12,28 +12,65 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This script is responsible for cleaning the list of doctests by making sure the entries all exist and are in
|
||||
alphabetical order.
|
||||
|
||||
Usage (from the root of the repo):
|
||||
|
||||
Check that the doctest list is properly sorted and all files exist (used in `make repo-consistency`):
|
||||
|
||||
```bash
|
||||
python utils/check_doctest_list.py
|
||||
```
|
||||
|
||||
Auto-sort the doctest list if it is not properly sorted (used in `make fix-copies`):
|
||||
|
||||
```bash
|
||||
python utils/check_doctest_list.py --fix_and_overwrite
|
||||
```
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
|
||||
|
||||
# All paths are set with the intent that you run this script from the root of the repo with the command
|
||||
# python utils/check_doctest_list.py
|
||||
REPO_PATH = "."
|
||||
DOCTEST_FILE_PATHS = ["documentation_tests.txt", "slow_documentation_tests.txt"]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
doctest_file_path = os.path.join(REPO_PATH, "utils/documentation_tests.txt")
|
||||
def clean_doctest_list(doctest_file, overwrite=False):
|
||||
non_existent_paths = []
|
||||
all_paths = []
|
||||
with open(doctest_file_path) as fp:
|
||||
for line in fp:
|
||||
with open(doctest_file, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
path = os.path.join(REPO_PATH, line)
|
||||
if not (os.path.isfile(path) or os.path.isdir(path)):
|
||||
non_existent_paths.append(line)
|
||||
all_paths.append(path)
|
||||
all_paths.append(line)
|
||||
|
||||
if len(non_existent_paths) > 0:
|
||||
non_existent_paths = "\n".join(non_existent_paths)
|
||||
non_existent_paths = "\n".join([f"- {f}" for f in non_existent_paths])
|
||||
raise ValueError(f"`utils/documentation_tests.txt` contains non-existent paths:\n{non_existent_paths}")
|
||||
if all_paths != sorted(all_paths):
|
||||
raise ValueError("Files in `utils/documentation_tests.txt` are not in alphabetical order.")
|
||||
|
||||
sorted_paths = sorted(all_paths)
|
||||
if all_paths != sorted_paths:
|
||||
if not overwrite:
|
||||
raise ValueError(
|
||||
f"Files in `{doctest_file}` are not in alphabetical order, run `make fix-copies` to fix "
|
||||
"this automatically."
|
||||
)
|
||||
with open(doctest_file, "w", encoding="utf-8") as f:
|
||||
f.write("\n".join(sorted_paths) + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
|
||||
args = parser.parse_args()
|
||||
|
||||
for doctest_file in DOCTEST_FILE_PATHS:
|
||||
doctest_file = os.path.join(REPO_PATH, "utils", doctest_file)
|
||||
clean_doctest_list(doctest_file, args.fix_and_overwrite)
|
||||
|
Loading…
Reference in New Issue
Block a user