mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 05:10:06 +06:00
Document toc check and doctest check scripts (#25319)
* Clean doc toc check and make doctest list better
* Add to Makefile
This commit is contained in:
parent
ce6d153a53
commit
fdaef3368b
1
Makefile
1
Makefile
@ -80,6 +80,7 @@ fix-copies:
|
|||||||
python utils/check_copies.py --fix_and_overwrite
|
python utils/check_copies.py --fix_and_overwrite
|
||||||
python utils/check_table.py --fix_and_overwrite
|
python utils/check_table.py --fix_and_overwrite
|
||||||
python utils/check_dummies.py --fix_and_overwrite
|
python utils/check_dummies.py --fix_and_overwrite
|
||||||
|
python utils/check_doctest_list.py --fix_and_overwrite
|
||||||
python utils/check_task_guides.py --fix_and_overwrite
|
python utils/check_task_guides.py --fix_and_overwrite
|
||||||
|
|
||||||
# Run tests for the library
|
# Run tests for the library
|
||||||
|
@ -12,6 +12,25 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
This script is responsible for cleaning the model section of the table of contents by removing duplicates and sorting
|
||||||
|
the entries in alphabetical order.
|
||||||
|
|
||||||
|
Usage (from the root of the repo):
|
||||||
|
|
||||||
|
Check that the table of contents is properly sorted (used in `make quality`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python utils/check_doc_toc.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Auto-sort the table of contents if it is not properly sorted (used in `make style`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python utils/check_doc_toc.py --fix_and_overwrite
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@ -24,7 +43,15 @@ PATH_TO_TOC = "docs/source/en/_toctree.yml"
|
|||||||
|
|
||||||
def clean_model_doc_toc(model_doc):
|
def clean_model_doc_toc(model_doc):
|
||||||
"""
|
"""
|
||||||
Cleans the table of content of the model documentation by removing duplicates and sorting models alphabetically.
|
Cleans a section of the table of contents of the model documentation (one specific modality) by removing duplicates
|
||||||
|
and sorting models alphabetically.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model_doc (`List[dict]`):
|
||||||
|
The list of dictionaries extracted from the `_toctree.yml` file for this specific modality.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
`List[dict]`: List of dictionaries like the input, but cleaned up and sorted.
|
||||||
"""
|
"""
|
||||||
counts = defaultdict(int)
|
counts = defaultdict(int)
|
||||||
for doc in model_doc:
|
for doc in model_doc:
|
||||||
@ -51,6 +78,14 @@ def clean_model_doc_toc(model_doc):
|
|||||||
|
|
||||||
|
|
||||||
def check_model_doc(overwrite=False):
|
def check_model_doc(overwrite=False):
|
||||||
|
"""
|
||||||
|
Check that the table of contents in `_toctree.yml` is clean (no duplicates and sorted for the model
|
||||||
|
API doc) and potentially auto-cleans it.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
overwrite (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether to just check if the TOC is clean or to auto-clean it (when `overwrite=True`).
|
||||||
|
"""
|
||||||
with open(PATH_TO_TOC, encoding="utf-8") as f:
|
with open(PATH_TO_TOC, encoding="utf-8") as f:
|
||||||
content = yaml.safe_load(f.read())
|
content = yaml.safe_load(f.read())
|
||||||
|
|
||||||
@ -67,6 +102,7 @@ def check_model_doc(overwrite=False):
|
|||||||
|
|
||||||
model_doc = api_doc[model_idx]["sections"]
|
model_doc = api_doc[model_idx]["sections"]
|
||||||
|
|
||||||
|
# Extract the modalities and clean them one by one.
|
||||||
modalities_docs = [(idx, section) for idx, section in enumerate(model_doc) if "sections" in section]
|
modalities_docs = [(idx, section) for idx, section in enumerate(model_doc) if "sections" in section]
|
||||||
diff = False
|
diff = False
|
||||||
for idx, modality_doc in modalities_docs:
|
for idx, modality_doc in modalities_docs:
|
||||||
|
@ -12,28 +12,65 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
This script is responsible for cleaning the list of doctests by making sure the entries all exist and are in
|
||||||
|
alphabetical order.
|
||||||
|
|
||||||
|
Usage (from the root of the repo):
|
||||||
|
|
||||||
|
Check that the doctest list is properly sorted and all files exist (used in `make repo-consistency`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python utils/check_doctest_list.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Auto-sort the doctest list if it is not properly sorted (used in `make fix-copies`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python utils/check_doctest_list.py --fix_and_overwrite
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
# All paths are set with the intent you should run this script from the root of the repo with the command
|
# All paths are set with the intent you should run this script from the root of the repo with the command
|
||||||
# python utils/check_doctest_list.py
|
# python utils/check_doctest_list.py
|
||||||
REPO_PATH = "."
|
REPO_PATH = "."
|
||||||
|
DOCTEST_FILE_PATHS = ["documentation_tests.txt", "slow_documentation_tests.txt"]
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def clean_doctest_list(doctest_file, overwrite=False):
|
||||||
doctest_file_path = os.path.join(REPO_PATH, "utils/documentation_tests.txt")
|
|
||||||
non_existent_paths = []
|
non_existent_paths = []
|
||||||
all_paths = []
|
all_paths = []
|
||||||
with open(doctest_file_path) as fp:
|
with open(doctest_file, "r", encoding="utf-8") as f:
|
||||||
for line in fp:
|
for line in f:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
path = os.path.join(REPO_PATH, line)
|
path = os.path.join(REPO_PATH, line)
|
||||||
if not (os.path.isfile(path) or os.path.isdir(path)):
|
if not (os.path.isfile(path) or os.path.isdir(path)):
|
||||||
non_existent_paths.append(line)
|
non_existent_paths.append(line)
|
||||||
all_paths.append(path)
|
all_paths.append(line)
|
||||||
|
|
||||||
if len(non_existent_paths) > 0:
|
if len(non_existent_paths) > 0:
|
||||||
non_existent_paths = "\n".join(non_existent_paths)
|
non_existent_paths = "\n".join([f"- {f}" for f in non_existent_paths])
|
||||||
raise ValueError(f"`utils/documentation_tests.txt` contains non-existent paths:\n{non_existent_paths}")
|
raise ValueError(f"`utils/documentation_tests.txt` contains non-existent paths:\n{non_existent_paths}")
|
||||||
if all_paths != sorted(all_paths):
|
|
||||||
raise ValueError("Files in `utils/documentation_tests.txt` are not in alphabetical order.")
|
sorted_paths = sorted(all_paths)
|
||||||
|
if all_paths != sorted_paths:
|
||||||
|
if not overwrite:
|
||||||
|
raise ValueError(
|
||||||
|
f"Files in `{doctest_file}` are not in alphabetical order, run `make fix-copies` to fix "
|
||||||
|
"this automatically."
|
||||||
|
)
|
||||||
|
with open(doctest_file, "w", encoding="utf-8") as f:
|
||||||
|
f.write("\n".join(sorted_paths) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
for doctest_file in DOCTEST_FILE_PATHS:
|
||||||
|
doctest_file = os.path.join(REPO_PATH, "utils", doctest_file)
|
||||||
|
clean_doctest_list(doctest_file, args.fix_and_overwrite)
|
||||||
|
Loading…
Reference in New Issue
Block a user