Bring back the PR "Refactor doctests + add CI" to main (#23271)

* Revert "Revert "[Doctests] Refactor doctests + add CI" (#23245)"

  This reverts commit 69ee46243c.

* try to not expose HfDocTestParser

* move into testing_utils.py

* remove pytest install

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent: b2846afda8 · commit: 9088fcae82
.circleci/create_circleci_config.py

@@ -51,6 +51,8 @@ class CircleCIJob:
     resource_class: Optional[str] = "xlarge"
     tests_to_run: Optional[List[str]] = None
     working_directory: str = "~/transformers"
+    # This should be only used for doctest job!
+    command_timeout: Optional[int] = None
 
     def __post_init__(self):
         # Deal with defaults for mutable attributes.
@@ -107,11 +109,15 @@ class CircleCIJob:
         steps.append({"store_artifacts": {"path": "~/transformers/installed.txt"}})
 
         all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
-        pytest_flags = [f"--{key}={value}" if value is not None else f"-{key}" for key, value in all_options.items()]
+        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
         pytest_flags.append(
             f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
         )
-        test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+        test_command = ""
+        if self.command_timeout:
+            test_command = f"timeout {self.command_timeout} "
+        test_command += f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+
         if self.parallelism == 1:
             if self.tests_to_run is None:
                 test_command += " << pipeline.parameters.tests_to_run >>"
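Note: the assembled pytest command now gets an optional `timeout` prefix. A minimal, self-contained sketch of what this assembly produces (the option dict and numbers below are hypothetical, and the special-casing of `doctest-modules` is omitted):

```python
# Hypothetical values for illustration only; the real job options live in this config file.
command_timeout = 1200
pytest_num_workers = 1
all_options = {"doctest-glob": "*.mdx", "dist": "loadfile", "rvsA": None}

# Options with a value become long `--key=value` flags; valueless ones fall back to `-key`.
pytest_flags = [
    f"--{key}={value}" if value is not None else f"-{key}"
    for key, value in all_options.items()
]

test_command = ""
if command_timeout:
    # GNU `timeout` kills the run after `command_timeout` seconds and exits with 124,
    # which the "Check doctest results" step added below looks for.
    test_command = f"timeout {command_timeout} "
test_command += f"python -m pytest -n {pytest_num_workers} " + " ".join(pytest_flags)

print(test_command)
# timeout 1200 python -m pytest -n 1 --doctest-glob=*.mdx --dist=loadfile -rvsA
```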
@@ -161,12 +167,37 @@ class CircleCIJob:
             steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
             steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
 
-            test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+            test_command = ""
+            if self.timeout:
+                test_command = f"timeout {self.timeout} "
+            test_command += f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
             test_command += " $(cat splitted_tests.txt)"
         if self.marker is not None:
             test_command += f" -m {self.marker}"
-        test_command += " | tee tests_output.txt"
+
+        if self.name == "pr_documentation_tests":
+            # can't use ` | tee tests_output.txt` as usual
+            test_command += " > tests_output.txt"
+            # Save the return code, so we can check if it is timeout in the next step.
+            test_command += '; touch "$?".txt'
+            # Never fail the test step for the doctest job. We will check the results in the next step, and fail that
+            # step instead if the actual test failures are found. This is to avoid the timeout being reported as test
+            # failure.
+            test_command = f"({test_command}) || true"
+        else:
+            test_command += " | tee tests_output.txt"
         steps.append({"run": {"name": "Run tests", "command": test_command}})
+
+        # return code `124` means the previous (pytest run) step is timeout
+        if self.name == "pr_documentation_tests":
+            checkout_doctest_command = 'if [ -s reports/tests_pr_documentation_tests/failures_short.txt ]; '
+            checkout_doctest_command += 'then echo "some test failed"; '
+            checkout_doctest_command += 'cat reports/tests_pr_documentation_tests/failures_short.txt; '
+            checkout_doctest_command += 'cat reports/tests_pr_documentation_tests/summary_short.txt; exit -1; '
+            checkout_doctest_command += 'elif [ -s reports/tests_pr_documentation_tests/stats.txt ]; then echo "All tests pass!"; '
+            checkout_doctest_command += 'elif [ -f 124.txt ]; then echo "doctest timeout!"; else echo "other fatal error"; exit -1; fi;'
+            steps.append({"run": {"name": "Check doctest results", "command": checkout_doctest_command}})
+
         steps.append({"store_artifacts": {"path": "~/transformers/tests_output.txt"}})
         steps.append({"store_artifacts": {"path": "~/transformers/reports"}})
         job["steps"] = steps
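The doctest job deliberately decouples "run pytest" from "decide pass/fail": the run step records its exit code as `<code>.txt` and always succeeds, and a follow-up step inspects the report files plus the `124.txt` marker left when GNU `timeout` fires. A rough sketch of that pattern (the helper names and the `report_dir` default are made up, and the shell fragments are simplified relative to the config above):

```python
# Sketch only, not the project's code.
def wrap_run_command(test_command: str) -> str:
    # Redirect output to a file, record the exit code as "<code>.txt"
    # (124 when GNU `timeout` fired), and swallow the failure so the
    # "Run tests" step itself never fails.
    test_command += " > tests_output.txt"
    test_command += '; touch "$?".txt'
    return f"({test_command}) || true"


def build_check_command(report_dir: str = "reports/tests_pr_documentation_tests") -> str:
    # Fail the job only when pytest produced real failure reports; a bare
    # 124.txt marker is reported as a timeout instead of a test failure.
    check = f'if [ -s {report_dir}/failures_short.txt ]; then echo "some test failed"; '
    check += f'cat {report_dir}/failures_short.txt; exit 1; '
    check += f'elif [ -s {report_dir}/stats.txt ]; then echo "All tests pass!"; '
    check += 'elif [ -f 124.txt ]; then echo "doctest timeout!"; else echo "other fatal error"; exit 1; fi;'
    return check


print(wrap_run_command("timeout 1200 python -m pytest --doctest-modules dummy.py"))
print(build_check_command())
```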
@@ -401,6 +432,51 @@ repo_utils_job = CircleCIJob(
     tests_to_run="tests/repo_utils",
 )
 
+
+# At this moment, only the files that are in `utils/documentation_tests.txt` will be kept (together with a dummy file).
+py_command = 'import os; import json; fp = open("pr_documentation_tests.txt"); data_1 = fp.read().strip().split("\\n"); fp = open("utils/documentation_tests.txt"); data_2 = fp.read().strip().split("\\n"); to_test = [x for x in data_1 if x in set(data_2)] + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
+py_command = f"$(python3 -c '{py_command}')"
+command = f'echo "{py_command}" > pr_documentation_tests_filtered.txt'
+doc_test_job = CircleCIJob(
+    "pr_documentation_tests",
+    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time",
+        "pip install --upgrade pip",
+        "pip install -e .[dev]",
+        "pip install git+https://github.com/huggingface/accelerate",
+        "pip install --upgrade pytest pytest-sugar",
+        "find -name __pycache__ -delete",
+        "find . -name \*.pyc -delete",
+        # Add an empty file to keep the test step running correctly even if no file is selected to be tested.
+        "touch dummy.py",
+        {
+            "name": "Get files to test",
+            "command":
+                "git remote add upstream https://github.com/huggingface/transformers.git && git fetch upstream \n"
+                "git diff --name-only --relative --diff-filter=AMR refs/remotes/upstream/main...HEAD | grep -E '\.(py|mdx)$' | grep -Ev '^\..*|/\.' | grep -Ev '__' > pr_documentation_tests.txt"
+        },
+        {
+            "name": "List files being changed: pr_documentation_tests.txt",
+            "command":
+                "cat pr_documentation_tests.txt"
+        },
+        {
+            "name": "Filter pr_documentation_tests.txt",
+            "command":
+                command
+        },
+        {
+            "name": "List files being tested: pr_documentation_tests_filtered.txt",
+            "command":
+                "cat pr_documentation_tests_filtered.txt"
+        },
+    ],
+    tests_to_run="$(cat pr_documentation_tests_filtered.txt)",  # noqa
+    pytest_options={"-doctest-modules": None, "doctest-glob": "*.mdx", "dist": "loadfile", "rvsA": None},
+    command_timeout=1200,  # test cannot run longer than 1200 seconds
+    pytest_num_workers=1,
+)
+
 REGULAR_TESTS = [
     torch_and_tf_job,
     torch_and_flax_job,
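The inline `py_command` one-liner is easier to follow spelled out. A standalone sketch of the same filtering (the function name is made up; the two file names are the ones used by the job steps above, which exist in the CI working directory):

```python
# Sketch of the filtering performed by `py_command`.
def select_documentation_tests(
    changed_files_path: str = "pr_documentation_tests.txt",
    allow_list_path: str = "utils/documentation_tests.txt",
) -> str:
    # Files touched by the PR, one path per line (written by the "Get files to test" step).
    with open(changed_files_path) as fp:
        changed = fp.read().strip().split("\n")
    # Files currently opted in to doctest collection.
    with open(allow_list_path) as fp:
        allowed = set(fp.read().strip().split("\n"))
    # Keep only changed files that are in the allow list; `dummy.py` keeps the
    # pytest invocation valid even when nothing matches.
    to_test = [f for f in changed if f in allowed] + ["dummy.py"]
    return " ".join(to_test)


print(select_documentation_tests())
```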
@@ -411,6 +487,7 @@ REGULAR_TESTS = [
     hub_job,
     onnx_job,
     exotic_models_job,
+    doc_test_job
 ]
 EXAMPLES_TESTS = [
     examples_torch_job,
.github/workflows/doctests.yml

@@ -37,18 +37,10 @@ jobs:
       - name: Show installed libraries and their versions
         run: pip freeze
 
-      - name: Prepare files for doctests
-        run: |
-          python3 utils/prepare_for_doc_test.py src docs
-
       - name: Run doctests
         run: |
           python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat utils/documentation_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.mdx"
 
-      - name: Clean files after doctests
-        run: |
-          python3 utils/prepare_for_doc_test.py src docs --remove_new_line
-
       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
Makefile

@@ -47,10 +47,10 @@ repo-consistency:
 # this target runs checks on all files
 
 quality:
-	black --check $(check_dirs) setup.py
+	black --check $(check_dirs) setup.py conftest.py
 	python utils/custom_init_isort.py --check_only
 	python utils/sort_auto_mappings.py --check_only
-	ruff $(check_dirs) setup.py
+	ruff $(check_dirs) setup.py conftest.py
 	doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
 	python utils/check_doc_toc.py
 
@@ -65,8 +65,8 @@ extra_style_checks:
 # this target runs checks on all files and potentially modifies some of them
 
 style:
-	black $(check_dirs) setup.py
-	ruff $(check_dirs) setup.py --fix
+	black $(check_dirs) setup.py conftest.py
+	ruff $(check_dirs) setup.py conftest.py --fix
 	${MAKE} autogenerate_code
 	${MAKE} extra_style_checks
 
conftest.py

@@ -20,6 +20,10 @@ import sys
 import warnings
 from os.path import abspath, dirname, join
 
+import _pytest
+
+from transformers.testing_utils import HfDoctestModule, HfDocTestParser
+
 
 # allow having multiple repository checkouts and not needing to remember to rerun
 # 'pip install -e .[dev]' when switching between checkouts and running tests.
@@ -38,9 +42,7 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
     )
-    config.addinivalue_line(
-        "markers", "is_pipeline_test: mark test to run only when pipelines are tested"
-    )
+    config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
     config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
     config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
     config.addinivalue_line("markers", "tool_tests: mark the tool tests that are run on their specific schedule")
@@ -67,7 +69,7 @@ def pytest_sessionfinish(session, exitstatus):
 
 
 # Doctest custom flag to ignore output.
-IGNORE_RESULT = doctest.register_optionflag('IGNORE_RESULT')
+IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")
 
 OutputChecker = doctest.OutputChecker
 
@@ -80,3 +82,5 @@ class CustomOutputChecker(OutputChecker):
 
 
 doctest.OutputChecker = CustomOutputChecker
+_pytest.doctest.DoctestModule = HfDoctestModule
+doctest.DocTestParser = HfDocTestParser
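For reference, the `IGNORE_RESULT` option flag that appears in this hunk's context is honored by a custom output checker along these lines (a sketch of the pattern, not necessarily the exact class body in `conftest.py`):

```python
import doctest

# Custom flag so individual lines can be tagged `# doctest: +IGNORE_RESULT`.
IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")

OutputChecker = doctest.OutputChecker


class CustomOutputChecker(OutputChecker):
    def check_output(self, want, got, optionflags):
        # Accept any output for examples carrying the IGNORE_RESULT flag.
        if IGNORE_RESULT & optionflags:
            return True
        return OutputChecker.check_output(self, want, got, optionflags)


doctest.OutputChecker = CustomOutputChecker
```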
docs/source/en/testing.mdx

@@ -212,20 +212,12 @@
 ```"""
 
 ```
-3 steps are required to debug the docstring examples:
-1. In order to properly run the test, **an extra line has to be added** at the end of the docstring. This can be automatically done on any file using:
-```bash
-python utils/prepare_for_doc_test.py <path_to_file_or_dir>
-```
-
-2. Then, you can use the following line to automatically test every docstring example in the desired file:
+Just run the following line to automatically test every docstring example in the desired file:
 ```bash
 pytest --doctest-modules <path_to_file_or_dir>
 ```
-3. Once you are done debugging, you need to remove the extra line added in step **1.** by running the following:
-```bash
-python utils/prepare_for_doc_test.py <path_to_file_or_dir> --remove_new_line
-```
+If the file has a markdown extension, you should add the `--doctest-glob="*.mdx"` argument.
 
 ### Run only modified tests
 
setup.cfg

@@ -1,2 +1,3 @@
 [tool:pytest]
 doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS
+doctest_glob=**/*.mdx
src/transformers/testing_utils.py

@@ -14,6 +14,7 @@
 
 import collections
 import contextlib
+import doctest
 import functools
 import inspect
 import logging
@@ -30,11 +31,23 @@ import unittest
 from collections.abc import Mapping
 from io import StringIO
 from pathlib import Path
-from typing import Iterator, List, Optional, Union
+from typing import Iterable, Iterator, List, Optional, Union
 from unittest import mock
 
 import huggingface_hub
 import requests
+from _pytest.doctest import (
+    Module,
+    _get_checker,
+    _get_continue_on_failure,
+    _get_runner,
+    _is_mocked,
+    _patch_unwrap_mock_aware,
+    get_optionflags,
+    import_path,
+)
+from _pytest.outcomes import skip
+from pytest import DoctestItem
 
 from transformers import logging as transformers_logging
 
@@ -1812,3 +1825,162 @@ def run_test_in_subprocess(test_case, target_func, inputs=None, timeout=None):
 
     if results["error"] is not None:
         test_case.fail(f'{results["error"]}')
+
+
+"""
+The following contains utils to run the documentation tests without having to overwrite any files.
+
+The `preprocess_string` function adds `# doctest: +IGNORE_RESULT` markers on the fly anywhere a `load_dataset` call is
+made as a print would otherwise fail the corresponding line.
+
+To skip cuda tests, make sure to call `SKIP_CUDA_DOCTEST=1 pytest --doctest-modules <path_to_files_to_test>`
+"""
+
+
+def preprocess_string(string, skip_cuda_tests):
+    """Prepare a docstring or a `.mdx` file to be run by doctest.
+
+    The argument `string` would be the whole file content if it is a `.mdx` file. For a python file, it would be one of
+    its docstrings. In each case, it may contain multiple python code examples. If `skip_cuda_tests` is `True` and
+    CUDA code is detected (with a heuristic), this method will return an empty string so no doctest will be run for
+    `string`.
+    """
+    codeblock_pattern = r"(```(?:python|py)\s*\n\s*>>> )((?:.*?\n)*?.*?```)"
+    codeblocks = re.split(re.compile(codeblock_pattern, flags=re.MULTILINE | re.DOTALL), string)
+    is_cuda_found = False
+    for i, codeblock in enumerate(codeblocks):
+        if "load_dataset(" in codeblock and "# doctest: +IGNORE_RESULT" not in codeblock:
+            codeblocks[i] = re.sub(r"(>>> .*load_dataset\(.*)", r"\1 # doctest: +IGNORE_RESULT", codeblock)
+        if (
+            (">>>" in codeblock or "..." in codeblock)
+            and re.search(r"cuda|to\(0\)|device=0", codeblock)
+            and skip_cuda_tests
+        ):
+            is_cuda_found = True
+            break
+    modified_string = ""
+    if not is_cuda_found:
+        modified_string = "".join(codeblocks)
+    return modified_string
+
+
+class HfDocTestParser(doctest.DocTestParser):
+    """
+    Overwrites the DocTestParser from doctest to properly parse the codeblocks that are formatted with black. This
+    means that there are no extra lines at the end of our snippets. The `# doctest: +IGNORE_RESULT` marker is also
+    added anywhere a `load_dataset` call is made as a print would otherwise fail the corresponding line.
+
+    Tests involving cuda are skipped based on a naive pattern that should be updated if it is not enough.
+    """
+
+    # This regular expression is used to find doctest examples in a
+    # string. It defines three groups: `source` is the source code
+    # (including leading indentation and prompts); `indent` is the
+    # indentation of the first (PS1) line of the source code; and
+    # `want` is the expected output (including leading indentation).
+    # fmt: off
+    _EXAMPLE_RE = re.compile(r'''
+        # Source consists of a PS1 line followed by zero or more PS2 lines.
+        (?P<source>
+            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
+            (?:\n           [ ]*  \.\.\. .*)*)  # PS2 lines
+        \n?
+        # Want consists of any non-blank lines that do not start with PS1.
+        (?P<want> (?:(?![ ]*$)    # Not a blank line
+                     (?![ ]*>>>)  # Not a line starting with PS1
+                     # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+                     (?:(?!```).)*  # Match any character except '`' until a '```' is found (this is specific to HF because black removes the last line)
+                     # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+                     (?:\n|$)  # Match a new line or end of string
+                  )*)
+        ''', re.MULTILINE | re.VERBOSE
+    )
+    # fmt: on
+
+    # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+    skip_cuda_tests: bool = bool(os.environ.get("SKIP_CUDA_DOCTEST", False))
+    # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+
+    def parse(self, string, name="<string>"):
+        """
+        Overwrites the `parse` method to incorporate a skip for CUDA tests, and remove logs and dataset prints before
+        calling `super().parse`
+        """
+        string = preprocess_string(string, self.skip_cuda_tests)
+        return super().parse(string, name)
+
+
+class HfDoctestModule(Module):
+    """
+    Overwrites the `DoctestModule` of the pytest package to make sure the `HfDocTestParser` is used when discovering
+    tests.
+    """
+
+    def collect(self) -> Iterable[DoctestItem]:
+        class MockAwareDocTestFinder(doctest.DocTestFinder):
+            """A hackish doctest finder that overrides stdlib internals to fix a stdlib bug.
+
+            https://github.com/pytest-dev/pytest/issues/3456 https://bugs.python.org/issue25532
+            """
+
+            def _find_lineno(self, obj, source_lines):
+                """Doctest code does not take into account `@property`, this
+                is a hackish way to fix it. https://bugs.python.org/issue17446
+
+                Wrapped Doctests will need to be unwrapped so the correct line number is returned. This will be
+                reported upstream. #8796
+                """
+                if isinstance(obj, property):
+                    obj = getattr(obj, "fget", obj)
+
+                if hasattr(obj, "__wrapped__"):
+                    # Get the main obj in case of it being wrapped
+                    obj = inspect.unwrap(obj)
+
+                # Type ignored because this is a private function.
+                return super()._find_lineno(  # type:ignore[misc]
+                    obj,
+                    source_lines,
+                )
+
+            def _find(self, tests, obj, name, module, source_lines, globs, seen) -> None:
+                if _is_mocked(obj):
+                    return
+                with _patch_unwrap_mock_aware():
+                    # Type ignored because this is a private function.
+                    super()._find(  # type:ignore[misc]
+                        tests, obj, name, module, source_lines, globs, seen
+                    )
+
+        if self.path.name == "conftest.py":
+            module = self.config.pluginmanager._importconftest(
+                self.path,
+                self.config.getoption("importmode"),
+                rootpath=self.config.rootpath,
+            )
+        else:
+            try:
+                module = import_path(
+                    self.path,
+                    root=self.config.rootpath,
+                    mode=self.config.getoption("importmode"),
+                )
+            except ImportError:
+                if self.config.getvalue("doctest_ignore_import_errors"):
+                    skip("unable to import module %r" % self.path)
+                else:
+                    raise
+
+        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+        finder = MockAwareDocTestFinder(parser=HfDocTestParser())
+        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+        optionflags = get_optionflags(self)
+        runner = _get_runner(
+            verbose=False,
+            optionflags=optionflags,
+            checker=_get_checker(),
+            continue_on_failure=_get_continue_on_failure(self.config),
+        )
+        for test in finder.find(module, module.__name__):
+            if test.examples:  # skip empty doctests and cuda
+                yield DoctestItem.from_parent(self, name=test.name, runner=runner, dtest=test)
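A quick way to see what `preprocess_string` does is to feed it a small hypothetical snippet (assembled on one line here so the inner backticks do not collide with this page's formatting); this assumes the function is importable from `transformers.testing_utils`, where the hunk above adds it:

```python
from transformers.testing_utils import preprocess_string

# Hypothetical docstring fragment containing a `load_dataset` call.
snippet = "```python\n>>> from datasets import load_dataset\n>>> ds = load_dataset('c4')\n```\n"
print(preprocess_string(snippet, skip_cuda_tests=False))
# The `load_dataset` line now ends with `# doctest: +IGNORE_RESULT`.

# Hypothetical fragment touching CUDA, with CUDA skipping enabled.
cuda_snippet = "```python\n>>> import torch\n>>> model.to('cuda')\n```\n"
print(preprocess_string(cuda_snippet, skip_cuda_tests=True))
# Prints an empty string: the block is dropped, so no doctest is collected for it.
```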
utils/prepare_for_doc_test.py (deleted)

@@ -1,148 +0,0 @@
-# coding=utf-8
-# Copyright 2022 The HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" Style utils to preprocess files for doc tests.
-
- The doc processing function can be run on a list of files and/or
- directories of files. It will recursively check if the files have
- a python code snippet by looking for a ```python or ```py syntax.
- In the default mode - `remove_new_line==False` the script will
- add a new line before every python code ending ``` line to make
- the docstrings ready for pytest doctests.
- However, we don't want to have empty lines displayed in the
- official documentation which is why the new line command can be
- reversed by adding the flag `--remove_new_line` which sets
- `remove_new_line==True`.
-
- When debugging the doc tests locally, please make sure to
- always run:
-
- ```python utils/prepare_for_doc_test.py src docs```
-
- before running the doc tests:
-
- ```pytest --doctest-modules $(cat utils/documentation_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.mdx"```
-
- Afterwards you should revert the changes by running
-
- ```python utils/prepare_for_doc_test.py src docs --remove_new_line```
-"""
-
-import argparse
-import os
-
-
-def process_code_block(code, add_new_line=True):
-    if add_new_line:
-        return maybe_append_new_line(code)
-    else:
-        return maybe_remove_new_line(code)
-
-
-def maybe_append_new_line(code):
-    """
-    Append new line if code snippet is a
-    Python code snippet
-    """
-    lines = code.split("\n")
-
-    if lines[0] in ["py", "python"]:
-        # add new line before last line being ```
-        last_line = lines[-1]
-        lines.pop()
-        lines.append("\n" + last_line)
-
-    return "\n".join(lines)
-
-
-def maybe_remove_new_line(code):
-    """
-    Remove new line if code snippet is a
-    Python code snippet
-    """
-    lines = code.split("\n")
-
-    if lines[0] in ["py", "python"]:
-        # remove new line before last line being ```
-        lines = lines[:-2] + lines[-1:]
-
-    return "\n".join(lines)
-
-
-def process_doc_file(code_file, add_new_line=True):
-    """
-    Process given file.
-
-    Args:
-        code_file (`str` or `os.PathLike`): The file in which we want to style the docstring.
-    """
-    with open(code_file, "r", encoding="utf-8", newline="\n") as f:
-        code = f.read()
-
-    # fmt: off
-    splits = code.split("```")
-    if len(splits) % 2 != 1:
-        raise ValueError("The number of occurrences of ``` should be an even number.")
-
-    splits = [s if i % 2 == 0 else process_code_block(s, add_new_line=add_new_line) for i, s in enumerate(splits)]
-    clean_code = "```".join(splits)
-    # fmt: on
-
-    diff = clean_code != code
-    if diff:
-        print(f"Overwriting content of {code_file}.")
-        with open(code_file, "w", encoding="utf-8", newline="\n") as f:
-            f.write(clean_code)
-
-
-def process_doc_files(*files, add_new_line=True):
-    """
-    Applies doc styling or checks everything is correct in a list of files.
-
-    Args:
-        files (several `str` or `os.PathLike`): The files to treat.
-        Whether to restyle file or just check if they should be restyled.
-
-    Returns:
-        List[`str`]: The list of files changed or that should be restyled.
-    """
-    for file in files:
-        # Treat folders
-        if os.path.isdir(file):
-            files = [os.path.join(file, f) for f in os.listdir(file)]
-            files = [f for f in files if os.path.isdir(f) or f.endswith(".mdx") or f.endswith(".py")]
-            process_doc_files(*files, add_new_line=add_new_line)
-        else:
-            try:
-                process_doc_file(file, add_new_line=add_new_line)
-            except Exception:
-                print(f"There is a problem in {file}.")
-                raise
-
-
-def main(*files, add_new_line=True):
-    process_doc_files(*files, add_new_line=add_new_line)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("files", nargs="+", help="The file(s) or folder(s) to restyle.")
-    parser.add_argument(
-        "--remove_new_line",
-        action="store_true",
-        help="Whether to remove new line after each python code block instead of adding one.",
-    )
-    args = parser.parse_args()
-
-    main(*args.files, add_new_line=not args.remove_new_line)
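For context on what the deleted script used to do, here is a paraphrased sketch of its core helper applied to a hypothetical code-block body (the text that sat between two triple-backtick fences), which the new HfDocTestParser makes unnecessary:

```python
# Paraphrased sketch of the deleted `maybe_append_new_line` helper.
def maybe_append_new_line(code: str) -> str:
    lines = code.split("\n")
    if lines[0] in ["py", "python"]:
        # Insert a blank line before the closing fence so doctest sees the expected
        # output as terminated, even though black strips that blank line from the docs.
        last_line = lines.pop()
        lines.append("\n" + last_line)
    return "\n".join(lines)


block = "python\n>>> print(1 + 1)\n2\n"
print(repr(maybe_append_new_line(block)))
# 'python\n>>> print(1 + 1)\n2\n\n'
```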