Bring back the PR "Refactor doctests + add CI" to main (#23271)

* Revert "Revert "[Doctests] Refactor doctests + add CI" (#23245)"

  This reverts commit 69ee46243c.

* try to not expose HfDocTestParser

* move into testing_utils.py

* remove pytest install

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent: b2846afda8 · commit: 9088fcae82
.circleci/create_circleci_config.py

@@ -51,6 +51,8 @@ class CircleCIJob:
     resource_class: Optional[str] = "xlarge"
     tests_to_run: Optional[List[str]] = None
     working_directory: str = "~/transformers"
+    # This should be only used for doctest job!
+    command_timeout: Optional[int] = None
 
     def __post_init__(self):
         # Deal with defaults for mutable attributes.
@@ -107,11 +109,15 @@ class CircleCIJob:
         steps.append({"store_artifacts": {"path": "~/transformers/installed.txt"}})
 
         all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
-        pytest_flags = [f"--{key}={value}" if value is not None else f"-{key}" for key, value in all_options.items()]
+        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
         pytest_flags.append(
             f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
         )
-        test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+        test_command = ""
+        if self.command_timeout:
+            test_command = f"timeout {self.command_timeout} "
+        test_command += f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+
         if self.parallelism == 1:
             if self.tests_to_run is None:
                 test_command += " << pipeline.parameters.tests_to_run >>"
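Note: the assembled pytest command now gets an optional `timeout` prefix. A minimal, self-contained sketch of what this assembly produces (the option dict and numbers below are hypothetical, and the special-casing of `doctest-modules` is omitted):

```python
# Hypothetical values for illustration only; the real job options live in this config file.
command_timeout = 1200
pytest_num_workers = 1
all_options = {"doctest-glob": "*.mdx", "dist": "loadfile", "rvsA": None}

# Options with a value become long `--key=value` flags; valueless ones fall back to `-key`.
pytest_flags = [
    f"--{key}={value}" if value is not None else f"-{key}"
    for key, value in all_options.items()
]

test_command = ""
if command_timeout:
    # GNU `timeout` kills the run after `command_timeout` seconds and exits with 124,
    # which the "Check doctest results" step added below looks for.
    test_command = f"timeout {command_timeout} "
test_command += f"python -m pytest -n {pytest_num_workers} " + " ".join(pytest_flags)

print(test_command)
# timeout 1200 python -m pytest -n 1 --doctest-glob=*.mdx --dist=loadfile -rvsA
```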
@@ -161,12 +167,37 @@ class CircleCIJob:
             steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
             steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
 
-            test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+            test_command = ""
+            if self.timeout:
+                test_command = f"timeout {self.timeout} "
+            test_command += f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
             test_command += " $(cat splitted_tests.txt)"
         if self.marker is not None:
             test_command += f" -m {self.marker}"
-        test_command += " | tee tests_output.txt"
+
+        if self.name == "pr_documentation_tests":
+            # can't use ` | tee tests_output.txt` as usual
+            test_command += " > tests_output.txt"
+            # Save the return code, so we can check if it is timeout in the next step.
+            test_command += '; touch "$?".txt'
+            # Never fail the test step for the doctest job. We will check the results in the next step, and fail that
+            # step instead if the actual test failures are found. This is to avoid the timeout being reported as test
+            # failure.
+            test_command = f"({test_command}) || true"
+        else:
+            test_command += " | tee tests_output.txt"
         steps.append({"run": {"name": "Run tests", "command": test_command}})
+
+        # return code `124` means the previous (pytest run) step is timeout
+        if self.name == "pr_documentation_tests":
+            checkout_doctest_command = 'if [ -s reports/tests_pr_documentation_tests/failures_short.txt ]; '
+            checkout_doctest_command += 'then echo "some test failed"; '
+            checkout_doctest_command += 'cat reports/tests_pr_documentation_tests/failures_short.txt; '
+            checkout_doctest_command += 'cat reports/tests_pr_documentation_tests/summary_short.txt; exit -1; '
+            checkout_doctest_command += 'elif [ -s reports/tests_pr_documentation_tests/stats.txt ]; then echo "All tests pass!"; '
+            checkout_doctest_command += 'elif [ -f 124.txt ]; then echo "doctest timeout!"; else echo "other fatal error"; exit -1; fi;'
+            steps.append({"run": {"name": "Check doctest results", "command": checkout_doctest_command}})
+
         steps.append({"store_artifacts": {"path": "~/transformers/tests_output.txt"}})
         steps.append({"store_artifacts": {"path": "~/transformers/reports"}})
         job["steps"] = steps
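The doctest job deliberately decouples "run pytest" from "decide pass/fail": the run step records its exit code as `<code>.txt` and always succeeds, and a follow-up step inspects the report files plus the `124.txt` marker left when GNU `timeout` fires. A rough sketch of that pattern (the helper names and the `report_dir` default are made up, and the shell fragments are simplified relative to the config above):

```python
# Sketch only, not the project's code.
def wrap_run_command(test_command: str) -> str:
    # Redirect output to a file, record the exit code as "<code>.txt"
    # (124 when GNU `timeout` fired), and swallow the failure so the
    # "Run tests" step itself never fails.
    test_command += " > tests_output.txt"
    test_command += '; touch "$?".txt'
    return f"({test_command}) || true"


def build_check_command(report_dir: str = "reports/tests_pr_documentation_tests") -> str:
    # Fail the job only when pytest produced real failure reports; a bare
    # 124.txt marker is reported as a timeout instead of a test failure.
    check = f'if [ -s {report_dir}/failures_short.txt ]; then echo "some test failed"; '
    check += f'cat {report_dir}/failures_short.txt; exit 1; '
    check += f'elif [ -s {report_dir}/stats.txt ]; then echo "All tests pass!"; '
    check += 'elif [ -f 124.txt ]; then echo "doctest timeout!"; else echo "other fatal error"; exit 1; fi;'
    return check


print(wrap_run_command("timeout 1200 python -m pytest --doctest-modules dummy.py"))
print(build_check_command())
```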
@@ -401,6 +432,51 @@ repo_utils_job = CircleCIJob(
     tests_to_run="tests/repo_utils",
 )
 
+
+# At this moment, only the files that are in `utils/documentation_tests.txt` will be kept (together with a dummy file).
+py_command = 'import os; import json; fp = open("pr_documentation_tests.txt"); data_1 = fp.read().strip().split("\\n"); fp = open("utils/documentation_tests.txt"); data_2 = fp.read().strip().split("\\n"); to_test = [x for x in data_1 if x in set(data_2)] + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
+py_command = f"$(python3 -c '{py_command}')"
+command = f'echo "{py_command}" > pr_documentation_tests_filtered.txt'
+doc_test_job = CircleCIJob(
+    "pr_documentation_tests",
+    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time",
+        "pip install --upgrade pip",
+        "pip install -e .[dev]",
+        "pip install git+https://github.com/huggingface/accelerate",
+        "pip install --upgrade pytest pytest-sugar",
+        "find -name __pycache__ -delete",
+        "find . -name \*.pyc -delete",
+        # Add an empty file to keep the test step running correctly even if no file is selected to be tested.
+        "touch dummy.py",
+        {
+            "name": "Get files to test",
+            "command":
+                "git remote add upstream https://github.com/huggingface/transformers.git && git fetch upstream \n"
+                "git diff --name-only --relative --diff-filter=AMR refs/remotes/upstream/main...HEAD | grep -E '\.(py|mdx)$' | grep -Ev '^\..*|/\.' | grep -Ev '__' > pr_documentation_tests.txt"
+        },
+        {
+            "name": "List files being changed: pr_documentation_tests.txt",
+            "command":
+                "cat pr_documentation_tests.txt"
+        },
+        {
+            "name": "Filter pr_documentation_tests.txt",
+            "command":
+                command
+        },
+        {
+            "name": "List files being tested: pr_documentation_tests_filtered.txt",
+            "command":
+                "cat pr_documentation_tests_filtered.txt"
+        },
+    ],
+    tests_to_run="$(cat pr_documentation_tests_filtered.txt)",  # noqa
+    pytest_options={"-doctest-modules": None, "doctest-glob": "*.mdx", "dist": "loadfile", "rvsA": None},
+    command_timeout=1200,  # test cannot run longer than 1200 seconds
+    pytest_num_workers=1,
+)
+
 REGULAR_TESTS = [
     torch_and_tf_job,
     torch_and_flax_job,
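The inline `py_command` one-liner is easier to follow spelled out. A standalone sketch of the same filtering (the function name is made up; the two file names are the ones used by the job steps above, which exist in the CI working directory):

```python
# Sketch of the filtering performed by `py_command`.
def select_documentation_tests(
    changed_files_path: str = "pr_documentation_tests.txt",
    allow_list_path: str = "utils/documentation_tests.txt",
) -> str:
    # Files touched by the PR, one path per line (written by the "Get files to test" step).
    with open(changed_files_path) as fp:
        changed = fp.read().strip().split("\n")
    # Files currently opted in to doctest collection.
    with open(allow_list_path) as fp:
        allowed = set(fp.read().strip().split("\n"))
    # Keep only changed files that are in the allow list; `dummy.py` keeps the
    # pytest invocation valid even when nothing matches.
    to_test = [f for f in changed if f in allowed] + ["dummy.py"]
    return " ".join(to_test)


print(select_documentation_tests())
```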
@@ -411,6 +487,7 @@ REGULAR_TESTS = [
     hub_job,
     onnx_job,
     exotic_models_job,
+    doc_test_job
 ]
 EXAMPLES_TESTS = [
     examples_torch_job,
.github/workflows/doctests.yml

@@ -37,18 +37,10 @@ jobs:
       - name: Show installed libraries and their versions
         run: pip freeze
 
-      - name: Prepare files for doctests
-        run: |
-          python3 utils/prepare_for_doc_test.py src docs
-
       - name: Run doctests
         run: |
           python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat utils/documentation_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.mdx"
 
-      - name: Clean files after doctests
-        run: |
-          python3 utils/prepare_for_doc_test.py src docs --remove_new_line
-
       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
Makefile

@@ -47,10 +47,10 @@ repo-consistency:
 # this target runs checks on all files
 
 quality:
-	black --check $(check_dirs) setup.py
+	black --check $(check_dirs) setup.py conftest.py
 	python utils/custom_init_isort.py --check_only
 	python utils/sort_auto_mappings.py --check_only
-	ruff $(check_dirs) setup.py
+	ruff $(check_dirs) setup.py conftest.py
 	doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
 	python utils/check_doc_toc.py
 
@@ -65,8 +65,8 @@ extra_style_checks:
 # this target runs checks on all files and potentially modifies some of them
 
 style:
-	black $(check_dirs) setup.py
-	ruff $(check_dirs) setup.py --fix
+	black $(check_dirs) setup.py conftest.py
+	ruff $(check_dirs) setup.py conftest.py --fix
 	${MAKE} autogenerate_code
 	${MAKE} extra_style_checks
 
conftest.py

@@ -20,6 +20,10 @@ import sys
 import warnings
 from os.path import abspath, dirname, join
 
+import _pytest
+
+from transformers.testing_utils import HfDoctestModule, HfDocTestParser
+
 
 # allow having multiple repository checkouts and not needing to remember to rerun
 # 'pip install -e .[dev]' when switching between checkouts and running tests.
@@ -38,9 +42,7 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
     )
-    config.addinivalue_line(
-        "markers", "is_pipeline_test: mark test to run only when pipelines are tested"
-    )
+    config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
     config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
     config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
     config.addinivalue_line("markers", "tool_tests: mark the tool tests that are run on their specific schedule")
@@ -67,7 +69,7 @@ def pytest_sessionfinish(session, exitstatus):
 
 
 # Doctest custom flag to ignore output.
-IGNORE_RESULT = doctest.register_optionflag('IGNORE_RESULT')
+IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")
 
 OutputChecker = doctest.OutputChecker
 
@@ -80,3 +82,5 @@ class CustomOutputChecker(OutputChecker):
 
 
 doctest.OutputChecker = CustomOutputChecker
+_pytest.doctest.DoctestModule = HfDoctestModule
+doctest.DocTestParser = HfDocTestParser
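For reference, the `IGNORE_RESULT` option flag that appears in this hunk's context is honored by a custom output checker along these lines (a sketch of the pattern, not necessarily the exact class body in `conftest.py`):

```python
import doctest

# Custom flag so individual lines can be tagged `# doctest: +IGNORE_RESULT`.
IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")

OutputChecker = doctest.OutputChecker


class CustomOutputChecker(OutputChecker):
    def check_output(self, want, got, optionflags):
        # Accept any output for examples carrying the IGNORE_RESULT flag.
        if IGNORE_RESULT & optionflags:
            return True
        return OutputChecker.check_output(self, want, got, optionflags)


doctest.OutputChecker = CustomOutputChecker
```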
docs/source/en/testing.mdx

@@ -212,20 +212,12 @@
 ```"""
 
 ```
-3 steps are required to debug the docstring examples:
-1. In order to properly run the test, **an extra line has to be added** at the end of the docstring. This can be automatically done on any file using:
-```bash
-python utils/prepare_for_doc_test.py <path_to_file_or_dir>
-```
-
-2. Then, you can use the following line to automatically test every docstring example in the desired file:
+Just run the following line to automatically test every docstring example in the desired file:
 ```bash
 pytest --doctest-modules <path_to_file_or_dir>
 ```
-3. Once you are done debugging, you need to remove the extra line added in step **1.** by running the following:
-```bash
-python utils/prepare_for_doc_test.py <path_to_file_or_dir> --remove_new_line
-```
+If the file has a markdown extension, you should add the `--doctest-glob="*.mdx"` argument.
 
 ### Run only modified tests
 
setup.cfg

@@ -1,2 +1,3 @@
 [tool:pytest]
 doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS
+doctest_glob=**/*.mdx
src/transformers/testing_utils.py

@@ -14,6 +14,7 @@
 
 import collections
 import contextlib
+import doctest
 import functools
 import inspect
 import logging
@@ -30,11 +31,23 @@ import unittest
 from collections.abc import Mapping
 from io import StringIO
 from pathlib import Path
-from typing import Iterator, List, Optional, Union
+from typing import Iterable, Iterator, List, Optional, Union
 from unittest import mock
 
 import huggingface_hub
 import requests
+from _pytest.doctest import (
+    Module,
+    _get_checker,
+    _get_continue_on_failure,
+    _get_runner,
+    _is_mocked,
+    _patch_unwrap_mock_aware,
+    get_optionflags,
+    import_path,
+)
+from _pytest.outcomes import skip
+from pytest import DoctestItem
 
 from transformers import logging as transformers_logging
 
@@ -1812,3 +1825,162 @@ def run_test_in_subprocess(test_case, target_func, inputs=None, timeout=None):
 
     if results["error"] is not None:
         test_case.fail(f'{results["error"]}')
+
+
+"""
+The following contains utils to run the documentation tests without having to overwrite any files.
+
+The `preprocess_string` function adds `# doctest: +IGNORE_RESULT` markers on the fly anywhere a `load_dataset` call is
+made as a print would otherwise fail the corresponding line.
+
+To skip cuda tests, make sure to call `SKIP_CUDA_DOCTEST=1 pytest --doctest-modules <path_to_files_to_test>`
+"""
+
+
+def preprocess_string(string, skip_cuda_tests):
+    """Prepare a docstring or a `.mdx` file to be run by doctest.
+
+    The argument `string` would be the whole file content if it is a `.mdx` file. For a python file, it would be one of
+    its docstrings. In each case, it may contain multiple python code examples. If `skip_cuda_tests` is `True` and
+    CUDA code is detected (with a heuristic), this method will return an empty string so no doctest will be run for
+    `string`.
+    """
+    codeblock_pattern = r"(```(?:python|py)\s*\n\s*>>> )((?:.*?\n)*?.*?```)"
+    codeblocks = re.split(re.compile(codeblock_pattern, flags=re.MULTILINE | re.DOTALL), string)
+    is_cuda_found = False
+    for i, codeblock in enumerate(codeblocks):
+        if "load_dataset(" in codeblock and "# doctest: +IGNORE_RESULT" not in codeblock:
+            codeblocks[i] = re.sub(r"(>>> .*load_dataset\(.*)", r"\1 # doctest: +IGNORE_RESULT", codeblock)
+        if (
+            (">>>" in codeblock or "..." in codeblock)
+            and re.search(r"cuda|to\(0\)|device=0", codeblock)
+            and skip_cuda_tests
+        ):
+            is_cuda_found = True
+            break
+    modified_string = ""
+    if not is_cuda_found:
+        modified_string = "".join(codeblocks)
+    return modified_string
+
+
+class HfDocTestParser(doctest.DocTestParser):
+    """
+    Overwrites the DocTestParser from doctest to properly parse the codeblocks that are formatted with black. This
+    means that there are no extra lines at the end of our snippets. The `# doctest: +IGNORE_RESULT` marker is also
+    added anywhere a `load_dataset` call is made as a print would otherwise fail the corresponding line.
+
+    Tests involving cuda are skipped based on a naive pattern that should be updated if it is not enough.
+    """
+
+    # This regular expression is used to find doctest examples in a
+    # string. It defines three groups: `source` is the source code
+    # (including leading indentation and prompts); `indent` is the
+    # indentation of the first (PS1) line of the source code; and
+    # `want` is the expected output (including leading indentation).
+    # fmt: off
+    _EXAMPLE_RE = re.compile(r'''
+        # Source consists of a PS1 line followed by zero or more PS2 lines.
+        (?P<source>
+            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
+            (?:\n           [ ]*  \.\.\. .*)*)  # PS2 lines
+        \n?
+        # Want consists of any non-blank lines that do not start with PS1.
+        (?P<want> (?:(?![ ]*$)    # Not a blank line
+                     (?![ ]*>>>)  # Not a line starting with PS1
+                     # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+                     (?:(?!```).)*  # Match any character except '`' until a '```' is found (this is specific to HF because black removes the last line)
+                     # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+                     (?:\n|$)  # Match a new line or end of string
+                  )*)
+        ''', re.MULTILINE | re.VERBOSE
+    )
+    # fmt: on
+
+    # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+    skip_cuda_tests: bool = bool(os.environ.get("SKIP_CUDA_DOCTEST", False))
+    # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+
+    def parse(self, string, name="<string>"):
+        """
+        Overwrites the `parse` method to incorporate a skip for CUDA tests, and remove logs and dataset prints before
+        calling `super().parse`
+        """
+        string = preprocess_string(string, self.skip_cuda_tests)
+        return super().parse(string, name)
+
+
+class HfDoctestModule(Module):
+    """
+    Overwrites the `DoctestModule` of the pytest package to make sure the `HfDocTestParser` is used when discovering
+    tests.
+    """
+
+    def collect(self) -> Iterable[DoctestItem]:
+        class MockAwareDocTestFinder(doctest.DocTestFinder):
+            """A hackish doctest finder that overrides stdlib internals to fix a stdlib bug.
+
+            https://github.com/pytest-dev/pytest/issues/3456 https://bugs.python.org/issue25532
+            """
+
+            def _find_lineno(self, obj, source_lines):
+                """Doctest code does not take into account `@property`, this
+                is a hackish way to fix it. https://bugs.python.org/issue17446
+
+                Wrapped Doctests will need to be unwrapped so the correct line number is returned. This will be
+                reported upstream. #8796
+                """
+                if isinstance(obj, property):
+                    obj = getattr(obj, "fget", obj)
+
+                if hasattr(obj, "__wrapped__"):
+                    # Get the main obj in case of it being wrapped
+                    obj = inspect.unwrap(obj)
+
+                # Type ignored because this is a private function.
+                return super()._find_lineno(  # type:ignore[misc]
+                    obj,
+                    source_lines,
+                )
+
+            def _find(self, tests, obj, name, module, source_lines, globs, seen) -> None:
+                if _is_mocked(obj):
+                    return
+                with _patch_unwrap_mock_aware():
+                    # Type ignored because this is a private function.
+                    super()._find(  # type:ignore[misc]
+                        tests, obj, name, module, source_lines, globs, seen
+                    )
+
+        if self.path.name == "conftest.py":
+            module = self.config.pluginmanager._importconftest(
+                self.path,
+                self.config.getoption("importmode"),
+                rootpath=self.config.rootpath,
+            )
+        else:
+            try:
+                module = import_path(
+                    self.path,
+                    root=self.config.rootpath,
+                    mode=self.config.getoption("importmode"),
+                )
+            except ImportError:
+                if self.config.getvalue("doctest_ignore_import_errors"):
+                    skip("unable to import module %r" % self.path)
+                else:
+                    raise
+
+        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+        finder = MockAwareDocTestFinder(parser=HfDocTestParser())
+        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+        optionflags = get_optionflags(self)
+        runner = _get_runner(
+            verbose=False,
+            optionflags=optionflags,
+            checker=_get_checker(),
+            continue_on_failure=_get_continue_on_failure(self.config),
+        )
+        for test in finder.find(module, module.__name__):
+            if test.examples:  # skip empty doctests and cuda
+                yield DoctestItem.from_parent(self, name=test.name, runner=runner, dtest=test)
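A quick way to see what `preprocess_string` does is to feed it a small hypothetical snippet (assembled on one line here so the inner backticks do not collide with this page's formatting); this assumes the function is importable from `transformers.testing_utils`, where the hunk above adds it:

```python
from transformers.testing_utils import preprocess_string

# Hypothetical docstring fragment containing a `load_dataset` call.
snippet = "```python\n>>> from datasets import load_dataset\n>>> ds = load_dataset('c4')\n```\n"
print(preprocess_string(snippet, skip_cuda_tests=False))
# The `load_dataset` line now ends with `# doctest: +IGNORE_RESULT`.

# Hypothetical fragment touching CUDA, with CUDA skipping enabled.
cuda_snippet = "```python\n>>> import torch\n>>> model.to('cuda')\n```\n"
print(preprocess_string(cuda_snippet, skip_cuda_tests=True))
# Prints an empty string: the block is dropped, so no doctest is collected for it.
```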
utils/prepare_for_doc_test.py (deleted)

@@ -1,148 +0,0 @@
-# coding=utf-8
-# Copyright 2022 The HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" Style utils to preprocess files for doc tests.
-
- The doc processing function can be run on a list of files and/or
- directories of files. It will recursively check if the files have
- a python code snippet by looking for a ```python or ```py syntax.
- In the default mode - `remove_new_line==False` the script will
- add a new line before every python code ending ``` line to make
- the docstrings ready for pytest doctests.
- However, we don't want to have empty lines displayed in the
- official documentation which is why the new line command can be
- reversed by adding the flag `--remove_new_line` which sets
- `remove_new_line==True`.
-
- When debugging the doc tests locally, please make sure to
- always run:
-
- ```python utils/prepare_for_doc_test.py src docs```
-
- before running the doc tests:
-
- ```pytest --doctest-modules $(cat utils/documentation_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.mdx"```
-
- Afterwards you should revert the changes by running
-
- ```python utils/prepare_for_doc_test.py src docs --remove_new_line```
-"""
-
-import argparse
-import os
-
-
-def process_code_block(code, add_new_line=True):
-    if add_new_line:
-        return maybe_append_new_line(code)
-    else:
-        return maybe_remove_new_line(code)
-
-
-def maybe_append_new_line(code):
-    """
-    Append new line if code snippet is a
-    Python code snippet
-    """
-    lines = code.split("\n")
-
-    if lines[0] in ["py", "python"]:
-        # add new line before last line being ```
-        last_line = lines[-1]
-        lines.pop()
-        lines.append("\n" + last_line)
-
-    return "\n".join(lines)
-
-
-def maybe_remove_new_line(code):
-    """
-    Remove new line if code snippet is a
-    Python code snippet
-    """
-    lines = code.split("\n")
-
-    if lines[0] in ["py", "python"]:
-        # remove new line before last line being ```
-        lines = lines[:-2] + lines[-1:]
-
-    return "\n".join(lines)
-
-
-def process_doc_file(code_file, add_new_line=True):
-    """
-    Process given file.
-
-    Args:
-        code_file (`str` or `os.PathLike`): The file in which we want to style the docstring.
-    """
-    with open(code_file, "r", encoding="utf-8", newline="\n") as f:
-        code = f.read()
-
-    # fmt: off
-    splits = code.split("```")
-    if len(splits) % 2 != 1:
-        raise ValueError("The number of occurrences of ``` should be an even number.")
-
-    splits = [s if i % 2 == 0 else process_code_block(s, add_new_line=add_new_line) for i, s in enumerate(splits)]
-    clean_code = "```".join(splits)
-    # fmt: on
-
-    diff = clean_code != code
-    if diff:
-        print(f"Overwriting content of {code_file}.")
-        with open(code_file, "w", encoding="utf-8", newline="\n") as f:
-            f.write(clean_code)
-
-
-def process_doc_files(*files, add_new_line=True):
-    """
-    Applies doc styling or checks everything is correct in a list of files.
-
-    Args:
-        files (several `str` or `os.PathLike`): The files to treat.
-        Whether to restyle file or just check if they should be restyled.
-
-    Returns:
-        List[`str`]: The list of files changed or that should be restyled.
-    """
-    for file in files:
-        # Treat folders
-        if os.path.isdir(file):
-            files = [os.path.join(file, f) for f in os.listdir(file)]
-            files = [f for f in files if os.path.isdir(f) or f.endswith(".mdx") or f.endswith(".py")]
-            process_doc_files(*files, add_new_line=add_new_line)
-        else:
-            try:
-                process_doc_file(file, add_new_line=add_new_line)
-            except Exception:
-                print(f"There is a problem in {file}.")
-                raise
-
-
-def main(*files, add_new_line=True):
-    process_doc_files(*files, add_new_line=add_new_line)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("files", nargs="+", help="The file(s) or folder(s) to restyle.")
-    parser.add_argument(
-        "--remove_new_line",
-        action="store_true",
-        help="Whether to remove new line after each python code block instead of adding one.",
-    )
-    args = parser.parse_args()
-
-    main(*args.files, add_new_line=not args.remove_new_line)
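For context on what the deleted script used to do, here is a paraphrased sketch of its core helper applied to a hypothetical code-block body (the text that sat between two triple-backtick fences), which the new HfDocTestParser makes unnecessary:

```python
# Paraphrased sketch of the deleted `maybe_append_new_line` helper.
def maybe_append_new_line(code: str) -> str:
    lines = code.split("\n")
    if lines[0] in ["py", "python"]:
        # Insert a blank line before the closing fence so doctest sees the expected
        # output as terminated, even though black strips that blank line from the docs.
        last_line = lines.pop()
        lines.append("\n" + last_line)
    return "\n".join(lines)


block = "python\n>>> print(1 + 1)\n2\n"
print(repr(maybe_append_new_line(block)))
# 'python\n>>> print(1 + 1)\n2\n\n'
```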