From 5061a9fd557e9a7b0c8fbabb47a446e8776d9e16 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Thu, 16 Dec 2021 09:29:26 -0500 Subject: [PATCH] Post sphinx-clean up and contributing guide updates (#14790) * Clean up sphinx * Update contributing guide * Update docs README * No example title * Fix copies * Update CONTRIBUTING.md Co-authored-by: Lysandre Debut Co-authored-by: Lysandre Debut --- CONTRIBUTING.md | 91 ++++--- Makefile | 5 - docs/Makefile | 19 -- docs/README.md | 239 +++++++----------- setup.py | 23 +- src/transformers/dependency_versions_table.py | 9 - 6 files changed, 157 insertions(+), 229 deletions(-) delete mode 100644 docs/Makefile diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4dadc37af6a..bf5415614c3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -124,7 +124,7 @@ issues to make sure that nobody is already working on the same thing. If you are unsure, it is always a good idea to open an issue to get some feedback. You will need basic `git` proficiency to be able to contribute to -`transformers`. `git` is not the easiest tool to use but it has the greatest +🤗 Transformers. `git` is not the easiest tool to use but it has the greatest manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro Git](https://git-scm.com/book/en/v2) is a very good reference. @@ -175,34 +175,26 @@ Follow these steps to start contributing: 5. Develop the features on your branch. As you work on the features, you should make sure that the test suite - passes: + passes. You should run the tests impacted by your changes like this: + + ```bash + $ pytest tests/<TEST_TO_RUN>.py + ``` + + You can also run the full suite with the following command, but it takes + a beefy machine to produce a result in a decent amount of time now that + Transformers has grown a lot. 
Here is the command for it: ```bash $ make test ``` - Note, that this command uses `-n auto` pytest flag, therefore, it will start as many parallel `pytest` processes as the number of your computer's CPU-cores, and if you have lots of those and a few GPUs and not a great amount of RAM, it's likely to overload your computer. Therefore, to run the test suite, you may want to consider using this command instead: + For more information about tests, check out the + [dedicated documentation](https://huggingface.co/docs/transformers/testing) - ```bash - $ python -m pytest -n 3 --dist=loadfile -s -v ./tests/ - ``` - - Adjust the value of `-n` to fit the load your hardware can support. - - `transformers` relies on `black` and `isort` to format its source code - consistently. After you make changes, format them with: - - ```bash - $ make style - ``` - - `transformers` also uses `flake8` and a few custom scripts to check for coding mistakes. Quality - control runs in CI, however you can also run the same checks with: - - ```bash - $ make quality - ``` - You can do the automatic style corrections and code verifications that can't be automated in one go: + 🤗 Transformers relies on `black` and `isort` to format its source code + consistently. After you make changes, apply automatic style corrections and code verifications + that can't be automated in one go with: ```bash $ make fixup @@ -210,16 +202,55 @@ Follow these steps to start contributing: This target is also optimized to only work with files modified by the PR you're working on. - If you're modifying documents under `docs/source`, make sure to validate that - they can still be built. This check also runs in CI. 
To run a local check - make sure you have installed the documentation builder requirements, by - running `pip install .[tf,torch,docs]` once from the root of this repository - and then run: + If you prefer to run the checks one after the other, the following command applies the + style corrections: ```bash - $ make docs + $ make style ``` + 🤗 Transformers also uses `flake8` and a few custom scripts to check for coding mistakes. Quality + control runs in CI, however you can also run the same checks with: + + ```bash + $ make quality + ``` + + Finally we have a lot of scripts that check we didn't forget to update + some files when adding a new model, that you can run with + + ```bash + $ make repo-consistency + ``` + + To learn more about those checks and how to fix any issue with them, check out the + [documentation](https://huggingface.co/docs/transformers/pr_checks) + + If you're modifying documents under `docs/source`, make sure to validate that + they can still be built. This check also runs in CI. To run a local check + make sure you have installed the documentation builder requirements. First you will need to clone the + repository containing our tools to build the documentation: + + ```bash + $ pip install git+https://github.com/huggingface/doc-builder + ``` + + Then, make sure you have all the dependencies to be able to build the doc with: + + ```bash + $ pip install .[docs] + ``` + + Finally run the following command from the root of the repository: + + ```bash + $ doc-builder build transformers docs/source/ --build_dir ~/tmp/test-build + ``` + + This will build the documentation in the `~/tmp/test-build` folder where you can inspect the generated + Markdown files with your favorite editor. You won't be able to see the final rendering on the website + before your PR is merged, we are actively working on adding a tool for this. 
+ Once you're happy with your changes, add changed files using `git add` and make a commit with `git commit` to record your changes locally: @@ -333,7 +364,7 @@ $ python -m unittest discover -s examples -t examples -v ### Style guide -For documentation strings, `transformers` follows the [google style](https://google.github.io/styleguide/pyguide.html). +For documentation strings, 🤗 Transformers follows the [google style](https://google.github.io/styleguide/pyguide.html). Check our [documentation writing guide](https://github.com/huggingface/transformers/tree/master/docs#writing-documentation---specification) for more information. diff --git a/Makefile b/Makefile index 51091e09909..75b75266253 100644 --- a/Makefile +++ b/Makefile @@ -91,11 +91,6 @@ test-sagemaker: # install sagemaker dependencies in advance with pip install .[s TEST_SAGEMAKER=True python -m pytest -n auto -s -v ./tests/sagemaker -# Check that docs can build - -docs: - cd docs && make html SPHINXOPTS="-W -j 4" - # Release stuff pre-release: diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 8879933e6cd..00000000000 --- a/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = source -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 36b39f3e67a..9e6a29424b9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -23,6 +23,12 @@ you can install them with the following command, at the root of the code reposit pip install -e ".[docs]" ``` +Then you need to install our special tool that builds the documentation: + +```bash +pip install git+https://github.com/huggingface/doc-builder +``` + --- **NOTE** @@ -31,60 +37,34 @@ check how they look like before committing for instance). You don't have to comm --- -## Packages installed - -Here's an overview of all the packages installed. If you ran the previous command installing all packages from -`requirements.txt`, you do not need to run the following commands. - -Building it requires the package `sphinx` that you can -install using: - -```bash -pip install -U sphinx -``` - -You would also need the custom installed [theme](https://github.com/readthedocs/sphinx_rtd_theme) by -[Read The Docs](https://readthedocs.org/). You can install it using the following command: - -```bash -pip install sphinx_rtd_theme -``` - -The third necessary package is the `recommonmark` package to accept Markdown as well as Restructured text: - -```bash -pip install recommonmark -``` - ## Building the documentation -Once you have setup `sphinx`, you can build the documentation by running the following command in the `/docs` folder: +Once you have setup the `doc-builder` and additional packages, you can generate the documentation by typing the +following command: ```bash -make html +doc-builder build transformers docs/source/ --build_dir ~/tmp/test-build ``` -A folder called ``_build/html`` should have been created. You can now open the file ``_build/html/index.html`` in your -browser. +You can adapt the `--build_dir` to set any temporary folder that you prefer. 
This command will create it and generate +the MDX files that will be rendered as the documentation on the main website. You can inspect them in your favorite +Markdown editor. --- **NOTE** -If you are adding/removing elements from the toc-tree or from any structural item, it is recommended to clean the build -directory before rebuilding. Run the following command to clean and build: - -```bash -make clean && make html -``` +It's not possible to see locally how the final documentation will look like for now. We are working on solutions to +enable this, but any pre-visualiser of Markdown file should already give you a good idea of the result! --- -It should build the static app that will be available under `/docs/_build/html` +## Adding a new element to the navigation bar -## Adding a new element to the tree (toc-tree) +Accepted files are reStructuredText (.rst) and Markdown (.md or .mdx). We are progressively moving away from rst so you should +create any new documentation file in the .mdx format. -Accepted files are reStructuredText (.rst) and Markdown (.md). Create a file with its extension and put it -in the source directory. You can then link it to the toc-tree by putting the filename without the extension. +Create a file with its extension and put it in the source directory. You can then link it to the toc-tree by putting +the filename without the extension in the [`_toctree.yml`](https://github.com/huggingface/transformers/blob/master/docs/source/_toctree.yml) file. ## Renaming section headers and moving sections @@ -114,31 +94,22 @@ For an example of a rich moved sections set please see the very end of [the Trai ## Preview the documentation in a pull request -Once you have made your pull request, you can check what the documentation will look like after it's merged by -following these steps: - -- Look at the checks at the bottom of the conversation page of your PR (you may need to click on "show all checks" to - expand them). 
-- Click on "details" next to the `ci/circleci: build_doc` check. -- In the new window, click on the "Artifacts" tab. -- Locate the file "docs/_build/html/index.html" (or any specific page you want to check) and click on it to get a - preview. +Coming soon! ## Writing Documentation - Specification The `huggingface/transformers` documentation follows the -[Google documentation](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) style. It is -mostly written in ReStructuredText -([Sphinx simple documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html), -[Sourceforge complete documentation](https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html)). - +[Google documentation](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) style for docstrings, +although we can write them directly in Markdown. Parts of it are written in ReStructuredText +([Sphinx simple documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html)) but we are +updating those. ### Adding a new tutorial Adding a new tutorial or section is done in two steps: - Add a new file under `./source`. This file can either be ReStructuredText (.rst) or Markdown (.md). -- Link that file in `./source/index.rst` on the correct toc-tree. +- Link that file in `./source/_toctree.yml` on the correct toc-tree. Make sure to put your new file under the proper section. It's unlikely to go in the first section (*Get Started*), so depending on the intended targets (beginners, more advanced users or researchers) it should go in section two, three or @@ -148,8 +119,8 @@ four. When adding a new model: -- Create a file `xxx.rst` under `./source/model_doc` (don't hesitate to copy an existing file as template). -- Link that file in `./source/index.rst` on the `model_doc` toc-tree. +- Create a file `xxx.mdx` under `./source/model_doc` (don't hesitate to copy an existing file as template). 
+- Link that file in `./source/_toctree.yml`. - Write a short overview of the model: - Overview with paper & authors - Paper abstract @@ -164,63 +135,79 @@ When adding a new model: - TensorFlow base model - TensorFlow head models -These classes should be added using the RST syntax. Usually as follows: -``` -XXXConfig -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +These classes should be added using our Markdown syntax. Usually as follows: -.. autoclass:: transformers.XXXConfig - :members: +``` +## XXXConfig + +[[autodoc]] XXXConfig ``` This will include every public method of the configuration that is documented. If for some reason you wish for a method not to be displayed in the documentation, you can do so by specifying which methods should be in the docs: ``` -XXXTokenizer -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## XXXTokenizer -.. autoclass:: transformers.XXXTokenizer - :members: build_inputs_with_special_tokens, get_special_tokens_mask, - create_token_type_ids_from_sequences, save_vocabulary +[[autodoc]] XXXTokenizer + - build_inputs_with_special_tokens + - get_special_tokens_mask + - create_token_type_ids_from_sequences + - save_vocabulary +``` +If you just want to add a method that is not documented (for instance magic method like `__call__` are not documented +byt default) you can put the list of methods to add in a list that contains `all`: + +``` +## XXXTokenizer + +[[autodoc]] XXXTokenizer + - all + - __call__ ``` ### Writing source documentation -Values that should be put in `code` should either be surrounded by double backticks: \`\`like so\`\` or be written as -an object using the :obj: syntax: :obj:\`like so\`. Note that argument names and objects like True, None or any strings -should usually be put in `code`. 
+Values that should be put in `code` should be surrounded by backticks: \`like so\`. Note that argument names +and objects like True, None or any strings should usually be put in `code`. -When mentioning a class, it is recommended to use the :class: syntax as the mentioned class will be automatically -linked by Sphinx: :class:\`~transformers.XXXClass\` +When mentioning a class, function or method, it is recommended to use our syntax for internal links so that our tool +adds a link to its documentation with this syntax: \[\`XXXClass\`\] or \[\`function\`\]. This requires the class or +function to be in the main package. -When mentioning a function, it is recommended to use the :func: syntax as the mentioned function will be automatically -linked by Sphinx: :func:\`~transformers.function\`. +If you want to create a link to some internal class or function, you need to +provide its path. For instance: \[\`file_utils.ModelOutput\`\]. This will be converted into a link with +`file_utils.ModelOutput` in the description. To get rid of the path and only keep the name of the object you are +linking to, add a ~: \[\`~file_utils.ModelOutput\`\] will generate a link with `ModelOutput` in the description. -When mentioning a method, it is recommended to use the :meth: syntax as the mentioned method will be automatically -linked by Sphinx: :meth:\`~transformers.XXXClass.method\`. - -Links should be done as so (note the double underscore at the end): \`text for the link <./local-link-or-global-link#loc>\`__ +The same works for methods so you can either use \[\`XXXClass.method\`\] or \[\`~XXXClass.method\`\]. #### Defining arguments in a method -Arguments should be defined with the `Args:` prefix, followed by a line return and an indentation. -The argument should be followed by its type, with its shape if it is a tensor, and a line return. -Another indentation is necessary before writing the description of the argument. 
+Arguments should be defined with the `Args:` (or `Arguments:` or `Parameters:`) prefix, followed by a line return and +an indentation. The argument should be followed by its type, with its shape if it is a tensor, a colon and its +description: + +``` + Args: + n_layers (`int`): The number of layers of the model. +``` + +If the description is too long to fit in one line, another indentation is necessary before writing the description +after the argument. Here's an example showcasing everything so far: ``` Args: - input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`): + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): Indices of input sequence tokens in the vocabulary. - Indices can be obtained using :class:`~transformers.AlbertTokenizer`. - See :meth:`~transformers.PreTrainedTokenizer.encode` and - :meth:`~transformers.PreTrainedTokenizer.__call__` for details. + Indices can be obtained using [`AlbertTokenizer`]. See [`~PreTrainedTokenizer.encode`] and + [`~PreTrainedTokenizer.__call__`] for details. - `What are input IDs? <../glossary.html#input-ids>`__ + [What are input IDs?](../glossary#input-ids) ``` For optional arguments or arguments with defaults we follow the following syntax: imagine we have a function with the @@ -234,97 +221,56 @@ then its documentation should look like this: ``` Args: - x (:obj:`str`, `optional`): + x (`str`, *optional*): This argument controls ... - a (:obj:`float`, `optional`, defaults to 1): + a (`float`, *optional*, defaults to 1): This argument is used to ... ``` -Note that we always omit the "defaults to :obj:\`None\`" when None is the default for any argument. Also note that even +Note that we always omit the "defaults to \`None\`" when None is the default for any argument. Also note that even if the first line describing your argument type and its default gets long, you can't break it on several lines. 
You can however write as many lines as you want in the indented description (see the example above with `input_ids`). #### Writing a multi-line code block -Multi-line code blocks can be useful for displaying examples. They are done like so: +Multi-line code blocks can be useful for displaying examples. They are done between two lines of three backticks as usual in Markdown: + +```` ``` -Example:: - - # first line of code - # second line - # etc +# first line of code +# second line +# etc ``` - -The `Example` string at the beginning can be replaced by anything as long as there are two semicolons following it. +```` We follow the [doctest](https://docs.python.org/3/library/doctest.html) syntax for the examples to automatically test the results stay consistent with the library. #### Writing a return block -Arguments should be defined with the `Args:` prefix, followed by a line return and an indentation. +The return block should be introduced with the `Returns:` prefix, followed by a line return and an indentation. The first line should be the type of the return, followed by a line return. No need to indent further for the elements building the return. -Here's an example for tuple return, comprising several objects: - -``` - Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: - loss (`optional`, returned when ``masked_lm_labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: - Total loss as the sum of the masked language modeling loss and the next sequence prediction (classification) loss. - prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`) - Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
-``` - Here's an example for a single value return: ``` Returns: - :obj:`List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token. + `List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token. ``` -#### Adding a new section - -In ReST section headers are designated as such with the help of a line of underlying characters, e.g.,: +Here's an example for tuple return, comprising several objects: ``` -Section 1 -^^^^^^^^^^^^^^^^^^ - -Sub-section 1 -~~~~~~~~~~~~~~~~~~ + Returns: + `tuple(torch.FloatTensor)` comprising various elements depending on the configuration ([`BertConfig`]) and inputs: + - ** loss** (*optional*, returned when `masked_lm_labels` is provided) `torch.FloatTensor` of shape `(1,)` -- + Total loss as the sum of the masked language modeling loss and the next sequence prediction (classification) loss. + - **prediction_scores** (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`) -- + Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). ``` -ReST allows the use of any characters to designate different section levels, as long as they are used consistently within the same document. For details see [sections doc](https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#sections). Because there is no standard different documents often end up using different characters for the same levels which makes it very difficult to know which character to use when creating a new section. - -Specifically, if when running `make docs` you get an error like: -``` -docs/source/main_classes/trainer.rst:127:Title level inconsistent: -``` -you picked an inconsistent character for some of the levels. - -But how do you know which characters you must use for an already existing level or when adding a new level? 
- -You can use this helper script: -``` -perl -ne '/^(.)\1{100,}/ && do { $h{$1}=++$c if !$h{$1} }; END { %h = reverse %h ; print "$_ $h{$_}\n" for sort keys %h}' docs/source/main_classes/trainer.rst -1 - -2 ~ -3 ^ -4 = -5 " -``` - -This tells you which characters have already been assigned for each level. - -So using this particular example's output -- if your current section's header uses `=` as its underline character, you now know you're at level 4, and if you want to add a sub-section header you know you want `"` as it'd level 5. - -If you needed to add yet another sub-level, then pick a character that is not used already. That is you must pick a character that is not in the output of that script. - -Here is the full list of characters that can be used in this context: `= - ` : ' " ~ ^ _ * + # < >` - #### Adding an image Due to the rapidly growing repository, it is important to make sure that no files that would significantly weigh down the repository are added. This includes images, videos and other non-text files. We prefer to leverage a hf.co hosted `dataset` like @@ -332,3 +278,4 @@ the ones hosted on [`hf-internal-testing`](https://huggingface.co/hf-internal-te them by URL. We recommend putting them in the following dataset: [huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images). If an external contribution, feel free to add the images to your PR and ask a Hugging Face member to migrate your images to this dataset. 
+ diff --git a/setup.py b/setup.py index 96fd9c26858..6acb8aef1d5 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,6 @@ _deps = [ "dataclasses", "datasets", "deepspeed>=0.5.7", - "docutils==0.16.0", "fairscale>0.3", "faiss-cpu", "fastapi", @@ -133,7 +132,6 @@ _deps = [ "pytest-xdist", "python>=3.6.0", "ray[tune]", - "recommonmark", "regex!=2019.12.17", "requests", "rouge-score", @@ -144,13 +142,6 @@ _deps = [ "sentencepiece>=0.1.91,!=0.1.92", "sigopt", "librosa", - "markdown!=3.3.5", - "sphinx-copybutton", - "sphinx-markdown-tables", - "sphinx-rtd-theme==0.4.3", # sphinx-rtd-theme==0.5.0 introduced big changes in the style. - "sphinx==3.2.1", - "sphinxext-opengraph==0.4.1", - "sphinx-intl", "starlette", "tensorflow-cpu>=2.3", "tensorflow>=2.3", @@ -296,17 +287,9 @@ extras["all"] = ( + extras["codecarbon"] ) -extras["docs_specific"] = deps_list( - "docutils", - "markdown", - "recommonmark", - "sphinx", - "sphinx-markdown-tables", - "sphinx-rtd-theme", - "sphinx-copybutton", - "sphinxext-opengraph", - "sphinx-intl", -) +# Might need to add doc-builder and some specific deps in the future +extras["docs_specific"] = [] + # "docs" needs "all" to resolve all the references extras["docs"] = extras["all"] + extras["docs_specific"] diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 328f640a29c..7fcef460a38 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -9,7 +9,6 @@ deps = { "dataclasses": "dataclasses", "datasets": "datasets", "deepspeed": "deepspeed>=0.5.7", - "docutils": "docutils==0.16.0", "fairscale": "fairscale>0.3", "faiss-cpu": "faiss-cpu", "fastapi": "fastapi", @@ -44,7 +43,6 @@ deps = { "pytest-xdist": "pytest-xdist", "python": "python>=3.6.0", "ray[tune]": "ray[tune]", - "recommonmark": "recommonmark", "regex": "regex!=2019.12.17", "requests": "requests", "rouge-score": "rouge-score", @@ -55,13 +53,6 @@ deps = { "sentencepiece": 
"sentencepiece>=0.1.91,!=0.1.92", "sigopt": "sigopt", "librosa": "librosa", - "markdown": "markdown!=3.3.5", - "sphinx-copybutton": "sphinx-copybutton", - "sphinx-markdown-tables": "sphinx-markdown-tables", - "sphinx-rtd-theme": "sphinx-rtd-theme==0.4.3", - "sphinx": "sphinx==3.2.1", - "sphinxext-opengraph": "sphinxext-opengraph==0.4.1", - "sphinx-intl": "sphinx-intl", "starlette": "starlette", "tensorflow-cpu": "tensorflow-cpu>=2.3", "tensorflow": "tensorflow>=2.3",