From 314cca2842ec92204ed8f85ae3a18d01a347bf7a Mon Sep 17 00:00:00 2001 From: Sugeeth Date: Tue, 5 Jan 2021 16:48:48 +0530 Subject: [PATCH] Fix documentation links always pointing to master. (#9217) * Use extlinks to point hyperlink with the version of code * Point to version on release and master until then * Apply style * Correct links * Add missing backtick * Simple missing backtick after all. Co-authored-by: Raghavendra Sugeeth P S Co-authored-by: Lysandre --- docs/source/benchmarks.rst | 8 ++--- docs/source/bertology.rst | 6 ++-- docs/source/conf.py | 6 +++- docs/source/converting_tensorflow_models.rst | 10 +++--- docs/source/main_classes/processors.rst | 4 +-- docs/source/model_doc/bart.rst | 2 +- docs/source/model_doc/barthez.rst | 4 +-- docs/source/model_doc/marian.rst | 9 ++--- docs/source/model_doc/mbart.rst | 2 +- docs/source/model_doc/pegasus.rst | 5 ++- docs/source/multilingual.rst | 5 ++- docs/source/task_summary.rst | 6 ++-- docs/source/testing.rst | 35 ++++++++------------ 13 files changed, 45 insertions(+), 57 deletions(-) diff --git a/docs/source/benchmarks.rst b/docs/source/benchmarks.rst index 93fb7c05888..ba79052cfe2 100644 --- a/docs/source/benchmarks.rst +++ b/docs/source/benchmarks.rst @@ -15,8 +15,8 @@ Benchmarks Let's take a look at how 🤗 Transformer models can be benchmarked, best practices, and already available benchmarks. -A notebook explaining in more detail how to benchmark 🤗 Transformer models can be found `here -`__. +A notebook explaining in more detail how to benchmark 🤗 Transformer models can be found :prefix_link:`here +`. How to benchmark 🤗 Transformer models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -353,5 +353,5 @@ The approach is detailed in the `following blogpost available `here `__. -With the new `benchmark` tools, it is easier than ever to share your benchmark results with the community `here -`__. +With the new `benchmark` tools, it is easier than ever to share your benchmark results with the community +:prefix_link:`here `. diff --git a/docs/source/bertology.rst b/docs/source/bertology.rst index dde044f4380..79fa34abfcb 100644 --- a/docs/source/bertology.rst +++ b/docs/source/bertology.rst @@ -33,6 +33,6 @@ help people access the inner representations, mainly adapted from the great work * retrieving heads output values and gradients to be able to compute head importance score and prune head as explained in https://arxiv.org/abs/1905.10650. -To help you understand and use these features, we have added a specific example script: `bertology.py -`_ while -extract information and prune a model pre-trained on GLUE. +To help you understand and use these features, we have added a specific example script: :prefix_link:`bertology.py +` while extract information and prune a model pre-trained on +GLUE. 
diff --git a/docs/source/conf.py b/docs/source/conf.py index e97b8c88715..a82a5dc7862 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -27,7 +27,10 @@ author = u'huggingface' version = u'' # The full version, including alpha/beta/rc tags release = u'4.1.1' - +# Prefix link to point to master, comment this during version release and uncomment below line +extlinks = {'prefix_link': ('https://github.com/huggingface/transformers/blob/master/%s', '')} +# Prefix link to always point to corresponding version, uncomment this during version release +# extlinks = {'prefix_link': ('https://github.com/huggingface/transformers/blob/v'+ release + '/%s', '')} # -- General configuration --------------------------------------------------- @@ -40,6 +43,7 @@ release = u'4.1.1' # ones. extensions = [ 'sphinx.ext.autodoc', + 'sphinx.ext.extlinks', 'sphinx.ext.coverage', 'sphinx.ext.napoleon', 'recommonmark', diff --git a/docs/source/converting_tensorflow_models.rst b/docs/source/converting_tensorflow_models.rst index 2f733d48f1b..9cc1333fdea 100644 --- a/docs/source/converting_tensorflow_models.rst +++ b/docs/source/converting_tensorflow_models.rst @@ -27,9 +27,8 @@ BERT You can convert any TensorFlow checkpoint for BERT (in particular `the pre-trained models released by Google `_\ ) in a PyTorch save file by using the -`convert_bert_original_tf_checkpoint_to_pytorch.py -`_ -script. +:prefix_link:`convert_bert_original_tf_checkpoint_to_pytorch.py +` script. This CLI takes as input a TensorFlow checkpoint (three files starting with ``bert_model.ckpt``\ ) and the associated configuration file (\ ``bert_config.json``\ ), and creates a PyTorch model for this configuration, loads the weights @@ -66,9 +65,8 @@ ALBERT ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Convert TensorFlow model checkpoints of ALBERT to PyTorch using the -`convert_albert_original_tf_checkpoint_to_pytorch.py -`_ -script. +:prefix_link:`convert_albert_original_tf_checkpoint_to_pytorch.py +` script. The CLI takes as input a TensorFlow checkpoint (three files starting with ``model.ckpt-best``\ ) and the accompanying configuration file (\ ``albert_config.json``\ ), then creates and saves a PyTorch model. To run this conversion you diff --git a/docs/source/main_classes/processors.rst b/docs/source/main_classes/processors.rst index c8b6c344c90..793ee1b1332 100644 --- a/docs/source/main_classes/processors.rst +++ b/docs/source/main_classes/processors.rst @@ -168,5 +168,5 @@ Using `tensorflow_datasets` is as easy as using a data file: ) -Another example using these processors is given in the `run_squad.py -`__ script. +Another example using these processors is given in the :prefix_link:`run_squad.py +` script. diff --git a/docs/source/model_doc/bart.rst b/docs/source/model_doc/bart.rst index b167cf64fd8..a349fa32dad 100644 --- a/docs/source/model_doc/bart.rst +++ b/docs/source/model_doc/bart.rst @@ -42,7 +42,7 @@ Examples _______________________________________________________________________________________________________________________ - Examples and scripts for fine-tuning BART and other models for sequence to sequence tasks can be found in - `examples/seq2seq/ `__. + :prefix_link:`examples/seq2seq/ `. - An example of how to train :class:`~transformers.BartForConditionalGeneration` with a Hugging Face :obj:`datasets` object can be found in this `forum discussion `__. 
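A note on the ``conf.py`` hunk above: ``sphinx.ext.extlinks`` turns each entry of the ``extlinks`` dict into a role, so a usage of the form ``:prefix_link:`link text <repo/relative/path>``` renders as a link to the configured base URL with the repo-relative path substituted for ``%s``. As committed, the patch keeps two hard-coded prefixes (master vs. a pinned release tag) that must be commented in and out by hand at release time. Below is a minimal sketch of an alternative that derives the prefix from the ``release`` string instead; the ``dev`` marker check and the ``_link_prefix`` name are illustrative assumptions, not part of this patch.

.. code-block:: python

    # docs/source/conf.py (sketch only, not the committed version)
    release = u'4.1.1'

    if 'dev' in release:
        # Docs built from an unreleased checkout: point links at master.
        _link_prefix = 'https://github.com/huggingface/transformers/blob/master/%s'
    else:
        # Docs built for a tagged release: pin links to that version.
        _link_prefix = 'https://github.com/huggingface/transformers/blob/v' + release + '/%s'

    # The empty second element means the link caption is taken verbatim from the role text.
    extlinks = {'prefix_link': (_link_prefix, '')}

With either variant, any file in the repository can be referenced from the ``.rst`` sources as ``:prefix_link:`link text <repo/relative/path>```, which is what the surrounding hunks convert the hard-coded ``blob/master`` URLs to.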
diff --git a/docs/source/model_doc/barthez.rst b/docs/source/model_doc/barthez.rst index f0f1d4be429..3b360e30f6e 100644 --- a/docs/source/model_doc/barthez.rst +++ b/docs/source/model_doc/barthez.rst @@ -41,8 +41,8 @@ The Authors' code can be found `here `__. Examples _______________________________________________________________________________________________________________________ -- BARThez can be fine-tuned on sequence-to-sequence tasks in a similar way as BART, check: `examples/seq2seq/ - `__. +- BARThez can be fine-tuned on sequence-to-sequence tasks in a similar way as BART, check: + :prefix_link:`examples/seq2seq/ `. BarthezTokenizer diff --git a/docs/source/model_doc/marian.rst b/docs/source/model_doc/marian.rst index d6384bbdf3e..c6000260428 100644 --- a/docs/source/model_doc/marian.rst +++ b/docs/source/model_doc/marian.rst @@ -56,12 +56,9 @@ Examples - Since Marian models are smaller than many other translation models available in the library, they can be useful for fine-tuning experiments and integration tests. -- `Fine-tune on TPU - `__ -- `Fine-tune on GPU - `__ -- `Fine-tune on GPU with pytorch-lightning - `__ +- :prefix_link:`Fine-tune on TPU ` +- :prefix_link:`Fine-tune on GPU ` +- :prefix_link:`Fine-tune on GPU with pytorch-lightning ` Multilingual Models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/model_doc/mbart.rst b/docs/source/model_doc/mbart.rst index 4ac391255eb..ac283c06e94 100644 --- a/docs/source/model_doc/mbart.rst +++ b/docs/source/model_doc/mbart.rst @@ -35,7 +35,7 @@ Examples _______________________________________________________________________________________________________________________ - Examples and scripts for fine-tuning mBART and other models for sequence to sequence tasks can be found in - `examples/seq2seq/ `__. + :prefix_link:`examples/seq2seq/ `. - Given the large embeddings table, mBART consumes a large amount of GPU RAM, especially for fine-tuning. :class:`MarianMTModel` is usually a better choice for bilingual machine translation. diff --git a/docs/source/model_doc/pegasus.rst b/docs/source/model_doc/pegasus.rst index 3fab320ebcb..820b0be8aae 100644 --- a/docs/source/model_doc/pegasus.rst +++ b/docs/source/model_doc/pegasus.rst @@ -51,9 +51,8 @@ All the `checkpoints `__ are fine- Examples _______________________________________________________________________________________________________________________ -- `Script `__ to - fine-tune pegasus on the XSUM dataset. Data download instructions at `examples/seq2seq/ - `__. +- :prefix_link:`Script ` to fine-tune pegasus on the XSUM dataset. Data + download instructions at :prefix_link:`examples/seq2seq/ `. - FP16 is not supported (help/ideas on this appreciated!). - The adafactor optimizer is recommended for pegasus fine-tuning. diff --git a/docs/source/multilingual.rst b/docs/source/multilingual.rst index b18d056b163..d109a961a7c 100644 --- a/docs/source/multilingual.rst +++ b/docs/source/multilingual.rst @@ -90,9 +90,8 @@ You can then feed it all as input to your model: >>> outputs = model(input_ids, langs=langs) -The example `run_generation.py -`__ can generate -text using the CLM checkpoints from XLM, using the language embeddings. +The example :prefix_link:`run_generation.py ` can generate text using the +CLM checkpoints from XLM, using the language embeddings. 
XLM without Language Embeddings ----------------------------------------------------------------------------------------------------------------------- diff --git a/docs/source/task_summary.rst b/docs/source/task_summary.rst index 94cc615609e..96d467d6b41 100644 --- a/docs/source/task_summary.rst +++ b/docs/source/task_summary.rst @@ -750,8 +750,7 @@ Summarization is the task of summarizing a document or an article into a shorter An example of a summarization dataset is the CNN / Daily Mail dataset, which consists of long news articles and was created for the task of summarization. If you would like to fine-tune a model on a summarization task, various -approaches are described in this `document -`__. +approaches are described in this :prefix_link:`document `. Here is an example of using the pipelines to do summarization. It leverages a Bart model that was fine-tuned on the CNN / Daily Mail data set. @@ -829,8 +828,7 @@ Translation is the task of translating a text from one language to another. An example of a translation dataset is the WMT English to German dataset, which has sentences in English as the input data and the corresponding sentences in German as the target data. If you would like to fine-tune a model on a -translation task, various approaches are described in this `document -`__. +translation task, various approaches are described in this :prefix_link:`document `. Here is an example of using the pipelines to do translation. It leverages a T5 model that was only pre-trained on a multi-task mixture dataset (including WMT), yet, yielding impressive translation results. diff --git a/docs/source/testing.rst b/docs/source/testing.rst index c9cd37e75eb..4dffe15b9e4 100644 --- a/docs/source/testing.rst +++ b/docs/source/testing.rst @@ -25,25 +25,22 @@ How transformers are tested ----------------------------------------------------------------------------------------------------------------------- 1. Once a PR is submitted it gets tested with 9 CircleCi jobs. Every new commit to that PR gets retested. These jobs - are defined in this `config file `__, - so that if needed you can reproduce the same environment on your machine. + are defined in this :prefix_link:`config file <.circleci/config.yml>`, so that if needed you can reproduce the same + environment on your machine. These CI jobs don't run ``@slow`` tests. 2. There are 3 jobs run by `github actions `__: - * `torch hub integration - `__: checks - whether torch hub integration works. + * :prefix_link:`torch hub integration <.github/workflows/github-torch-hub.yml>`: checks whether torch hub + integration works. - * `self-hosted (push) `__: - runs fast tests on GPU only on commits on ``master``. It only runs if a commit on ``master`` has updated the code - in one of the following folders: ``src``, ``tests``, ``.github`` (to prevent running on added model cards, - notebooks, etc.) + * :prefix_link:`self-hosted (push) <.github/workflows/self-push.yml>`: runs fast tests on GPU only on commits on + ``master``. It only runs if a commit on ``master`` has updated the code in one of the following folders: ``src``, + ``tests``, ``.github`` (to prevent running on added model cards, notebooks, etc.) - * `self-hosted runner - `__: runs normal and - slow tests on GPU in ``tests`` and ``examples``: + * :prefix_link:`self-hosted runner <.github/workflows/self-scheduled.yml>`: runs normal and slow tests on GPU in + ``tests`` and ``examples``: .. 
code-block:: bash @@ -492,12 +489,9 @@ spawns a normal process that then spawns off multiple workers and manages the IO This is still under development but you can study 2 different tests that perform this successfully: -* `test_seq2seq_examples_multi_gpu.py - `__ - a +* :prefix_link:`test_seq2seq_examples_multi_gpu.py ` - a ``pytorch-lightning``-running test (had to use PL's ``ddp`` spawning method which is the default) -* `test_finetune_trainer.py - `__ - a normal - (non-PL) test +* :prefix_link:`test_finetune_trainer.py ` - a normal (non-PL) test To jump right into the execution point, search for the ``execute_subprocess_async`` function in those tests. @@ -940,10 +934,9 @@ slow models to do qualitative testing. To see the use of these simply look for * grep tiny tests examples -Here is a an example of a `script -`__ that created the tiny -model `stas/tiny-wmt19-en-de `__. You can easily adjust it to your -specific model's architecture. +Here is a an example of a :prefix_link:`script ` that created the tiny model +`stas/tiny-wmt19-en-de `__. You can easily adjust it to your specific +model's architecture. It's easy to measure the run-time incorrectly if for example there is an overheard of downloading a huge model, but if you test it locally the downloaded files would be cached and thus the download time not measured. Hence check the