Merge pull request #2270 from aaugustin/remove-python-2

Remove support for Python 2
Commit ce50305e5b by Aymeric Augustin, 2019-12-22 23:04:37 +01:00, committed by GitHub (GPG Key ID: 4AEE18F83AFDEB23)
155 changed files with 217 additions and 603 deletions

View File

@ -1,6 +1,6 @@
version: 2
jobs:
run_tests_py3_torch_and_tf:
run_tests_torch_and_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@ -17,7 +17,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
run_tests_py3_torch:
run_tests_torch:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@ -33,7 +33,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
run_tests_py3_tf:
run_tests_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@ -49,7 +49,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
run_tests_py3_custom_tokenizers:
run_tests_custom_tokenizers:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@ -59,7 +59,7 @@ jobs:
- run: sudo pip install pytest pytest-xdist
- run: sudo pip install mecab-python3
- run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./tests/test_tokenization_bert_japanese.py
run_examples_py3_torch:
run_examples_torch:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@ -121,9 +121,9 @@ workflows:
jobs:
- check_code_quality
- check_repository_consistency
- run_examples_py3_torch
- run_tests_py3_custom_tokenizers
- run_tests_py3_torch_and_tf
- run_tests_py3_torch
- run_tests_py3_tf
- run_examples_torch
- run_tests_custom_tokenizers
- run_tests_torch_and_tf
- run_tests_torch
- run_tests_tf
- deploy_doc: *workflow_filters

View File

@ -64,7 +64,7 @@ Choose the right framework for every part of a model's lifetime
## Installation
This repo is tested on Python 2.7 and 3.5+ (examples are tested only on python 3.5+), PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1
This repo is tested on Python 3.5+, PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1
### With pip

View File

@ -1,6 +1,6 @@
# Installation
Transformers is tested on Python 2.7 and 3.5+ (examples are tested only on python 3.5+) and PyTorch 1.1.0
Transformers is tested on Python 3.5+ and PyTorch 1.1.0
## With pip
@ -44,7 +44,7 @@ By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to `
## OpenAI GPT original tokenization workflow
If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` (use version 4.4.3 if you are using Python 2) and `SpaCy`:
If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` and `SpaCy`:
``` bash
pip install spacy ftfy==4.4.3
```

View File

@ -16,7 +16,7 @@
"""BERT finetuning runner.
Finetuning the library models for multiple choice on SWAG (Bert).
"""
from __future__ import absolute_import, division, print_function
import argparse
import csv
@ -24,7 +24,6 @@ import glob
import logging
import os
import random
import sys
import numpy as np
import torch
@ -104,12 +103,7 @@ class InputFeatures(object):
def read_swag_examples(input_file, is_training=True):
with open(input_file, "r", encoding="utf-8") as f:
reader = csv.reader(f)
lines = []
for line in reader:
if sys.version_info[0] == 2:
line = list(unicode(cell, "utf-8") for cell in line) # noqa: F821
lines.append(line)
lines = list(csv.reader(f))
if is_training and lines[0][-1] != "label":
raise ValueError("For training, the input file must contain a label column.")
@ -347,7 +341,7 @@ def train(args, train_dataset, model, tokenizer):
tr_loss, logging_loss = 0.0, 0.0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
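The `read_swag_examples` change above collapses the old Python 2 cell-decoding loop into a single expression. A minimal, self-contained sketch of the Python 3 idiom (the file name and column layout are made up for illustration):

```python
import csv

def read_rows(path):
    # Under Python 3 the csv module yields rows of str directly, so the old
    # per-cell unicode(cell, "utf-8") branch is unnecessary.
    with open(path, "r", encoding="utf-8") as f:
        return list(csv.reader(f))

rows = read_rows("swag_train.csv")  # hypothetical file
header, examples = rows[0], rows[1:]
```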

View File

@ -19,7 +19,7 @@
This script with default values evaluates a pretrained Transformer-XL on WikiText 103
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" This is the exact same script as `examples/run_squad.py` (as of 2019, October 4th) with an additional and optional step of distillation."""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -160,7 +159,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
tr_loss, logging_loss = 0.0, 0.0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for multimodal multiclass prediction on MM-IMDB dataset."""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -165,7 +164,7 @@ def train(args, train_dataset, model, tokenizer, criterion):
best_f1, n_no_improve = 0, 0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

View File

@ -16,7 +16,7 @@
# limitations under the License.
""" Conditional text generation with the auto-regressive models of the library (GPT/GPT-2/CTRL/Transformer-XL/XLNet)
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for sequence classification on GLUE (Bert, XLM, XLNet, RoBERTa)."""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -186,7 +185,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

View File

@ -19,7 +19,6 @@ GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while B
using a masked language modeling (MLM) loss.
"""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -282,7 +281,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
set_seed(args) # Added here for reproducibility (even between python 2 and 3)
set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for multiple choice (Bert, Roberta, XLNet)."""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -146,7 +145,7 @@ def train(args, train_dataset, model, tokenizer):
best_steps = 0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" Fine-tuning the library models for named entity recognition on CoNLL-2003 (Bert or Roberta). """
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -170,7 +169,7 @@ def train(args, train_dataset, model, tokenizer, labels, pad_token_label_id):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet)."""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -186,7 +185,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
# Added here for reproductibility (even between python 2 and 3)
# Added here for reproductibility
set_seed(args)
for _ in train_iterator:

View File

@ -16,7 +16,6 @@
""" Finetuning multi-lingual models on XNLI (Bert, DistilBERT, XLM).
Adapted from `examples/run_glue.py`"""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -165,7 +164,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

View File

@ -94,7 +94,7 @@ def process_story(raw_story):
def _add_missing_period(line):
END_TOKENS = [".", "!", "?", "...", "'", "`", '"', u"\u2019", u"\u2019", ")"]
END_TOKENS = [".", "!", "?", "...", "'", "`", '"', "\u2019", "\u2019", ")"]
if line.startswith("@highlight"):
return line
if line[-1] in END_TOKENS:

View File

@ -12,25 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import argparse
import logging
import sys
import unittest
from unittest.mock import patch
import run_generation
import run_glue
import run_squad
try:
# python 3.4+ can use builtin unittest.mock instead of mock package
from unittest.mock import patch
except ImportError:
from mock import patch
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
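With Python 2 gone, the conditional import of the external `mock` package disappears and `unittest.mock` is used directly. A small usage sketch (the patched target is illustrative, not taken from the test file):

```python
import sys
from unittest.mock import patch

# patch.object temporarily replaces sys.argv, the same mechanism the example
# tests use to feed CLI arguments to the training scripts.
with patch.object(sys, "argv", ["run_glue.py", "--model_type", "bert"]):
    print(sys.argv)
```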

View File

@ -15,15 +15,12 @@
# limitations under the License.
""" Multiple choice fine-tuning: utilities to work with multiple choice tasks of reading comprehension """
from __future__ import absolute_import, division, print_function
import csv
import glob
import json
import logging
import os
import sys
from io import open
from typing import List
import tqdm
@ -180,13 +177,7 @@ class SwagProcessor(DataProcessor):
def _read_csv(self, input_file):
with open(input_file, "r", encoding="utf-8") as f:
reader = csv.reader(f)
lines = []
for line in reader:
if sys.version_info[0] == 2:
line = list(unicode(cell, "utf-8") for cell in line) # noqa: F821
lines.append(line)
return lines
return list(csv.reader(f))
def _create_examples(self, lines: List[List[str]], type: str):
"""Creates examples for the training and dev sets."""

View File

@ -15,11 +15,9 @@
# limitations under the License.
""" Named entity recognition fine-tuning: utilities to work with CoNLL-2003 task. """
from __future__ import absolute_import, division, print_function
import logging
import os
from io import open
logger = logging.getLogger(__name__)

View File

@ -14,7 +14,7 @@ To create the package for pypi.
creating the wheel and the source distribution (obviously).
For the wheel, run: "python setup.py bdist_wheel" in the top level directory.
(this will build a wheel for the python version you use to build it - make sure you use python 3.x).
(this will build a wheel for the python version you use to build it).
For the sources, run: "python setup.py sdist"
You should now have a /dist directory with both .whl and .tar.gz source versions.
@ -33,7 +33,6 @@ To create the package for pypi.
7. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
"""
from io import open
from setuptools import find_packages, setup

View File

@ -34,8 +34,8 @@ class ANSI:
Helper for en.wikipedia.org/wiki/ANSI_escape_code
"""
_bold = u"\u001b[1m"
_reset = u"\u001b[0m"
_bold = "\u001b[1m"
_reset = "\u001b[0m"
@classmethod
def bold(cls, s):

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Auto Model class. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" BERT model configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" CamemBERT configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Salesforce CTRL configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" DistilBERT model configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" OpenAI GPT-2 configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" MMBT configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" OpenAI GPT configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" RoBERTa configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" T5 model configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" Transformer XL configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,13 +15,11 @@
# limitations under the License.
""" Configuration base class and utilities."""
from __future__ import absolute_import, division, print_function, unicode_literals
import copy
import json
import logging
import os
from io import open
from .file_utils import CONFIG_NAME, cached_path, hf_bucket_url, is_remote_url

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" XLM configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" XLM-RoBERTa configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" XLNet configuration """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
"""Convert ALBERT checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
"""Convert BERT checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import logging

View File

@ -14,11 +14,9 @@
# limitations under the License.
"""Convert OpenAI GPT checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import logging
from io import open
import torch

View File

@ -14,11 +14,9 @@
# limitations under the License.
"""Convert OpenAI GPT checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import logging
from io import open
import torch

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Convert pytorch checkpoints to TensorFlow """
from __future__ import absolute_import, division, print_function
import argparse
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
"""Convert RoBERTa checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
"""Convert T5 checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import logging

View File

@ -14,13 +14,12 @@
# limitations under the License.
"""Convert Transformer XL checkpoint and datasets."""
from __future__ import absolute_import, division, print_function
import argparse
import logging
import os
import pickle
import sys
from io import open
import torch
@ -35,12 +34,6 @@ from transformers import (
from transformers.tokenization_transfo_xl import CORPUS_NAME, VOCAB_FILES_NAMES
if sys.version_info[0] == 2:
import cPickle as pickle
else:
import pickle
logging.basicConfig(level=logging.INFO)
# We do this to be able to load python 2 datasets pickles
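The `cPickle` fallback is dropped, but the comment kept above hints that Python 2 pickles still need to be readable. A hedged sketch of how that is typically done in Python 3 (the file name is hypothetical and the `encoding` argument is an assumption about the loading code, which is not shown in this hunk):

```python
import pickle

# encoding="latin1" lets Python 3's pickle decode byte strings written by
# Python 2 instead of raising a UnicodeDecodeError.
with open("transfo_xl_corpus.pkl", "rb") as fp:  # hypothetical path
    corpus = pickle.load(fp, encoding="latin1")
```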

View File

@ -14,12 +14,10 @@
# limitations under the License.
"""Convert OpenAI GPT checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import json
import logging
from io import open
import numpy
import torch

View File

@ -14,7 +14,6 @@
# limitations under the License.
"""Convert BERT checkpoint."""
from __future__ import absolute_import, division, print_function
import argparse
import logging

View File

@ -14,7 +14,6 @@ import logging
import math
import re
import string
from io import open
from transformers.tokenization_bert import BasicTokenizer

View File

@ -18,7 +18,6 @@ import copy
import csv
import json
import logging
import sys
from ...file_utils import is_tf_available, is_torch_available
@ -98,13 +97,7 @@ class DataProcessor(object):
def _read_tsv(cls, input_file, quotechar=None):
"""Reads a tab separated value file."""
with open(input_file, "r", encoding="utf-8-sig") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
lines = []
for line in reader:
if sys.version_info[0] == 2:
line = list(unicode(cell, "utf-8") for cell in line) # noqa: F821
lines.append(line)
return lines
return list(csv.reader(f, delimiter="\t", quotechar=quotechar))
class SingleSentenceClassificationProcessor(DataProcessor):

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" XNLI utils (dataset loading and evaluation) """
from __future__ import absolute_import, division, print_function
import logging
import os

View File

@ -3,7 +3,7 @@ Utilities for working with the local dataset cache.
This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
Copyright by the AllenNLP authors.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import fnmatch
import json
@ -14,11 +14,10 @@ import tempfile
from contextlib import contextmanager
from functools import partial, wraps
from hashlib import sha256
from io import open
from urllib.parse import urlparse
import boto3
import requests
import six
from botocore.config import Config
from botocore.exceptions import ClientError
from filelock import FileLock
@ -66,10 +65,6 @@ except ImportError:
)
default_cache_path = os.path.join(torch_cache_home, "transformers")
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
try:
from pathlib import Path
@ -107,36 +102,20 @@ def is_tf_available():
return _tf_available
if not six.PY2:
def add_start_docstrings(*docstr):
def docstring_decorator(fn):
fn.__doc__ = "".join(docstr) + fn.__doc__
return fn
def add_start_docstrings(*docstr):
def docstring_decorator(fn):
fn.__doc__ = "".join(docstr) + fn.__doc__
return fn
return docstring_decorator
def add_end_docstrings(*docstr):
def docstring_decorator(fn):
fn.__doc__ = fn.__doc__ + "".join(docstr)
return fn
return docstring_decorator
return docstring_decorator
else:
# Not possible to update class docstrings on python2
def add_start_docstrings(*docstr):
def docstring_decorator(fn):
return fn
def add_end_docstrings(*docstr):
def docstring_decorator(fn):
fn.__doc__ = fn.__doc__ + "".join(docstr)
return fn
return docstring_decorator
def add_end_docstrings(*docstr):
def docstring_decorator(fn):
return fn
return docstring_decorator
return docstring_decorator
def is_remote_url(url_or_filename):
@ -183,7 +162,7 @@ def filename_to_url(filename, cache_dir=None):
"""
if cache_dir is None:
cache_dir = TRANSFORMERS_CACHE
if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
if isinstance(cache_dir, Path):
cache_dir = str(cache_dir)
cache_path = os.path.join(cache_dir, filename)
@ -218,9 +197,9 @@ def cached_path(
"""
if cache_dir is None:
cache_dir = TRANSFORMERS_CACHE
if sys.version_info[0] == 3 and isinstance(url_or_filename, Path):
if isinstance(url_or_filename, Path):
url_or_filename = str(url_or_filename)
if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
if isinstance(cache_dir, Path):
cache_dir = str(cache_dir)
if is_remote_url(url_or_filename):
@ -297,7 +276,7 @@ def http_get(url, temp_file, proxies=None, resume_size=0, user_agent=None):
ua = "transformers/{}; python/{}".format(__version__, sys.version.split()[0])
if isinstance(user_agent, dict):
ua += "; " + "; ".join("{}/{}".format(k, v) for k, v in user_agent.items())
elif isinstance(user_agent, six.string_types):
elif isinstance(user_agent, str):
ua += "; " + user_agent
headers = {"user-agent": ua}
if resume_size > 0:
@ -331,9 +310,7 @@ def get_from_cache(
"""
if cache_dir is None:
cache_dir = TRANSFORMERS_CACHE
if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
cache_dir = str(cache_dir)
if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
if isinstance(cache_dir, Path):
cache_dir = str(cache_dir)
if not os.path.exists(cache_dir):
@ -352,8 +329,6 @@ def get_from_cache(
except (EnvironmentError, requests.exceptions.Timeout):
etag = None
if sys.version_info[0] == 2 and etag is not None:
etag = etag.decode("utf-8")
filename = url_to_filename(url, etag)
# get cache path to put the file
@ -417,9 +392,6 @@ def get_from_cache(
meta = {"url": url, "etag": etag}
meta_path = cache_path + ".json"
with open(meta_path, "w") as meta_file:
output_string = json.dumps(meta)
if sys.version_info[0] == 2 and isinstance(output_string, str):
output_string = unicode(output_string, "utf-8") # noqa: F821
meta_file.write(output_string)
json.dump(meta, meta_file)
return cache_path
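Several hunks in this file remove `six` and `sys.version_info` branching; the docstring decorators are the clearest case, since Python 2 could not rewrite class docstrings at all. A minimal sketch of the now-unconditional decorator and its effect (the names mirror the diff, but this is an illustration, not the library code):

```python
def add_start_docstrings(*docstr):
    def docstring_decorator(fn):
        # Safe on Python 3 for functions and classes alike.
        fn.__doc__ = "".join(docstr) + (fn.__doc__ or "")
        return fn
    return docstring_decorator

@add_start_docstrings("Shared model intro. ")
def forward(x):
    """Model-specific details."""
    return x

print(forward.__doc__)  # -> "Shared model intro. Model-specific details."
```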

View File

@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import io
import os
@ -20,7 +20,6 @@ from os.path import expanduser
from typing import List
import requests
import six
from tqdm import tqdm
@ -28,14 +27,7 @@ ENDPOINT = "https://huggingface.co"
class S3Obj:
def __init__(
self,
filename, # type: str
LastModified, # type: str
ETag, # type: str
Size, # type: int
**kwargs
):
def __init__(self, filename: str, LastModified: str, ETag: str, Size: int, **kwargs):
self.filename = filename
self.LastModified = LastModified
self.ETag = ETag
@ -43,13 +35,7 @@ class S3Obj:
class PresignedUrl:
def __init__(
self,
write, # type: str
access, # type: str
type, # type: str
**kwargs
):
def __init__(self, write: str, access: str, type: str, **kwargs):
self.write = write
self.access = access
self.type = type # mime-type to send to S3.
@ -59,12 +45,7 @@ class HfApi:
def __init__(self, endpoint=None):
self.endpoint = endpoint if endpoint is not None else ENDPOINT
def login(
self,
username, # type: str
password, # type: str
):
# type: (...) -> str
def login(self, username: str, password: str) -> str:
"""
Call HF API to sign in a user and get a token if credentials are valid.
@ -80,10 +61,7 @@ class HfApi:
d = r.json()
return d["token"]
def whoami(
self, token, # type: str
):
# type: (...) -> str
def whoami(self, token: str) -> str:
"""
Call HF API to know "whoami"
"""
@ -93,8 +71,7 @@ class HfApi:
d = r.json()
return d["user"]
def logout(self, token):
# type: (...) -> None
def logout(self, token: str) -> None:
"""
Call HF API to log out.
"""
@ -102,19 +79,17 @@ class HfApi:
r = requests.post(path, headers={"authorization": "Bearer {}".format(token)})
r.raise_for_status()
def presign(self, token, filename):
# type: (...) -> PresignedUrl
def presign(self, token: str, filename) -> PresignedUrl:
"""
Call HF API to get a presigned url to upload `filename` to S3.
"""
path = "{}/api/presign".format(self.endpoint)
r = requests.post(path, headers={"authorization": "Bearer {}".format(token)}, json={"filename": filename},)
r = requests.post(path, headers={"authorization": "Bearer {}".format(token)}, json={"filename": filename})
r.raise_for_status()
d = r.json()
return PresignedUrl(**d)
def presign_and_upload(self, token, filename, filepath):
# type: (...) -> str
def presign_and_upload(self, token: str, filename, filepath) -> str:
"""
Get a presigned url, then upload file to S3.
@ -158,13 +133,10 @@ class TqdmProgressFileReader:
def __init__(self, f: io.BufferedReader):
self.f = f
self.total_size = os.fstat(f.fileno()).st_size # type: int
self.total_size = os.fstat(f.fileno()).st_size
self.pbar = tqdm(total=self.total_size, leave=False)
if six.PY3:
# does not work unless PY3
# no big deal as the CLI does not currently support PY2 anyways.
self.read = f.read
f.read = self._read
self.read = f.read
f.read = self._read
def _read(self, n=-1):
self.pbar.update(n)
@ -182,16 +154,7 @@ class HfFolder:
"""
Save token, creating folder as needed.
"""
if six.PY3:
os.makedirs(os.path.dirname(cls.path_token), exist_ok=True)
else:
# Python 2
try:
os.makedirs(os.path.dirname(cls.path_token))
except OSError as e:
if e.errno != os.errno.EEXIST:
raise e
pass
os.makedirs(os.path.dirname(cls.path_token), exist_ok=True)
with open(cls.path_token, "w+") as f:
f.write(token)
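The `hf_api` hunks replace `# type:` comments with real annotations and drop the `six.PY3` guard around `os.makedirs(..., exist_ok=True)`. A small sketch of the annotation style, which tools can now read back at runtime (the class and return value are illustrative only):

```python
import typing

class ApiSketch:
    def login(self, username: str, password: str) -> str:
        # Real annotations replace the old "# type: (...) -> str" comments.
        return "dummy-token"

print(typing.get_type_hints(ApiSketch.login))
# {'username': <class 'str'>, 'password': <class 'str'>, 'return': <class 'str'>}
```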

View File

@ -14,13 +14,11 @@
# limitations under the License.
""" Configuration base class and utilities."""
from __future__ import absolute_import, division, print_function, unicode_literals
import copy
import json
import logging
import os
from io import open
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP
from .file_utils import (

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Auto Model class. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,12 +15,10 @@
# limitations under the License.
"""PyTorch BERT model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import math
import os
import sys
import torch
from torch import nn
@ -339,9 +337,7 @@ class BertIntermediate(nn.Module):
def __init__(self, config):
super(BertIntermediate, self).__init__()
self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
if isinstance(config.hidden_act, str) or (
sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode) # noqa: F821
):
if isinstance(config.hidden_act, str):
self.intermediate_act_fn = ACT2FN[config.hidden_act]
else:
self.intermediate_act_fn = config.hidden_act
@ -461,9 +457,7 @@ class BertPredictionHeadTransform(nn.Module):
def __init__(self, config):
super(BertPredictionHeadTransform, self).__init__()
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
if isinstance(config.hidden_act, str) or (
sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode) # noqa: F821
):
if isinstance(config.hidden_act, str):
self.transform_act_fn = ACT2FN[config.hidden_act]
else:
self.transform_act_fn = config.hidden_act
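With `unicode` gone, the activation lookup needs only one `isinstance(..., str)` check. A toy version of that dispatch (the table below is a stand-in for the library's `ACT2FN`, not the real one):

```python
import math

ACT2FN = {"tanh": math.tanh, "relu": lambda x: max(0.0, x)}  # illustrative table

def resolve_activation(hidden_act):
    # A single text type means one isinstance check distinguishes
    # "name of an activation" from "callable activation".
    if isinstance(hidden_act, str):
        return ACT2FN[hidden_act]
    return hidden_act

print(resolve_activation("tanh")(1.0))  # looked up by name
print(resolve_activation(abs)(-2.0))    # passed as a callable
```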

View File

@ -15,7 +15,6 @@
# limitations under the License.
"""PyTorch CamemBERT model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" PyTorch CTRL model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -16,7 +16,7 @@
adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM)
and in part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert)
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import copy
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Classes to support Encoder-Decoder architectures """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os

View File

@ -15,7 +15,6 @@
# limitations under the License.
"""PyTorch OpenAI GPT-2 model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import math

View File

@ -15,7 +15,6 @@
# limitations under the License.
"""PyTorch MMBT model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,13 +15,11 @@
# limitations under the License.
"""PyTorch OpenAI GPT model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import math
import os
from io import open
import torch
import torch.nn as nn

View File

@ -15,7 +15,6 @@
# limitations under the License.
"""PyTorch RoBERTa model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" PyTorch T5 model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import copy
import itertools

View File

@ -14,10 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" TF 2.0 ALBERT model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import sys
import tensorflow as tf
@ -311,9 +310,7 @@ class TFAlbertLayer(tf.keras.layers.Layer):
config.intermediate_size, kernel_initializer=get_initializer(config.initializer_range), name="ffn"
)
if isinstance(config.hidden_act, str) or (
sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode) # noqa: F821
):
if isinstance(config.hidden_act, str):
self.activation = ACT2FN[config.hidden_act]
else:
self.activation = config.hidden_act
@ -454,9 +451,7 @@ class TFAlbertMLMHead(tf.keras.layers.Layer):
self.dense = tf.keras.layers.Dense(
config.embedding_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
if isinstance(config.hidden_act, str) or (
sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode) # noqa: F821
):
if isinstance(config.hidden_act, str):
self.activation = ACT2FN[config.hidden_act]
else:
self.activation = config.hidden_act

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Auto Model class. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,10 +15,8 @@
# limitations under the License.
""" TF 2.0 BERT model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import sys
import numpy as np
import tensorflow as tf
@ -311,9 +309,7 @@ class TFBertIntermediate(tf.keras.layers.Layer):
self.dense = tf.keras.layers.Dense(
config.intermediate_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
if isinstance(config.hidden_act, str) or (
sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode) # noqa: F821
):
if isinstance(config.hidden_act, str):
self.intermediate_act_fn = ACT2FN[config.hidden_act]
else:
self.intermediate_act_fn = config.hidden_act
@ -418,9 +414,7 @@ class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
self.dense = tf.keras.layers.Dense(
config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
if isinstance(config.hidden_act, str) or (
sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode) # noqa: F821
):
if isinstance(config.hidden_act, str):
self.transform_act_fn = ACT2FN[config.hidden_act]
else:
self.transform_act_fn = config.hidden_act

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" TF 2.0 CTRL model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -14,7 +14,7 @@
# limitations under the License.
""" TF 2.0 DistilBERT model
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import math

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" TF 2.0 OpenAI GPT-2 model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" TF 2.0 OpenAI GPT model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" PyTorch - TF 2.0 general utilities."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" TF 2.0 RoBERTa model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
""" TF 2.0 T5 model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import copy
import itertools

View File

@ -16,7 +16,6 @@
""" TF 2.0 Transformer XL model.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
"""TF general model utils."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os

View File

@ -14,7 +14,7 @@
# limitations under the License.
""" TF 2.0 XLM model.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import itertools
import logging

View File

@ -15,10 +15,9 @@
# limitations under the License.
""" TF 2.0 XLNet model.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import sys
import numpy as np
import tensorflow as tf
@ -290,9 +289,7 @@ class TFXLNetFeedForward(tf.keras.layers.Layer):
config.d_model, kernel_initializer=get_initializer(config.initializer_range), name="layer_2"
)
self.dropout = tf.keras.layers.Dropout(config.dropout)
if isinstance(config.ff_activation, str) or (
sys.version_info[0] == 2 and isinstance(config.ff_activation, unicode) # noqa: F821
):
if isinstance(config.ff_activation, str):
self.activation_function = ACT2FN[config.ff_activation]
else:
self.activation_function = config.ff_activation

View File

@ -18,7 +18,6 @@
In particular https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
"""PyTorch BERT model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os

View File

@ -14,7 +14,7 @@
# limitations under the License.
""" PyTorch XLM model.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import itertools
import logging

View File

@ -15,7 +15,6 @@
# limitations under the License.
"""PyTorch XLM-RoBERTa model. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -15,11 +15,10 @@
# limitations under the License.
""" PyTorch XLNet model.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import math
import sys
import torch
from torch import nn
@ -420,9 +419,7 @@ class XLNetFeedForward(nn.Module):
self.layer_1 = nn.Linear(config.d_model, config.d_inner)
self.layer_2 = nn.Linear(config.d_inner, config.d_model)
self.dropout = nn.Dropout(config.dropout)
if isinstance(config.ff_activation, str) or (
sys.version_info[0] == 2 and isinstance(config.ff_activation, unicode) # noqa: F821
):
if isinstance(config.ff_activation, str):
self.activation_function = ACT2FN[config.ff_activation]
else:
self.activation_function = config.ff_activation

View File

@ -14,7 +14,6 @@
# ==============================================================================
"""Functions and classes related to optimization (weight updates)."""
from __future__ import absolute_import, division, print_function
import re

View File

@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function, unicode_literals
import csv
import json
@ -26,7 +26,6 @@ from os.path import abspath, exists
from typing import Dict, List, Optional, Tuple, Union
import numpy as np
import six
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig
from .configuration_utils import PretrainedConfig
@ -939,7 +938,7 @@ def pipeline(
modelcard = config
# Instantiate tokenizer if needed
if isinstance(tokenizer, six.string_types):
if isinstance(tokenizer, str):
tokenizer = AutoTokenizer.from_pretrained(tokenizer)
# Instantiate config if needed

View File

@ -13,15 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Tokenization classes for ALBERT model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os
import unicodedata
from shutil import copyfile
import six
from .tokenization_utils import PreTrainedTokenizer
@ -139,9 +137,6 @@ class AlbertTokenizer(PreTrainedTokenizer):
outputs = inputs
outputs = outputs.replace("``", '"').replace("''", '"')
if six.PY2 and isinstance(outputs, str):
outputs = outputs.decode("utf-8")
if not self.keep_accents:
outputs = unicodedata.normalize("NFKD", outputs)
outputs = "".join([c for c in outputs if not unicodedata.combining(c)])
@ -150,14 +145,9 @@ class AlbertTokenizer(PreTrainedTokenizer):
return outputs
def _tokenize(self, text, return_unicode=True, sample=False):
""" Tokenize a string.
return_unicode is used only for py2
"""
def _tokenize(self, text, sample=False):
""" Tokenize a string. """
text = self.preprocess_text(text)
# note(zhiliny): in some systems, sentencepiece only accepts str for py2
if six.PY2 and isinstance(text, unicode): # noqa: F821
text = text.encode("utf-8")
if not sample:
pieces = self.sp_model.EncodeAsPieces(text)
@ -177,27 +167,15 @@ class AlbertTokenizer(PreTrainedTokenizer):
else:
new_pieces.append(piece)
# note(zhiliny): convert back to unicode for py2
if six.PY2 and return_unicode:
ret_pieces = []
for piece in new_pieces:
if isinstance(piece, str):
piece = piece.decode("utf-8")
ret_pieces.append(piece)
new_pieces = ret_pieces
return new_pieces
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
return self.sp_model.PieceToId(token)
def _convert_id_to_token(self, index, return_unicode=True):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
token = self.sp_model.IdToPiece(index)
if six.PY2 and return_unicode and isinstance(token, str):
token = token.decode("utf-8")
return token
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab."""
return self.sp_model.IdToPiece(index)
def convert_tokens_to_string(self, tokens):
"""Converts a sequence of tokens (strings for sub-words) in a single string."""

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Auto Model class. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -14,13 +14,11 @@
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import, division, print_function, unicode_literals
import collections
import logging
import os
import unicodedata
from io import open
from .tokenization_utils import PreTrainedTokenizer
@ -203,11 +201,11 @@ class BertTokenizer(PreTrainedTokenizer):
return split_tokens
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
return self.vocab.get(token, self.vocab.get(self.unk_token))
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
"""Converts an index (integer) in a token (str) using the vocab."""
return self.ids_to_tokens.get(index, self.unk_token)
def convert_tokens_to_string(self, tokens):

View File

@ -14,15 +14,12 @@
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import, division, print_function, unicode_literals
import collections
import logging
import os
import unicodedata
import six
from .tokenization_bert import BasicTokenizer, BertTokenizer, WordpieceTokenizer, load_vocab
@ -195,10 +192,7 @@ class MecabTokenizer(object):
never_split = self.never_split + (never_split if never_split is not None else [])
tokens = []
if six.PY2:
mecab_output = self.mecab.parse(text.encode("utf-8")).decode("utf-8")
else:
mecab_output = self.mecab.parse(text)
mecab_output = self.mecab.parse(text)
cursor = 0
for line in mecab_output.split("\n"):

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License
""" Tokenization classes for Camembert model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os
@ -155,7 +155,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
return self.sp_model.EncodeAsPieces(text)
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
if token in self.fairseq_tokens_to_ids:
return self.fairseq_tokens_to_ids[token]
elif self.sp_model.PieceToId(token) == 0:
@ -164,7 +164,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
return self.fairseq_offset + self.sp_model.PieceToId(token)
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
"""Converts an index (integer) in a token (str) using the vocab."""
if index in self.fairseq_ids_to_tokens:
return self.fairseq_ids_to_tokens[index]
return self.sp_model.IdToPiece(index - self.fairseq_offset)

View File

@ -13,12 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for Salesforce CTRL."""
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import os
from io import open
import regex as re
@ -204,11 +203,11 @@ class CTRLTokenizer(PreTrainedTokenizer):
return split_tokens
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
return self.encoder.get(token, self.encoder.get(self.unk_token))
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
"""Converts an index (integer) in a token (str) using the vocab."""
return self.decoder.get(index, self.unk_token)
def convert_tokens_to_string(self, tokens):

View File

@ -14,7 +14,6 @@
# limitations under the License.
"""Tokenization classes for DistilBERT."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging

View File

@ -13,28 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for OpenAI GPT."""
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import os
import sys
from io import open
from functools import lru_cache
import regex as re
from .tokenization_utils import PreTrainedTokenizer
try:
from functools import lru_cache
except ImportError:
# Just a dummy decorator to get the checks to run on python2
# because honestly I don't want to support a byte-level unicode BPE tokenizer on python 2 right now.
def lru_cache():
return lambda func: func
logger = logging.getLogger(__name__)
VOCAB_FILES_NAMES = {
@ -80,7 +70,6 @@ def bytes_to_unicode():
This is a signficant percentage of your normal, say, 32K bpe vocab.
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
"""
_chr = unichr if sys.version_info[0] == 2 else chr # noqa: F821
bs = (
list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1))
)
@ -91,7 +80,7 @@ def bytes_to_unicode():
bs.append(b)
cs.append(2 ** 8 + n)
n += 1
cs = [_chr(n) for n in cs]
cs = [chr(n) for n in cs]
return dict(zip(bs, cs))
@ -212,23 +201,18 @@ class GPT2Tokenizer(PreTrainedTokenizer):
bpe_tokens = []
for token in re.findall(self.pat, text):
if sys.version_info[0] == 2:
token = "".join(
self.byte_encoder[ord(b)] for b in token
) # Maps all our bytes to unicode strings, avoiding controle tokens of the BPE (spaces in our case)
else:
token = "".join(
self.byte_encoder[b] for b in token.encode("utf-8")
) # Maps all our bytes to unicode strings, avoiding controle tokens of the BPE (spaces in our case)
token = "".join(
self.byte_encoder[b] for b in token.encode("utf-8")
) # Maps all our bytes to unicode strings, avoiding controle tokens of the BPE (spaces in our case)
bpe_tokens.extend(bpe_token for bpe_token in self.bpe(token).split(" "))
return bpe_tokens
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
return self.encoder.get(token, self.encoder.get(self.unk_token))
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
"""Converts an index (integer) in a token (str) using the vocab."""
return self.decoder.get(index)
def convert_tokens_to_string(self, tokens):
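The GPT-2 tokenizer hunks drop the `unichr` shim and the dummy `lru_cache`, leaving the Python 3 builtins. A condensed sketch of the byte-to-character table those changes simplify (named `bytes_to_chars` here to mark it as an illustration of the technique, not the library function itself):

```python
def bytes_to_chars():
    # Map every possible UTF-8 byte to a printable unicode character so the
    # byte-level BPE never has to handle raw control bytes.
    bs = (
        list(range(ord("!"), ord("~") + 1))
        + list(range(ord("¡"), ord("¬") + 1))
        + list(range(ord("®"), ord("ÿ") + 1))
    )
    cs = bs[:]
    n = 0
    for b in range(2 ** 8):
        if b not in bs:
            bs.append(b)
            cs.append(2 ** 8 + n)
            n += 1
    return dict(zip(bs, [chr(c) for c in cs]))  # chr() is the Python 3 builtin

byte_encoder = bytes_to_chars()
print("".join(byte_encoder[b] for b in "héllo".encode("utf-8")))
```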

View File

@ -13,13 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for OpenAI GPT."""
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import os
import re
from io import open
from .tokenization_bert import BasicTokenizer
from .tokenization_utils import PreTrainedTokenizer
@ -177,7 +176,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
return split_tokens
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
return self.encoder.get(token, self.encoder.get(self.unk_token))
def _convert_id_to_token(self, index):

View File

@ -13,22 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for RoBERTa."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
from .tokenization_gpt2 import GPT2Tokenizer
try:
from functools import lru_cache
except ImportError:
# Just a dummy decorator to get the checks to run on python2
# because honestly I don't want to support a byte-level unicode BPE tokenizer on python 2 right now.
def lru_cache():
return lambda func: func
logger = logging.getLogger(__name__)
VOCAB_FILES_NAMES = {

View File

@ -14,15 +14,12 @@
# limitations under the License.
""" Tokenization class for model T5."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os
import re
from shutil import copyfile
import six
from .tokenization_utils import PreTrainedTokenizer
@ -138,41 +135,29 @@ class T5Tokenizer(PreTrainedTokenizer):
self.sp_model = spm.SentencePieceProcessor()
self.sp_model.Load(self.vocab_file)
def _tokenize(self, text, return_unicode=True, sample=False):
def _tokenize(self, text, sample=False):
""" Take as input a string and return a list of strings (tokens) for words/sub-words
"""
if not sample:
pieces = self.sp_model.EncodeAsPieces(text)
else:
pieces = self.sp_model.SampleEncodeAsPieces(text, 64, 0.1)
# convert back to unicode for py2
if six.PY2 and return_unicode:
ret_pieces = []
for piece in pieces:
if isinstance(piece, str):
piece = piece.decode("utf-8")
ret_pieces.append(piece)
pieces = ret_pieces
return pieces
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
if token.startswith("<extra_id_"):
match = re.match(r"<extra_id_(\d+)>", token)
num = int(match.group(1))
return self.vocab_size - num - 1
return self.sp_model.piece_to_id(token)
def _convert_id_to_token(self, index, return_unicode=True):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab."""
if index < self.sp_model.get_piece_size():
token = self.sp_model.IdToPiece(index)
else:
token = "<extra_id_{}>".format(self.vocab_size - 1 - index)
if six.PY2 and return_unicode and isinstance(token, str):
token = token.decode("utf-8")
return token
def convert_tokens_to_string(self, tokens):

View File

@ -16,14 +16,13 @@
""" Tokenization classes for Transformer XL model.
Adapted from https://github.com/kimiyoung/transformer-xl.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import glob
import logging
import os
import sys
import pickle
from collections import Counter, OrderedDict
from io import open
import numpy as np
@ -36,11 +35,6 @@ try:
except ImportError:
pass
if sys.version_info[0] == 2:
import cPickle as pickle
else:
import pickle
logger = logging.getLogger(__name__)
@ -238,7 +232,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
return self.idx2sym[idx]
def _convert_token_to_id(self, sym):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
if sym in self.sym2idx:
return self.sym2idx[sym]
else:

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for OpenAI GPT."""
from __future__ import absolute_import, division, print_function, unicode_literals
import copy
import itertools
@ -21,9 +21,6 @@ import json
import logging
import os
import re
from io import open
import six
from .file_utils import cached_path, hf_bucket_url, is_remote_url, is_tf_available, is_torch_available
@ -251,11 +248,9 @@ class PreTrainedTokenizer(object):
for key, value in kwargs.items():
if key in self.SPECIAL_TOKENS_ATTRIBUTES:
if key == "additional_special_tokens":
assert isinstance(value, (list, tuple)) and all(
isinstance(t, str) or (six.PY2 and isinstance(t, unicode)) for t in value # noqa: F821
)
assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
else:
assert isinstance(value, str) or (six.PY2 and isinstance(value, unicode)) # noqa: F821
assert isinstance(value, str)
setattr(self, key, value)
@classmethod
@ -567,7 +562,7 @@ class PreTrainedTokenizer(object):
to_add_tokens = []
for token in new_tokens:
assert isinstance(token, str) or (six.PY2 and isinstance(token, unicode)) # noqa: F821
assert isinstance(token, str)
if self.init_kwargs.get("do_lower_case", False) and token not in self.all_special_tokens:
token = token.lower()
if (
@ -649,12 +644,10 @@ class PreTrainedTokenizer(object):
for key, value in special_tokens_dict.items():
assert key in self.SPECIAL_TOKENS_ATTRIBUTES
if key == "additional_special_tokens":
assert isinstance(value, (list, tuple)) and all(
isinstance(t, str) or (six.PY2 and isinstance(t, unicode)) for t in value # noqa: F821
)
assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
added_tokens += self.add_tokens(value)
else:
assert isinstance(value, str) or (six.PY2 and isinstance(value, unicode)) # noqa: F821
assert isinstance(value, str)
added_tokens += self.add_tokens([value])
logger.info("Assigning %s to the %s key of the tokenizer", value, key)
setattr(self, key, value)
@ -740,13 +733,13 @@ class PreTrainedTokenizer(object):
raise NotImplementedError
def convert_tokens_to_ids(self, tokens):
""" Converts a single token, or a sequence of tokens, (str/unicode) in a single integer id
""" Converts a single token, or a sequence of tokens, (str) in a single integer id
(resp. a sequence of ids), using the vocabulary.
"""
if tokens is None:
return None
if isinstance(tokens, str) or (six.PY2 and isinstance(tokens, unicode)): # noqa: F821
if isinstance(tokens, str):
return self._convert_token_to_id_with_added_voc(tokens)
ids = []
@ -901,9 +894,9 @@ class PreTrainedTokenizer(object):
"""
def get_input_ids(text):
if isinstance(text, six.string_types):
if isinstance(text, str):
return self.convert_tokens_to_ids(self.tokenize(text, **kwargs))
elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], six.string_types):
elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], str):
return self.convert_tokens_to_ids(text)
elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], int):
return text
@ -1297,7 +1290,7 @@ class PreTrainedTokenizer(object):
def convert_ids_to_tokens(self, ids, skip_special_tokens=False):
""" Converts a single index or a sequence of indices (integers) in a token "
(resp.) a sequence of tokens (str/unicode), using the vocabulary and added tokens.
(resp.) a sequence of tokens (str), using the vocabulary and added tokens.
Args:
skip_special_tokens: Don't decode special tokens (self.all_special_tokens). Default: False
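After the switch from `six.string_types` to plain `str`, the input dispatch in `get_input_ids` reads as straightforward isinstance checks. A self-contained toy version of that dispatch (the tokenizer and vocabulary here are stand-ins, not the library's):

```python
def to_ids(text, tokenize, convert):
    if isinstance(text, str):
        return convert(tokenize(text))  # raw string
    if isinstance(text, (list, tuple)) and text and isinstance(text[0], str):
        return convert(list(text))      # pre-tokenized
    if isinstance(text, (list, tuple)) and text and isinstance(text[0], int):
        return list(text)               # already ids
    raise ValueError("Input must be a string, a list of tokens, or a list of ids.")

vocab = {"hello": 7, "world": 8}
print(to_ids("hello world", str.split, lambda toks: [vocab[t] for t in toks]))  # [7, 8]
```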

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for XLM."""
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
@ -21,7 +21,6 @@ import os
import re
import sys
import unicodedata
from io import open
import sacremoses as sm
@ -798,11 +797,11 @@ class XLMTokenizer(PreTrainedTokenizer):
return split_tokens
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
return self.encoder.get(token, self.encoder.get(self.unk_token))
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
"""Converts an index (integer) in a token (str) using the vocab."""
return self.decoder.get(index, self.unk_token)
def convert_tokens_to_string(self, tokens):

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License
""" Tokenization classes for XLM-RoBERTa model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os
@ -171,13 +171,13 @@ class XLMRobertaTokenizer(PreTrainedTokenizer):
return self.sp_model.EncodeAsPieces(text)
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
if token in self.fairseq_tokens_to_ids:
return self.fairseq_tokens_to_ids[token]
return self.sp_model.PieceToId(token) + self.fairseq_offset
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
"""Converts an index (integer) in a token (str) using the vocab."""
if index in self.fairseq_ids_to_tokens:
return self.fairseq_ids_to_tokens[index]
return self.sp_model.IdToPiece(index - self.fairseq_offset)

View File

@ -13,15 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Tokenization classes for XLNet model."""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os
import unicodedata
from shutil import copyfile
import six
from .tokenization_utils import PreTrainedTokenizer
@ -139,9 +137,6 @@ class XLNetTokenizer(PreTrainedTokenizer):
outputs = inputs
outputs = outputs.replace("``", '"').replace("''", '"')
if six.PY2 and isinstance(outputs, str):
outputs = outputs.decode("utf-8")
if not self.keep_accents:
outputs = unicodedata.normalize("NFKD", outputs)
outputs = "".join([c for c in outputs if not unicodedata.combining(c)])
@ -150,14 +145,9 @@ class XLNetTokenizer(PreTrainedTokenizer):
return outputs
def _tokenize(self, text, return_unicode=True, sample=False):
""" Tokenize a string.
return_unicode is used only for py2
"""
def _tokenize(self, text, sample=False):
""" Tokenize a string. """
text = self.preprocess_text(text)
# note(zhiliny): in some systems, sentencepiece only accepts str for py2
if six.PY2 and isinstance(text, unicode): # noqa: F821
text = text.encode("utf-8")
if not sample:
pieces = self.sp_model.EncodeAsPieces(text)
@ -177,27 +167,15 @@ class XLNetTokenizer(PreTrainedTokenizer):
else:
new_pieces.append(piece)
# note(zhiliny): convert back to unicode for py2
if six.PY2 and return_unicode:
ret_pieces = []
for piece in new_pieces:
if isinstance(piece, str):
piece = piece.decode("utf-8")
ret_pieces.append(piece)
new_pieces = ret_pieces
return new_pieces
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str) in an id using the vocab. """
return self.sp_model.PieceToId(token)
def _convert_id_to_token(self, index, return_unicode=True):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
token = self.sp_model.IdToPiece(index)
if six.PY2 and return_unicode and isinstance(token, str):
token = token.decode("utf-8")
return token
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab."""
return self.sp_model.IdToPiece(index)
def convert_tokens_to_string(self, tokens):
"""Converts a sequence of tokens (strings for sub-words) in a single string."""

View File

@ -14,7 +14,6 @@
# limitations under the License.
""" Finetuning the library models for task XXX."""
from __future__ import absolute_import, division, print_function
import argparse
import glob
@ -156,7 +155,7 @@ def train(args, train_dataset, model, tokenizer):
tr_loss, logging_loss = 0.0, 0.0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
set_seed(args) # Added here for reproductibility (even between python 2 and 3)
set_seed(args) # Added here for reproductibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):

Some files were not shown because too many files have changed in this diff.