Typo on docstring in ElectraTokenizer (#20192)

* chore: typo on docstring in tokenization_electra

* chore: typo on docstring in tokenization_electra

* update for check copies
Author: Yong woo Song, 2022-11-15 23:10:20 +09:00 (committed by GitHub)
parent 4c7e8d0900
commit 2d92001076

@@ -89,10 +89,10 @@ def whitespace_tokenize(text):
     return tokens
 
 
-# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra
+# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra,BERT->Electra
 class ElectraTokenizer(PreTrainedTokenizer):
     r"""
-    Construct a BERT tokenizer. Based on WordPiece.
+    Construct an Electra tokenizer. Based on WordPiece.
 
     This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to
     this superclass for more information regarding those methods.
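
Reviewer note, not part of the diff: a minimal usage sketch of the class whose docstring is edited here, assuming the published `google/electra-small-discriminator` checkpoint.

```python
# Minimal sketch, assuming the google/electra-small-discriminator checkpoint.
# ElectraTokenizer is WordPiece-based: out-of-vocabulary words are split into
# subword pieces, with continuation pieces prefixed by "##".
from transformers import ElectraTokenizer

tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
print(tokenizer.tokenize("ELECTRA uses WordPiece tokenization."))
```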
@@ -129,7 +129,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
             [issue](https://github.com/huggingface/transformers/issues/328)).
         strip_accents (`bool`, *optional*):
             Whether or not to strip all accents. If this option is not specified, then it will be determined by the
-            value for `lowercase` (as in the original BERT).
+            value for `lowercase` (as in the original Electra).
     """
 
     vocab_files_names = VOCAB_FILES_NAMES
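
The `strip_accents` behavior documented in this hunk can be exercised directly; a hedged sketch, with the same assumed checkpoint, showing that an explicit value overrides the `do_lower_case`-derived default:

```python
from transformers import ElectraTokenizer

# strip_accents left unset (None) means: strip accents iff do_lower_case=True.
# Passing strip_accents=False keeps accents even though input is lowercased.
tokenizer = ElectraTokenizer.from_pretrained(
    "google/electra-small-discriminator",
    do_lower_case=True,
    strip_accents=False,
)
print(tokenizer.tokenize("Café Noël"))  # accents survive lowercasing
```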
@ -226,7 +226,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
) -> List[int]:
"""
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A BERT sequence has the following format:
adding special tokens. A Electra sequence has the following format:
- single sequence: `[CLS] X [SEP]`
- pair of sequences: `[CLS] A [SEP] B [SEP]`
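
The `[CLS] A [SEP] B [SEP]` layout above, and the 0/1 segment mask documented in the next hunk, can both be checked against the public tokenizer API; a sketch under the same assumed checkpoint:

```python
from transformers import ElectraTokenizer

tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")

ids_a = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("first sequence"))
ids_b = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("second sequence"))

# Special-token layout: `[CLS] A [SEP] B [SEP]`
input_ids = tokenizer.build_inputs_with_special_tokens(ids_a, ids_b)
print(tokenizer.convert_ids_to_tokens(input_ids))

# Segment mask: 0s cover `[CLS] A [SEP]`, 1s cover `B [SEP]`
print(tokenizer.create_token_type_ids_from_sequences(ids_a, ids_b))
```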
@@ -278,8 +278,8 @@ class ElectraTokenizer(PreTrainedTokenizer):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A BERT sequence
-        pair mask has the following format:
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. An Electra
+        sequence pair mask has the following format:
 
         ```
         0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1