mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-02 19:21:31 +06:00
Typo on docstring in ElectraTokenizer (#20192)
* chore: typo on docstring in tokenization_electra * chore: typo on docstring in tokenization_electra * update for check copies
This commit is contained in:
parent
4c7e8d0900
commit
2d92001076
@ -89,10 +89,10 @@ def whitespace_tokenize(text):
|
||||
return tokens
|
||||
|
||||
|
||||
# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra
|
||||
# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra,BERT->Electra
|
||||
class ElectraTokenizer(PreTrainedTokenizer):
|
||||
r"""
|
||||
Construct a BERT tokenizer. Based on WordPiece.
|
||||
Construct an Electra tokenizer. Based on WordPiece.
|
||||
|
||||
This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to
|
||||
this superclass for more information regarding those methods.
|
||||
@ -129,7 +129,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
|
||||
[issue](https://github.com/huggingface/transformers/issues/328)).
|
||||
strip_accents (`bool`, *optional*):
|
||||
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
|
||||
value for `lowercase` (as in the original BERT).
|
||||
value for `lowercase` (as in the original Electra).
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
@ -226,7 +226,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
|
||||
) -> List[int]:
|
||||
"""
|
||||
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
|
||||
adding special tokens. A BERT sequence has the following format:
|
||||
adding special tokens. An Electra sequence has the following format:
|
||||
|
||||
- single sequence: `[CLS] X [SEP]`
|
||||
- pair of sequences: `[CLS] A [SEP] B [SEP]`
|
||||
@ -278,8 +278,8 @@ class ElectraTokenizer(PreTrainedTokenizer):
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A BERT sequence
|
||||
pair mask has the following format:
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. An Electra
|
||||
sequence pair mask has the following format:
|
||||
|
||||
```
|
||||
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
|
||||
|
Loading…
Reference in New Issue
Block a user