mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
update GPT2 docstring
This commit is contained in:
parent
abe734ca1f
commit
fd10d79b55
@ -99,7 +99,10 @@ def get_pairs(word):
|
||||
class GPT2Tokenizer(PreTrainedTokenizer):
|
||||
"""
|
||||
GPT-2 BPE tokenizer. Peculiarities:
|
||||
- Byte-level BPE
|
||||
- Byte-level Byte-Pair-Encoding
|
||||
- Requires a space to start the input string => will add a space if there isn't one.
|
||||
As a consequence, this tokenizer's `encode` and `decode` methods will not conserve
|
||||
the absence of a space at the beginning of a string: `tokenizer.decode(tokenizer.encode("Hello")) = " Hello"`
|
||||
"""
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
|
Loading…
Reference in New Issue
Block a user