update GPT2 docstring

This commit is contained in:
thomwolf 2019-08-30 12:23:12 +02:00
parent abe734ca1f
commit fd10d79b55

View File

@ -99,7 +99,10 @@ def get_pairs(word):
class GPT2Tokenizer(PreTrainedTokenizer):
"""
GPT-2 BPE tokenizer. Peculiarities:
- Byte-level BPE
- Byte-level Byte-Pair-Encoding
- Requires a space to start the input string => will add a space is there isn't.
As a consequence, this tokenizer `encode` and `decode` method will not conserve
the absence of a space at the beginning of a string: `tokenizer.decode(tokenizer.encode("Hello")) = " Hello"
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP