🐛 #2096: in tokenizer.decode, add a space after special tokens so that the returned string is correctly formatted

This commit is contained in:
Pascal Voitot 2019-12-08 23:22:02 +01:00 committed by Lysandre Debut
parent 866d73ca26
commit 5b7b78e088

View File

@@ -1180,7 +1180,7 @@ class PreTrainedTokenizer(object):
if current_sub_text:
sub_texts.append(self.convert_tokens_to_string(current_sub_text))
current_sub_text = []
sub_texts.append(" " + token)
sub_texts.append(" " + token + " ")
else:
current_sub_text.append(token)
if current_sub_text: