From 5b7b78e088352a3aaf1f80d26bb1cd466bc2ac64 Mon Sep 17 00:00:00 2001 From: Pascal Voitot Date: Sun, 8 Dec 2019 23:22:02 +0100 Subject: [PATCH] :bug: #2096 in tokenizer.decode, adds a space after special tokens to return right formatted string --- transformers/tokenization_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/tokenization_utils.py b/transformers/tokenization_utils.py index e87c87787b9..42519c26ba5 100644 --- a/transformers/tokenization_utils.py +++ b/transformers/tokenization_utils.py @@ -1180,7 +1180,7 @@ class PreTrainedTokenizer(object): if current_sub_text: sub_texts.append(self.convert_tokens_to_string(current_sub_text)) current_sub_text = [] - sub_texts.append(" " + token) + sub_texts.append(" " + token + " ") else: current_sub_text.append(token) if current_sub_text: