From 5b7b78e088352a3aaf1f80d26bb1cd466bc2ac64 Mon Sep 17 00:00:00 2001
From: Pascal Voitot
Date: Sun, 8 Dec 2019 23:22:02 +0100
Subject: [PATCH] :bug: #2096 in tokenizer.decode, adds a space after special
tokens so that the decoded string is correctly formatted
---
transformers/tokenization_utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/transformers/tokenization_utils.py b/transformers/tokenization_utils.py
index e87c87787b9..42519c26ba5 100644
--- a/transformers/tokenization_utils.py
+++ b/transformers/tokenization_utils.py
@@ -1180,7 +1180,7 @@ class PreTrainedTokenizer(object):
if current_sub_text:
sub_texts.append(self.convert_tokens_to_string(current_sub_text))
current_sub_text = []
- sub_texts.append(" " + token)
+ sub_texts.append(" " + token + " ")
else:
current_sub_text.append(token)
if current_sub_text: