🐛 Fix #2096: in tokenizer.decode, add a space after special tokens so the returned string is correctly formatted

2025-07-31 02:02:21 +06:00 · 2019-12-08 23:22:02 +01:00 · 2019-12-08 23:22:02 +01:00 · 5b7b78e088
commit 5b7b78e088
parent 866d73ca26
1 changed file with 1 addition and 1 deletion
--- a/transformers/tokenization_utils.py
+++ b/transformers/tokenization_utils.py
@@ -1180,7 +1180,7 @@ class PreTrainedTokenizer(object):
                if current_sub_text:
                    sub_texts.append(self.convert_tokens_to_string(current_sub_text))
                    current_sub_text = []
-                sub_texts.append(" " + token)
+                sub_texts.append(" " + token + " ")
            else:
                current_sub_text.append(token)
        if current_sub_text: