fix #1260 - remove special logic for decoding pairs of sequences

2025-07-31 02:02:21 +06:00 · 2019-10-01 19:09:13 -04:00 · 2019-10-01 19:09:13 -04:00 · 391db836ab
commit 391db836ab
parent 963529e29b
1 changed files with 4 additions and 13 deletions
--- a/transformers/tokenization_utils.py
+++ b/transformers/tokenization_utils.py
@@ -933,20 +933,11 @@ class PreTrainedTokenizer(object):
            sub_texts.append(self.convert_tokens_to_string(current_sub_text))
        text = ''.join(sub_texts)

-        if self._sep_token is not None and self._sep_token in text:
-            text = text.replace(self._cls_token, self._sep_token)
-            split_text = list(filter(lambda sentence: len(sentence) > 0, text.split(self._sep_token)))
-            if clean_up_tokenization_spaces:
-                clean_text = [self.clean_up_tokenization(text) for text in split_text]
-                return clean_text
-            else:
-                return split_text
+        if clean_up_tokenization_spaces:
+            clean_text = self.clean_up_tokenization(text)
+            return clean_text
        else:
-            if clean_up_tokenization_spaces:
-                clean_text = self.clean_up_tokenization(text)
-                return clean_text
-            else:
-                return text
+            return text

    @property
    def special_tokens_map(self):