fix #1260 - remove special logic for decoding pairs of sequences

2025-08-01 18:51:14 +06:00 · 2019-10-01 19:09:13 -04:00 · 2019-10-01 19:09:13 -04:00 · 391db836ab
commit 391db836ab
parent 963529e29b
1 changed file with 4 additions and 13 deletions
--- a/transformers/tokenization_utils.py
+++ b/transformers/tokenization_utils.py
@@ -933,20 +933,11 @@ class PreTrainedTokenizer(object):
            sub_texts.append(self.convert_tokens_to_string(current_sub_text))
        text = ''.join(sub_texts)
-        if self._sep_token is not None and self._sep_token in text:
-            text = text.replace(self._cls_token, self._sep_token)
-            split_text = list(filter(lambda sentence: len(sentence) > 0, text.split(self._sep_token)))
-            if clean_up_tokenization_spaces:
-                clean_text = [self.clean_up_tokenization(text) for text in split_text]
-                return clean_text
-            else:
-                return split_text
+        if clean_up_tokenization_spaces:
+            clean_text = self.clean_up_tokenization(text)
+            return clean_text
        else:
-            if clean_up_tokenization_spaces:
-                clean_text = self.clean_up_tokenization(text)
-                return clean_text
-            else:
-                return text
+            return text
    @property
    def special_tokens_map(self):