mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-01 18:51:14 +06:00
fix #1260 - remove special logic for decoding pairs of sequences
This commit is contained in:
parent
963529e29b
commit
391db836ab
@ -933,20 +933,11 @@ class PreTrainedTokenizer(object):
|
|||||||
sub_texts.append(self.convert_tokens_to_string(current_sub_text))
|
sub_texts.append(self.convert_tokens_to_string(current_sub_text))
|
||||||
text = ''.join(sub_texts)
|
text = ''.join(sub_texts)
|
||||||
|
|
||||||
if self._sep_token is not None and self._sep_token in text:
|
if clean_up_tokenization_spaces:
|
||||||
text = text.replace(self._cls_token, self._sep_token)
|
clean_text = self.clean_up_tokenization(text)
|
||||||
split_text = list(filter(lambda sentence: len(sentence) > 0, text.split(self._sep_token)))
|
return clean_text
|
||||||
if clean_up_tokenization_spaces:
|
|
||||||
clean_text = [self.clean_up_tokenization(text) for text in split_text]
|
|
||||||
return clean_text
|
|
||||||
else:
|
|
||||||
return split_text
|
|
||||||
else:
|
else:
|
||||||
if clean_up_tokenization_spaces:
|
return text
|
||||||
clean_text = self.clean_up_tokenization(text)
|
|
||||||
return clean_text
|
|
||||||
else:
|
|
||||||
return text
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def special_tokens_map(self):
|
def special_tokens_map(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user