single word should be set to False (#27738)

This commit is contained in:
Arthur 2023-12-04 14:56:51 +01:00 committed by GitHub
parent 2b5d5ead53
commit e739a361bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -183,7 +183,7 @@ class T5Tokenizer(PreTrainedTokenizer):
self._added_tokens_decoder = {}
for i in range(len(extra_tokens)):
self._added_tokens_decoder[len(self.sp_model) - 1 + extra_ids - i] = AddedToken(
-f"<extra_id_{i}>", single_word=True, lstrip=True, rstrip=True, special=True
+f"<extra_id_{i}>", single_word=False, lstrip=True, rstrip=True, special=True, normalized=False
)
if legacy is None: