Fix tokenization of extra_id symbols in T5Tokenizer. Related to issue #4021 (#4353)

This commit is contained in:
Elman Mansimov 2020-05-25 16:04:30 -04:00 committed by GitHub
parent 5139733623
commit 3dea40b858
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -503,6 +503,7 @@ class SpecialTokensMixin:
if key in self.SPECIAL_TOKENS_ATTRIBUTES:
if key == "additional_special_tokens":
assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
setattr(self, key, value)
elif isinstance(value, AddedTokenFast):
setattr(self, key, str(value))
elif isinstance(value, str):