Correct special_tokens_mask when add_special_tokens=False (#2965)

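I don't know of a use case where this would be useful, but it is more consistent: when add_special_tokens=False, the returned special_tokens_mask is now all zeros with the same length as the returned sequence.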
This commit is contained in:
Lysandre Debut 2020-02-23 09:50:39 -05:00 committed by GitHub
parent 92487a1dc0
commit 0e84559d64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1233,7 +1233,10 @@ class PreTrainedTokenizer(object):
token_type_ids = [0] * len(ids) + ([1] * len(pair_ids) if pair else [])
if return_special_tokens_mask:
-encoded_inputs["special_tokens_mask"] = self.get_special_tokens_mask(ids, pair_ids)
+if add_special_tokens:
+encoded_inputs["special_tokens_mask"] = self.get_special_tokens_mask(ids, pair_ids)
+else:
+encoded_inputs["special_tokens_mask"] = [0] * len(sequence)
encoded_inputs["input_ids"] = sequence
if return_token_type_ids: