[clean_up_tokenization_spaces] PLBart was failing, updating (#33735)

Set `clean_up_tokenization_spaces=True` by default for the PLBart tokenizer.
This commit is contained in:
Arthur 2024-09-27 10:26:51 +02:00 committed by GitHub
parent 294477aafb
commit 5f4420587a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -130,6 +130,7 @@ class PLBartTokenizer(PreTrainedTokenizer):
tgt_lang=None,
sp_model_kwargs: Optional[Dict[str, Any]] = None,
additional_special_tokens=None,
clean_up_tokenization_spaces=True,
**kwargs,
):
# Mask token behave like a normal word, i.e. include the space before it
@ -200,6 +201,7 @@ class PLBartTokenizer(PreTrainedTokenizer):
tgt_lang=tgt_lang,
additional_special_tokens=_additional_special_tokens,
sp_model_kwargs=self.sp_model_kwargs,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
**kwargs,
)