mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Fix mask token handling (#14364)
* Fix mask token handling
* Revert "Fix mask token handling"
This reverts commit daaa3f5291.
* Fix FNet mask token tokenization
This commit is contained in:
parent
4df7d05a87
commit
934e2799da
@ -113,8 +113,13 @@ class FNetTokenizer(PreTrainedTokenizer):
|
||||
sp_model_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> None:
|
||||
# Mask token behave like a normal word, i.e. include the space before it
|
||||
mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
|
||||
# Mask token behaves like a normal word, i.e. includes the space before it and
|
||||
# is included in the raw text, so there should be a match in a non-normalized sentence.
|
||||
mask_token = (
|
||||
AddedToken(mask_token, lstrip=True, rstrip=False, normalized=False)
|
||||
if isinstance(mask_token, str)
|
||||
else mask_token
|
||||
)
|
||||
|
||||
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
||||
|
||||
|
@ -107,8 +107,13 @@ class FNetTokenizerFast(PreTrainedTokenizerFast):
|
||||
mask_token="[MASK]",
|
||||
**kwargs
|
||||
):
|
||||
# Mask token behave like a normal word, i.e. include the space before it
|
||||
mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
|
||||
# Mask token behaves like a normal word, i.e. includes the space before it and
|
||||
# is included in the raw text, so there should be a match in a non-normalized sentence.
|
||||
mask_token = (
|
||||
AddedToken(mask_token, lstrip=True, rstrip=False, normalized=False)
|
||||
if isinstance(mask_token, str)
|
||||
else mask_token
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
vocab_file,
|
||||
|
Loading…
Reference in New Issue
Block a user