mirror of https://github.com/huggingface/transformers.git
[T5 and Llama Tokenizer] remove warning (#29346)

* remove warning
* add co-author
* update

Co-authored-by: hiaoxui <hiaoxui@users.noreply.github.com>
This commit is contained in:
commit 7c87f3577e (parent a52888524d)
@@ -243,7 +243,7 @@ class LlamaTokenizer(PreTrainedTokenizer):
         return vocab
 
     # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize
-    def tokenize(self, text: "TextInput", add_special_tokens=False, **kwargs) -> List[str]:
+    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
         """
         Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
         first token is special.
@@ -255,7 +255,7 @@ class LlamaTokenizer(PreTrainedTokenizer):
         if self.add_prefix_space:
             text = SPIECE_UNDERLINE + text
 
-        tokens = super().tokenize(text, add_special_tokens=add_special_tokens, **kwargs)
+        tokens = super().tokenize(text, **kwargs)
 
         if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
             tokens = tokens[1:]
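The same two-line change is repeated below for SeamlessM4TTokenizer and T5Tokenizer. As an aside, the post-processing step kept intact in this hunk can be read in isolation roughly as follows; this is a standalone illustrative sketch, not repository code, and the function name and toy special-token set are assumptions:

# Illustrative sketch of the prefix clean-up in the hunk above (assumed names,
# not transformers code): a lone "▁" piece emitted right before a special token
# is dropped.
SPIECE_UNDERLINE = "▁"
ALL_SPECIAL_TOKENS = {"<unk>", "<pad>", "</s>"}  # assumed toy special-token set

def strip_stray_prefix(tokens: list[str]) -> list[str]:
    # Mirrors: if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens
    if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in ALL_SPECIAL_TOKENS:
        return tokens[1:]
    return tokens

print(strip_stray_prefix(["▁", "</s>", "▁Hello"]))  # -> ['</s>', '▁Hello']
print(strip_stray_prefix(["▁Hello", "▁world"]))     # unchanged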
@@ -447,7 +447,7 @@ class SeamlessM4TTokenizer(PreTrainedTokenizer):
         return tokenizer
 
     # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize
-    def tokenize(self, text: "TextInput", add_special_tokens=False, **kwargs) -> List[str]:
+    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
         """
         Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
         first token is special.
@@ -459,7 +459,7 @@ class SeamlessM4TTokenizer(PreTrainedTokenizer):
         if self.add_prefix_space:
             text = SPIECE_UNDERLINE + text
 
-        tokens = super().tokenize(text, add_special_tokens=add_special_tokens, **kwargs)
+        tokens = super().tokenize(text, **kwargs)
 
         if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
             tokens = tokens[1:]
@@ -377,7 +377,7 @@ class T5Tokenizer(PreTrainedTokenizer):
         self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
         self.sp_model.Load(self.vocab_file)
 
-    def tokenize(self, text: "TextInput", add_special_tokens=False, **kwargs) -> List[str]:
+    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
         """
         Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
         first token is special.
@@ -389,7 +389,7 @@ class T5Tokenizer(PreTrainedTokenizer):
         if self.add_prefix_space:
             text = SPIECE_UNDERLINE + text
 
-        tokens = super().tokenize(text, add_special_tokens=add_special_tokens, **kwargs)
+        tokens = super().tokenize(text, **kwargs)
 
         if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
             tokens = tokens[1:]
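For reference, a hypothetical call-site sketch after this change (the checkpoint name is an example, not part of the commit): the method is now declared as tokenize(self, text, **kwargs), so callers pass only the text.

# Hypothetical usage sketch, not part of the commit; assumes sentencepiece is
# installed and uses the "t5-small" checkpoint purely as an example.
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-small")

# The add_prefix_space / legacy handling described in the docstring happens
# inside tokenize; the caller just passes raw text.
print(tokenizer.tokenize("Hello world"))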