Use updated model_max_length when saving tokenizers (#20401)
* Use updated values

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent ad654e4484
commit 9a5b84a007
@@ -2082,6 +2082,14 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
         )
 
         tokenizer_config = copy.deepcopy(self.init_kwargs)
+
+        # TODO: Ensure the modified attributes (those are also in the __init__ kwargs) will give identical tokenizers
+        # target_keys = self.init_kwargs.keys()
+        target_keys = ["model_max_length"]
+        for k in target_keys:
+            if hasattr(self, k):
+                tokenizer_config[k] = getattr(self, k)
+
         if len(self.init_inputs) > 0:
             tokenizer_config["init_inputs"] = copy.deepcopy(self.init_inputs)
         for file_id in self.vocab_files_names.keys():
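The net effect of the patch: attributes mutated on the tokenizer after construction (currently just `model_max_length`) now override the stale values captured in `init_kwargs` when the config is serialized, so the saved `tokenizer_config.json` reflects the live object. Below is a minimal, self-contained sketch of that pattern; `ToyTokenizer` and `save_config` are hypothetical stand-ins for illustration, not the actual `transformers` save path.

```python
# Minimal sketch (hypothetical names, not the transformers API) of why the
# patch refreshes selected keys from the live object before saving.
import copy
import json


class ToyTokenizer:
    def __init__(self, model_max_length=512):
        # Mimics PreTrainedTokenizerBase, which remembers its __init__ kwargs.
        self.init_kwargs = {"model_max_length": model_max_length}
        self.model_max_length = model_max_length

    def save_config(self):
        config = copy.deepcopy(self.init_kwargs)
        # The fix: copy selected live attributes back into the config so
        # mutations made after __init__ are persisted instead of the
        # construction-time values.
        for k in ["model_max_length"]:
            if hasattr(self, k):
                config[k] = getattr(self, k)
        return config


tok = ToyTokenizer(model_max_length=512)
tok.model_max_length = 1024  # updated after construction
print(json.dumps(tok.save_config()))  # {"model_max_length": 1024}
```

Without the refresh loop, the deep copy of `init_kwargs` alone would persist the stale `512` here, which is exactly the bug the commit addresses for saved tokenizers.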