Mirror of https://github.com/huggingface/transformers.git
Restrain tokenizer.model_max_length default (#9681)
* Restrain tokenizer.model_max_length default
* Fix indent
This commit is contained in:
parent 7e662e6a3b
commit a1ad16a446
@@ -338,6 +338,12 @@ def main():
     if data_args.max_seq_length is None:
         max_seq_length = tokenizer.model_max_length
+        if max_seq_length > 1024:
+            logger.warn(
+                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
+                "Picking 1024 instead. You can change that default value by passing --max_seq_length xxx."
+            )
+            max_seq_length = 1024
     else:
         if data_args.max_seq_length > tokenizer.model_max_length:
             logger.warn(
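For context, below is a minimal, self-contained sketch of the full length-resolution logic this commit establishes. The names DataArgs and resolve_max_seq_length are hypothetical stand-ins for the example scripts' argument dataclass and inline code, and since the diff above is cut off inside the else branch, its warning message and the min(...) clamp are assumptions modeled on the usual pattern in these scripts, not the commit's verbatim text.

import logging
from dataclasses import dataclass
from typing import Optional

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


@dataclass
class DataArgs:
    # Hypothetical stand-in for the example scripts' DataTrainingArguments;
    # only the field this commit touches is modeled.
    max_seq_length: Optional[int] = None


def resolve_max_seq_length(data_args: DataArgs, model_max_length: int) -> int:
    # Mirrors the capping logic added in this commit: tokenizers with no real
    # limit report a huge sentinel model_max_length, so fall back to 1024.
    if data_args.max_seq_length is None:
        max_seq_length = model_max_length
        if max_seq_length > 1024:
            logger.warning(
                f"The tokenizer picked seems to have a very large `model_max_length` "
                f"({model_max_length}). Picking 1024 instead. You can change that "
                "default value by passing --max_seq_length xxx."
            )
            max_seq_length = 1024
    else:
        # Assumed completion of the truncated else branch: warn, then clamp
        # the user-supplied value to what the model can actually handle.
        if data_args.max_seq_length > model_max_length:
            logger.warning(
                f"The max_seq_length passed ({data_args.max_seq_length}) is larger "
                f"than the maximum length for the model ({model_max_length}). "
                f"Using max_seq_length={model_max_length}."
            )
        max_seq_length = min(data_args.max_seq_length, model_max_length)
    return max_seq_length


# Tokenizers without a configured maximum report a sentinel on the order of
# 1e30 as model_max_length, which this commit now caps at 1024.
assert resolve_max_seq_length(DataArgs(), int(1e30)) == 1024
assert resolve_max_seq_length(DataArgs(max_seq_length=128), 512) == 128

The cap exists because transformers tokenizers with no configured maximum report a very large sentinel (VERY_LARGE_INTEGER, i.e. int(1e30)) as model_max_length, which would otherwise be used directly as the sequence length. Note also that the diff itself calls logger.warn, an alias the Python logging module has deprecated in favor of logger.warning; the sketch uses the latter.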