Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-01 02:31:11 +06:00)

Revert renaming in finetune_trainer (#9262)

parent: ab17758874
commit: e6c1f1cad8
@@ -93,19 +93,27 @@ class DataTrainingArguments:
             "than this will be truncated, sequences shorter will be padded."
         },
     )
-    max_length: Optional[int] = field(
+    max_target_length: Optional[int] = field(
         default=128,
         metadata={
             "help": "The maximum total sequence length for target text after tokenization. Sequences longer "
             "than this will be truncated, sequences shorter will be padded."
         },
     )
-    eval_max_length: Optional[int] = field(
+    val_max_target_length: Optional[int] = field(
         default=142,
         metadata={
             "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded."
-            " This argument is also used to override the ``max_length`` param of ``model.generate``, which is used during ``evaluate`` and ``predict``"
+            "than this will be truncated, sequences shorter will be padded. "
+            "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
+            "during ``evaluate`` and ``predict``."
+        },
+    )
+    test_max_target_length: Optional[int] = field(
+        default=142,
+        metadata={
+            "help": "The maximum total sequence length for test target text after tokenization. Sequences longer "
+            "than this will be truncated, sequences shorter will be padded."
         },
     )
     n_train: Optional[int] = field(default=-1, metadata={"help": "# training examples. -1 means use all."})
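The hunk above restores the max_target_length / val_max_target_length / test_max_target_length field names in DataTrainingArguments. As a minimal sketch of how such dataclass fields are consumed on the command line, here is a hypothetical, stripped-down stand-in parsed with transformers.HfArgumentParser (the real class has many more fields; only the three length fields are shown):

# Hypothetical reduced stand-in for DataTrainingArguments; not the real class
# from finetune_trainer.py, just the three reverted length fields.
from dataclasses import dataclass, field
from typing import Optional

from transformers import HfArgumentParser


@dataclass
class TargetLengthArgs:
    max_target_length: Optional[int] = field(default=128, metadata={"help": "Target length cap for training."})
    val_max_target_length: Optional[int] = field(default=142, metadata={"help": "Target length cap for validation."})
    test_max_target_length: Optional[int] = field(default=142, metadata={"help": "Target length cap for test."})


if __name__ == "__main__":
    # e.g. python args_demo.py --max_target_length 56 --val_max_target_length 142
    (args,) = HfArgumentParser(TargetLengthArgs).parse_args_into_dataclasses()
    print(args.max_target_length, args.val_max_target_length, args.test_max_target_length)

Parsing into dataclasses is how finetune_trainer.py exposes these fields as --max_target_length, --val_max_target_length and --test_max_target_length flags, which is why the test and the shell scripts further down change in lockstep.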
@@ -233,7 +241,7 @@ def main():
             type_path="train",
             data_dir=data_args.data_dir,
             n_obs=data_args.n_train,
-            max_target_length=data_args.max_length,
+            max_target_length=data_args.max_target_length,
             max_source_length=data_args.max_source_length,
             prefix=model.config.prefix or "",
         )
@@ -246,7 +254,7 @@ def main():
             type_path="val",
             data_dir=data_args.data_dir,
             n_obs=data_args.n_val,
-            max_target_length=data_args.eval_max_length,
+            max_target_length=data_args.val_max_target_length,
             max_source_length=data_args.max_source_length,
             prefix=model.config.prefix or "",
         )
@@ -259,7 +267,7 @@ def main():
             type_path="test",
             data_dir=data_args.data_dir,
             n_obs=data_args.n_test,
-            max_target_length=data_args.eval_max_length,
+            max_target_length=data_args.test_max_target_length,
             max_source_length=data_args.max_source_length,
             prefix=model.config.prefix or "",
         )
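The three hunks above give each split its own target-length cap when the datasets are built. The pattern they implement, written out as a small hypothetical helper (the names come from the diff, but this function is not part of the repo):

# Hypothetical helper illustrating the split -> target-length mapping used in
# the three dataset_class(...) calls above; not part of finetune_trainer.py.
def target_length_for_split(data_args, type_path: str) -> int:
    lengths = {
        "train": data_args.max_target_length,
        "val": data_args.val_max_target_length,
        "test": data_args.test_max_target_length,
    }
    return lengths[type_path]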
@@ -310,7 +318,7 @@ def main():
         logger.info("*** Evaluate ***")

         metrics = trainer.evaluate(
-            metric_key_prefix="val", max_length=data_args.eval_max_length, num_beams=data_args.eval_beams
+            metric_key_prefix="val", max_length=data_args.val_max_target_length, num_beams=data_args.eval_beams
         )
         metrics["val_n_objs"] = data_args.n_val
         metrics["val_loss"] = round(metrics["val_loss"], 4)
@@ -326,7 +334,7 @@ def main():
         test_output = trainer.predict(
             test_dataset=test_dataset,
             metric_key_prefix="test",
-            max_length=data_args.eval_max_length,
+            max_length=data_args.val_max_target_length,
             num_beams=data_args.eval_beams,
         )
         metrics = test_output.metrics
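In the two hunks above, val_max_target_length overrides the max_length argument of model.generate during evaluate and predict, exactly as the help string in the first hunk says. A small self-contained sketch of what that override amounts to at generation time; the t5-small checkpoint and the literal values are assumptions for illustration, not what the example scripts use:

# Illustrative sketch only: shows max_length/num_beams reaching model.generate,
# which is what the evaluate/predict overrides above control. The checkpoint
# and the values are assumptions, not taken from the diff.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

val_max_target_length = 142  # stands in for data_args.val_max_target_length
eval_beams = 4               # stands in for data_args.eval_beams

inputs = tokenizer(["translate English to Romanian: The weather is nice."], return_tensors="pt")
generated = model.generate(**inputs, max_length=val_max_target_length, num_beams=eval_beams)
print(tokenizer.batch_decode(generated, skip_special_tokens=True))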
@@ -137,8 +137,8 @@ class TestFinetuneTrainer(TestCasePlus):
             --n_train 8
             --n_val 8
             --max_source_length 18,472
-            --max_length 18,472
-            --eval_max_length 18,472
+            --max_target_length 18,472
+            --val_max_target_length 18,472
             --do_train
             --do_eval
             --do_predict
@@ -29,7 +29,8 @@ python finetune_trainer.py \
     --freeze_encoder --freeze_embeds \
     --num_train_epochs=6 \
     --save_steps 3000 --eval_steps 3000 \
-    --max_source_length $MAX_LEN --max_length $MAX_LEN --eval_max_length $MAX_LEN \
+    --max_source_length $MAX_LEN --max_target_length $MAX_LEN \
+    --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \
     --do_train --do_eval --do_predict \
     --evaluation_strategy steps \
     --predict_with_generate --logging_first_step \
@@ -30,7 +30,8 @@ python xla_spawn.py --num_cores $TPU_NUM_CORES \
     --num_train_epochs=6 \
     --save_steps 500 --eval_steps 500 \
     --logging_first_step --logging_steps 200 \
-    --max_source_length $MAX_LEN --max_length $MAX_LEN --eval_max_length $MAX_LEN \
+    --max_source_length $MAX_LEN --max_target_length $MAX_LEN \
+    --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \
     --do_train --do_eval \
     --evaluation_strategy steps \
     --prediction_loss_only \
@@ -32,7 +32,7 @@ python finetune_trainer.py \
     --num_train_epochs=2 \
     --save_steps 3000 --eval_steps 3000 \
     --logging_first_step \
-    --max_length 56 --eval_max_length $MAX_TGT_LEN \
+    --max_target_length 56 --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN\
     --do_train --do_eval --do_predict \
     --evaluation_strategy steps \
     --predict_with_generate --sortish_sampler \
@@ -24,7 +24,7 @@ python finetune_trainer.py \
     --src_lang en_XX --tgt_lang ro_RO \
     --freeze_embeds \
     --per_device_train_batch_size=4 --per_device_eval_batch_size=4 \
-    --max_source_length 128 --max_length 128 --eval_max_length 128 \
+    --max_source_length 128 --max_target_length 128 --val_max_target_length 128 --test_max_target_length 128\
     --sortish_sampler \
     --num_train_epochs 6 \
     --save_steps 25000 --eval_steps 25000 --logging_steps 1000 \
@@ -330,7 +330,7 @@ class Seq2SeqDataCollator:
             [x["src_texts"] for x in batch],
             tgt_texts=[x["tgt_texts"] for x in batch],
             max_length=self.data_args.max_source_length,
-            max_target_length=self.data_args.max_length,
+            max_target_length=self.data_args.max_target_length,
             padding="max_length" if self.tpu_num_cores is not None else "longest",  # TPU hack
             return_tensors="pt",
             **self.dataset_kwargs,
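The collator hunk starts mid-call, so the enclosing call is not visible here. In the transformers version this commit targets, Seq2SeqDataCollator builds the batch with tokenizer.prepare_seq2seq_batch (a seq2seq-example API of that era, since removed from the library); assuming that, a hedged, standalone reconstruction of the call with the reverted argument name looks like this (the wrapper function itself is hypothetical):

# Hedged reconstruction: argument names mirror the hunk above; the wrapper
# function is hypothetical and prepare_seq2seq_batch is the era-appropriate
# tokenizer API assumed to enclose these keyword arguments.
def encode_batch(tokenizer, data_args, batch, tpu_num_cores=None, **dataset_kwargs):
    return tokenizer.prepare_seq2seq_batch(
        [x["src_texts"] for x in batch],
        tgt_texts=[x["tgt_texts"] for x in batch],
        max_length=data_args.max_source_length,
        max_target_length=data_args.max_target_length,
        padding="max_length" if tpu_num_cores is not None else "longest",  # TPU hack
        return_tensors="pt",
        **dataset_kwargs,
    )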