mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-01 18:51:14 +06:00
Improve training args (#25401)
* enhanced tips for some training args * make style
This commit is contained in:
parent
3deed1f97e
commit
00b93cda21
@ -193,9 +193,9 @@ class TrainingArguments:
|
|||||||
prediction_loss_only (`bool`, *optional*, defaults to `False`):
|
prediction_loss_only (`bool`, *optional*, defaults to `False`):
|
||||||
When performing evaluation and generating predictions, only returns the loss.
|
When performing evaluation and generating predictions, only returns the loss.
|
||||||
per_device_train_batch_size (`int`, *optional*, defaults to 8):
|
per_device_train_batch_size (`int`, *optional*, defaults to 8):
|
||||||
The batch size per GPU/TPU core/CPU for training.
|
The batch size per GPU/TPU/MPS/NPU core/CPU for training.
|
||||||
per_device_eval_batch_size (`int`, *optional*, defaults to 8):
|
per_device_eval_batch_size (`int`, *optional*, defaults to 8):
|
||||||
The batch size per GPU/TPU core/CPU for evaluation.
|
The batch size per GPU/TPU/MPS/NPU core/CPU for evaluation.
|
||||||
gradient_accumulation_steps (`int`, *optional*, defaults to 1):
|
gradient_accumulation_steps (`int`, *optional*, defaults to 1):
|
||||||
Number of update steps to accumulate the gradients for, before performing a backward/update pass.
|
Number of update steps to accumulate the gradients for, before performing a backward/update pass.
|
||||||
|
|
||||||
@ -648,10 +648,10 @@ class TrainingArguments:
|
|||||||
)
|
)
|
||||||
|
|
||||||
per_device_train_batch_size: int = field(
|
per_device_train_batch_size: int = field(
|
||||||
default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for training."}
|
default=8, metadata={"help": "Batch size per GPU/TPU/MPS/NPU core/CPU for training."}
|
||||||
)
|
)
|
||||||
per_device_eval_batch_size: int = field(
|
per_device_eval_batch_size: int = field(
|
||||||
default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for evaluation."}
|
default=8, metadata={"help": "Batch size per GPU/TPU/MPS/NPU core/CPU for evaluation."}
|
||||||
)
|
)
|
||||||
|
|
||||||
per_gpu_train_batch_size: Optional[int] = field(
|
per_gpu_train_batch_size: Optional[int] = field(
|
||||||
@ -804,7 +804,9 @@ class TrainingArguments:
|
|||||||
)
|
)
|
||||||
use_cpu: bool = field(
|
use_cpu: bool = field(
|
||||||
default=False,
|
default=False,
|
||||||
metadata={"help": " Whether or not to use cpu. If set to False, we will use cuda or mps device if available."},
|
metadata={
|
||||||
|
"help": " Whether or not to use cpu. If set to False, we will use cuda/tpu/mps/npu device if available."
|
||||||
|
},
|
||||||
)
|
)
|
||||||
use_mps_device: bool = field(
|
use_mps_device: bool = field(
|
||||||
default=False,
|
default=False,
|
||||||
|
Loading…
Reference in New Issue
Block a user