diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 590c5da1953..357cfc45bdd 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1704,6 +1704,7 @@ class Trainer:
         self, batch_size=None, args=None, resume_from_checkpoint=None, trial=None, ignore_keys_for_eval=None
     ):
         self._train_batch_size = batch_size
+        logger.debug(f"Currently training with a batch size of: {self._train_batch_size}")
         # Data loader and number of training steps
         train_dataloader = self.get_train_dataloader()
 
@@ -1811,7 +1812,7 @@ class Trainer:
         logger.info("***** Running training *****")
         logger.info(f"  Num examples = {num_examples:,}")
         logger.info(f"  Num Epochs = {num_train_epochs:,}")
-        logger.info(f"  Instantaneous batch size per device = {args.per_device_train_batch_size:,}")
+        logger.info(f"  Instantaneous batch size per device = {self._train_batch_size:,}")
        logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size:,}")
        logger.info(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
        logger.info(f"  Total optimization steps = {max_steps:,}")
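
Not part of the patch itself, but for context: the new `logger.debug(...)` call is only emitted when the `transformers` logger is set to DEBUG verbosity, and switching the startup banner from `args.per_device_train_batch_size` to `self._train_batch_size` matters when the runtime batch size diverges from the configured one (for example when `auto_find_batch_size=True` retries with a smaller batch size after an OOM), which is presumably the motivation for logging the runtime value. A minimal sketch of surfacing the new debug line, assuming a standard `transformers` install; the actual Trainer/model setup is omitted:

```python
# Sketch: make the Trainer's logger.debug(...) output visible.
# Assumes only that the transformers library is installed; no Trainer is built here.
import transformers

# Option 1: set verbosity programmatically before constructing the Trainer.
transformers.logging.set_verbosity_debug()

# Option 2: set the environment variable before launching the training script:
#   TRANSFORMERS_VERBOSITY=debug python train.py

# Sanity check: DEBUG corresponds to the lowest (most verbose) threshold.
print(transformers.logging.get_verbosity())
```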