Accumulate opt state dict on dp_rank 0 (#11481)

2025-08-01 18:51:14 +06:00 · 2021-05-03 13:18:27 -04:00 · 2021-05-03 13:18:27 -04:00 · f4c9a7e62e
commit f4c9a7e62e
parent 1e8e06862f
1 changed files with 9 additions and 8 deletions
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1420,6 +1420,7 @@ class Trainer:
                xm.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                reissue_pt_warnings(caught_warnings)
        elif is_sagemaker_mp_enabled():
            if smp.dp_rank() == 0:
                # Consolidate the state dict on all processes of dp_rank 0
                opt_state_dict = self.optimizer.state_dict()
                # Save it and the scheduler on the main process