From ca7ff64f5be1b45356e9c56f8b524826e8c7cd2e Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Wed, 21 Apr 2021 07:48:15 -0700 Subject: [PATCH] [deepspeed] fix resume from checkpoint (#11352) This PR fixes a bug that was most likely exposed (not caused) by https://github.com/huggingface/transformers/pull/11318 - surprisingly, the same test worked just fine before that other PR. --- src/transformers/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 254f7d8e6e3..9635dc40a3f 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -1017,7 +1017,7 @@ class Trainer: "yield to errors or unwanted behaviors." ) - if self.deepspeed: + if args.deepspeed: # will be resumed in deepspeed_init pass else: