Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-03 03:31:05 +06:00)
Fix bug where using FSDP V1 leads to the model device not being properly set (#39177)
* fix bug using FSDP V1 will lead to model device not properly set

Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>

* update the code

Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>

---------

Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
parent 34c16167eb
commit 4243bb844d
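For context, the code path this commit touches is exercised by an FSDP V1 training run through the Trainer. A minimal, illustrative sketch of such a run follows; the model name, toy dataset, and hyperparameters are placeholders and are not taken from the PR:

# Illustrative only: a toy FSDP V1 run that goes through the Trainer code path
# changed below. Launch with e.g. `torchrun --nproc_per_node=2 train_fsdp.py`.
import torch
from torch.utils.data import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments

class ToyDataset(Dataset):
    def __init__(self, tokenizer, n=64):
        enc = tokenizer(["hello world"] * n, padding="max_length", max_length=16, return_tensors="pt")
        self.rows = [
            {"input_ids": enc["input_ids"][i], "attention_mask": enc["attention_mask"][i], "labels": torch.tensor(0)}
            for i in range(n)
        ]

    def __len__(self):
        return len(self.rows)

    def __getitem__(self, i):
        return self.rows[i]

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=8,
    max_steps=10,
    fsdp="full_shard",  # plain `fsdp=...` selects FSDP V1 by default; Trainer's is_fsdp_enabled becomes True
)

Trainer(model=model, args=args, train_dataset=ToyDataset(tokenizer)).train()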
@@ -2294,9 +2294,7 @@ class Trainer:
             else:
                 debug_overflow = DebugUnderflowOverflow(self.model)  # noqa
 
-        delay_optimizer_creation = (
-            is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled or self.is_fsdp_enabled or self.is_tp_enabled
-        )
+        delay_optimizer_creation = is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled or self.is_fsdp_enabled
 
         # Can't delay optimizer creation when using FSDP2: https://github.com/huggingface/accelerate/blob/3f636d626063ffcf9a337c7d3624d61b7d187d59/src/accelerate/accelerator.py#L1404
         is_fsdp2 = self.is_fsdp_enabled and (getattr(self.accelerator.state.fsdp_plugin, "fsdp_version", 1) == 2)
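In other words, tensor parallelism no longer forces delayed optimizer creation; only SageMaker MP, FSDP-on-XLA, and FSDP do. Below is a condensed paraphrase of the resulting decision, with plain booleans standing in for the Trainer attributes; the FSDP2 override is inferred from the hunk's context lines, which only show the is_fsdp2 computation:

# Condensed paraphrase of the decision after this patch (not the verbatim Trainer code).
def should_delay_optimizer_creation(
    sagemaker_mp_enabled: bool,
    fsdp_xla_enabled: bool,
    fsdp_enabled: bool,
    fsdp_version: int = 1,
) -> bool:
    # After this patch, TP no longer appears in the condition.
    delay = sagemaker_mp_enabled or fsdp_xla_enabled or fsdp_enabled
    # FSDP2 can't delay optimizer creation (see the accelerate link in the hunk),
    # so the flag is presumably cleared again just below the visible context.
    if fsdp_enabled and fsdp_version == 2:
        delay = False
    return delay

assert should_delay_optimizer_creation(False, False, True, fsdp_version=1) is True   # FSDP V1
assert should_delay_optimizer_creation(False, False, True, fsdp_version=2) is False  # FSDP2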
@@ -2356,8 +2354,9 @@ class Trainer:
                 if self.use_apex:
                     model = self.accelerator.prepare(self.model)
                 else:
-                    if delay_optimizer_creation:
-                        model = self.accelerator.prepare(self.model)
+                    # We should avoid accelerate preparing the model in TP case since we dont need it as it is handled by transformers from_pretrained and also it goes into DDP based preparation.
+                    if self.is_tp_enabled:
+                        self.optimizer = self.accelerator.prepare(self.optimizer)
                     else:
                         model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
             else:
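Read together with the first hunk, the non-apex branch now special-cases on self.is_tp_enabled instead of delay_optimizer_creation, so an FSDP V1 run falls through to the joint prepare of model and optimizer, which appears to be what restores correct device placement. A condensed, stand-in paraphrase of that branch (not the Trainer's verbatim code):

def prepare_model_and_optimizer(accelerator, model, optimizer, tp_enabled: bool):
    # Stand-in paraphrase of the patched non-apex branch; `accelerator` is an
    # accelerate Accelerator, and the other arguments mirror the Trainer attributes.
    if tp_enabled:
        # TP: the model is already parallelized by from_pretrained, so only the
        # optimizer goes through accelerate (preparing the model would route it
        # into DDP-style preparation, per the comment added in the diff).
        optimizer = accelerator.prepare(optimizer)
    else:
        # Everything else, including FSDP V1, prepares model and optimizer together.
        model, optimizer = accelerator.prepare(model, optimizer)
    return model, optimizer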