mirror of https://github.com/huggingface/transformers.git
[deepspeed] offload + non-cpuadam optimizer exception (#22043)
* [deepspeed] offload + non-cpuadam optimizer exception
* flip
* revert min version
parent d0c19b3303
commit ec24132b6c
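For context, a minimal sketch of the config shape these tests exercise (the key names come from the diffs below; the ZeRO stage and other values are illustrative assumptions):

    # illustrative sketch, not the exact test config
    ds_config_dict = {
        "zero_optimization": {
            "stage": 2,  # assumption: the tests run several ZeRO stages
            "offload_optimizer": {"device": "cpu"},
        },
        # allow a non-CPUAdam optimizer (e.g. the default HF Trainer optimizer)
        # together with optimizer offload; without this DeepSpeed insists on
        # CPUAdam, since offload is not efficient w/o it
        "zero_force_ds_cpu_optimizer": False,
    }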
@@ -426,6 +426,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
         del ds_config_dict["optimizer"]  # force default HF Trainer optimizer
         # force cpu offload
         ds_config_dict["zero_optimization"]["offload_optimizer"]["device"] = "cpu"
+        ds_config_dict["zero_force_ds_cpu_optimizer"] = False  # offload is not efficient w/o CPUAdam
         with mockenv_context(**self.dist_env_1_gpu):
             kwargs = {"local_rank": 0, "deepspeed": ds_config_dict}
             kwargs[dtype] = True
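A hedged sketch of the mocked single-process environment the hunk above relies on; `mockenv_context` is the helper from `transformers.testing_utils`, while the exact env var values here are assumptions based on common torch.distributed settings:

    from transformers.testing_utils import mockenv_context

    # assumed values for a mocked single-GPU distributed environment
    dist_env_1_gpu = dict(
        MASTER_ADDR="localhost",
        MASTER_PORT="10999",
        RANK="0",
        LOCAL_RANK="0",
        WORLD_SIZE="1",
    )
    with mockenv_context(**dist_env_1_gpu):
        # inside the context these env vars are set, so the Trainer
        # initializes DeepSpeed as if launched by a distributed launcher
        ...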
@@ -776,6 +777,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
         ds_config_dict = self.get_config_dict(stage)
         del ds_config_dict["optimizer"]  # will use HF Trainer optimizer
         del ds_config_dict["scheduler"]  # will use HF Trainer scheduler
+        ds_config_dict["zero_force_ds_cpu_optimizer"] = False  # offload is not efficient w/o CPUAdam
         # must use this setting to get the reload path exercised
         ds_config_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True

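Usage-wise, a dict config like the one in these hunks is passed straight to the HF Trainer; a hedged sketch reusing `ds_config_dict` from the sketch above (`output_dir`, `model`, and `train_dataset` are placeholders):

    from transformers import Trainer, TrainingArguments

    # `deepspeed` accepts the dict directly, as in the tests,
    # or a path to a json config file
    args = TrainingArguments(output_dir="out", local_rank=0, deepspeed=ds_config_dict)
    trainer = Trainer(model=model, args=args, train_dataset=train_dataset)  # placeholders
    trainer.train()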