Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)
Fix layerwise GaLore optimizer hard to converge with warmup scheduler (#30372)
Update optimization.py
commit f3b3533e19
parent 0d84901cb7
@@ -444,9 +444,8 @@ def get_scheduler(
         def scheduler_hook(param):
             # Since the optimizer hook has been already attached we only need to
-            # attach the scheduler hook
-            if param.grad is not None:
-                scheduler_dict[param].step()
+            # attach the scheduler hook, the gradients have been zeroed here
+            scheduler_dict[param].step()
 
         for param in optimizer_dict.keys():
             if param.requires_grad:
                 param.register_post_accumulate_grad_hook(scheduler_hook)
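For context, the scheduler hook above is registered per parameter with PyTorch's Tensor.register_post_accumulate_grad_hook, alongside an optimizer hook that steps and zeroes each parameter's own optimizer as soon as its gradient has been accumulated. Because that optimizer hook has already set param.grad back to None, the old `if param.grad is not None:` guard meant the scheduler hook never stepped, so the learning rate stayed frozen at its warmup value. Below is a minimal self-contained sketch of that layerwise pattern, not the transformers implementation itself; the model, learning rate, warmup length, and the LambdaLR warmup schedule are illustrative placeholders (the real code builds one GaLore optimizer per parameter).

import torch

# Toy setup: in the real code each trainable parameter gets its own GaLore
# optimizer; plain AdamW stands in for it here.
model = torch.nn.Linear(16, 4)
num_warmup_steps = 100

optimizer_dict = {
    p: torch.optim.AdamW([p], lr=1e-3)
    for p in model.parameters() if p.requires_grad
}
# One warmup scheduler per parameter, mirroring scheduler_dict in get_scheduler.
scheduler_dict = {
    p: torch.optim.lr_scheduler.LambdaLR(
        optimizer_dict[p],
        lambda step: min(1.0, float(step + 1) / num_warmup_steps),
    )
    for p in optimizer_dict
}

def optimizer_hook(param):
    # Registered first, so it runs before scheduler_hook: step this parameter's
    # optimizer, then zero its gradient (set to None by default).
    optimizer_dict[param].step()
    optimizer_dict[param].zero_grad()

def scheduler_hook(param):
    # param.grad is already None here because optimizer_hook zeroed it, so a
    # guard like `if param.grad is not None:` would always skip this step and
    # leave the learning rate stuck at its warmup value -- the bug fixed above.
    scheduler_dict[param].step()

for param in optimizer_dict:
    # Requires PyTorch 2.1+ (Tensor.register_post_accumulate_grad_hook).
    param.register_post_accumulate_grad_hook(optimizer_hook)
    param.register_post_accumulate_grad_hook(scheduler_hook)

# The training loop then only calls backward(); the hooks update weights and LR.
loss = model(torch.randn(8, 16)).pow(2).mean()
loss.backward()

With the guard removed, every parameter's scheduler advances once per backward pass, which is what a warmup schedule expects.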