Fix layerwise GaLore optimizer being hard to converge with warmup scheduler (#30372)

Update optimization.py
This commit is contained in:
hoshi-hiyouga 2024-04-23 00:00:26 +08:00 committed by GitHub
parent 0d84901cb7
commit f3b3533e19
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -444,9 +444,8 @@ def get_scheduler(
 def scheduler_hook(param):
     # Since the optimizer hook has been already attached we only need to
-    # attach the scheduler hook
-    if param.grad is not None:
-        scheduler_dict[param].step()
+    # attach the scheduler hook, the gradients have been zeroed here
+    scheduler_dict[param].step()
 for param in optimizer_dict.keys():
     if param.requires_grad: