Merge pull request #389 from lukovnikov/master

Fix cosine schedule
2025-07-31 02:02:21 +06:00 · 2019-04-03 11:21:43 +02:00 · 2019-04-03 11:21:43 +02:00 · db4dccd1b5
commit db4dccd1b5
parent 19666dcb3b 19cc2c084e
2 changed files with 4 additions and 2 deletions
--- a/pytorch_pretrained_bert/optimization.py
+++ b/pytorch_pretrained_bert/optimization.py
@@ -26,7 +26,8 @@ logger = logging.getLogger(__name__)
 def warmup_cosine(x, warmup=0.002):
    if x < warmup:
        return x/warmup
-    return 0.5 * (1.0 + torch.cos(math.pi * x))
+    x_ = (x - warmup) / (1 - warmup)  # progress after warmup
+    return 0.5 * (1. + math.cos(math.pi * x_))

 def warmup_constant(x, warmup=0.002):
    """ Linearly increases learning rate over `warmup`*`t_total` (as provided to BertAdam) training steps.
--- a/pytorch_pretrained_bert/optimization_openai.py
+++ b/pytorch_pretrained_bert/optimization_openai.py
@@ -26,7 +26,8 @@ logger = logging.getLogger(__name__)
 def warmup_cosine(x, warmup=0.002):
    if x < warmup:
        return x/warmup
-    return 0.5 * (1.0 + torch.cos(math.pi * x))
+    x_ = (x - warmup) / (1 - warmup)  # progress after warmup
+    return 0.5 * (1. + math.cos(math.pi * x_))

 def warmup_constant(x, warmup=0.002):
    """ Linearly increases learning rate over `warmup`*`t_total` (as provided to OpenAIAdam) training steps.