From e708bb75bf49121c2bb9a9e178615ffc86dc279d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Oliver=20=C3=85strand?=
Date: Wed, 20 May 2020 16:45:59 -0400
Subject: [PATCH] Correct TF formatting to exclude LayerNorms from weight decay (#4448)

* Exclude LayerNorms from weight decay

* Include both formats of layer norm
---
 src/transformers/optimization_tf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/optimization_tf.py b/src/transformers/optimization_tf.py
index 1c8a4a7df29..93c1048db07 100644
--- a/src/transformers/optimization_tf.py
+++ b/src/transformers/optimization_tf.py
@@ -75,7 +75,7 @@ def create_optimizer(init_lr, num_train_steps, num_warmup_steps, end_lr=0.0, opt
         beta_1=0.9,
         beta_2=0.999,
         epsilon=1e-6,
-        exclude_from_weight_decay=["layer_norm", "bias"],
+        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
     )
 
     return optimizer