From 28ba345eccb8a7af3e044f3dd82c1d661a065d80 Mon Sep 17 00:00:00 2001
From: Ethan Perez
Date: Sun, 4 Aug 2019 12:31:46 -0400
Subject: [PATCH] Fixing unused weight_decay argument

Currently the L2 regularization is hard-coded to "0.01", even though there is
a --weight_decay flag implemented (that is unused). I'm making this flag
control the weight decay used for fine-tuning in this script.
---
 examples/single_model_scripts/run_openai_gpt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/single_model_scripts/run_openai_gpt.py b/examples/single_model_scripts/run_openai_gpt.py
index af737b953ef..479c08782d8 100644
--- a/examples/single_model_scripts/run_openai_gpt.py
+++ b/examples/single_model_scripts/run_openai_gpt.py
@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
     ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
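
For reference, here is a minimal, self-contained sketch of the parameter-grouping pattern this diff touches. It uses `torch.optim.AdamW` rather than the library's `AdamW` import, a hard-coded value standing in for `args.weight_decay`, and a made-up `Tiny` module whose `LayerNorm` attribute name mimics how the script's models name their LayerNorm parameters; those pieces are illustrative assumptions, not part of the patch.

```python
import torch
from torch import nn

# Illustrative stand-in for the value parsed from the --weight_decay flag.
weight_decay = 0.01

class Tiny(nn.Module):
    """Toy module whose parameter names mirror the patterns in `no_decay`."""
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(16, 16)       # -> 'linear.weight', 'linear.bias'
        self.LayerNorm = nn.LayerNorm(16)     # -> 'LayerNorm.weight', 'LayerNorm.bias'

model = Tiny()

no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
param_optimizer = list(model.named_parameters())
optimizer_grouped_parameters = [
    # Regular weights get the user-supplied decay (args.weight_decay in the script).
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': weight_decay},
    # Biases and LayerNorm parameters are exempt from decay.
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]

# Per-group 'weight_decay' entries override the optimizer-level default.
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=6.25e-5, eps=1e-8)

# Show that each group carries its own decay setting.
for group in optimizer.param_groups:
    print(len(group['params']), group['weight_decay'])
```

With the patch applied, the first group's decay comes from `args.weight_decay` instead of the hard-coded 0.01, so the flag actually takes effect during fine-tuning.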