From d94c6b01445531a649f61b2faebfa767d8b1915f Mon Sep 17 00:00:00 2001
From: thomwolf
Date: Tue, 23 Apr 2019 11:17:06 +0200
Subject: [PATCH] fix training schedules in examples to match new API

---
 examples/lm_finetuning/finetune_on_pregenerated.py | 9 +++++----
 examples/lm_finetuning/simple_lm_finetuning.py     | 7 +++++--
 examples/run_classifier.py                         | 7 +++++--
 examples/run_squad.py                              | 7 +++++--
 examples/run_swag.py                               | 7 +++++--
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/examples/lm_finetuning/finetune_on_pregenerated.py b/examples/lm_finetuning/finetune_on_pregenerated.py
index 5c3051f5009..1638b02a6fa 100644
--- a/examples/lm_finetuning/finetune_on_pregenerated.py
+++ b/examples/lm_finetuning/finetune_on_pregenerated.py
@@ -14,7 +14,7 @@ from tqdm import tqdm
 
 from pytorch_pretrained_bert.modeling import BertForPreTraining
 from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 
 InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
 
@@ -268,7 +268,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
-
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
@@ -314,8 +315,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps,
-                                                                      args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()
diff --git a/examples/lm_finetuning/simple_lm_finetuning.py b/examples/lm_finetuning/simple_lm_finetuning.py
index 0f854733308..6511ead5902 100644
--- a/examples/lm_finetuning/simple_lm_finetuning.py
+++ b/examples/lm_finetuning/simple_lm_finetuning.py
@@ -31,7 +31,7 @@ from tqdm import tqdm, trange
 
 from pytorch_pretrained_bert.modeling import BertForPreTraining
 from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 
 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                     datefmt='%m/%d/%Y %H:%M:%S',
@@ -556,6 +556,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
 
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
@@ -601,7 +603,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                    optimizer.step()
diff --git a/examples/run_classifier.py b/examples/run_classifier.py
index b90ac494e4f..bdcad6f0eb3 100644
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -38,7 +38,7 @@ from sklearn.metrics import matthews_corrcoef, f1_score
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
 from pytorch_pretrained_bert.modeling import BertForSequenceClassification, BertConfig
 from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 
 logger = logging.getLogger(__name__)
 
@@ -784,6 +784,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
 
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
@@ -852,7 +854,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()
diff --git a/examples/run_squad.py b/examples/run_squad.py
index 410fd852988..c3fdf03774f 100644
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -36,7 +36,7 @@ from tqdm import tqdm, trange
 
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
 from pytorch_pretrained_bert.modeling import BertForQuestionAnswering, BertConfig
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 from pytorch_pretrained_bert.tokenization import (BasicTokenizer,
                                                   BertTokenizer,
                                                   whitespace_tokenize)
@@ -949,6 +949,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
@@ -1013,7 +1015,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used and handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()
diff --git a/examples/run_swag.py b/examples/run_swag.py
index a6cfdbe311d..bd724c48adc 100644
--- a/examples/run_swag.py
+++ b/examples/run_swag.py
@@ -34,7 +34,7 @@ from tqdm import tqdm, trange
 
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
 from pytorch_pretrained_bert.modeling import BertForMultipleChoice, BertConfig
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 from pytorch_pretrained_bert.tokenization import BertTokenizer
 
 logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
@@ -411,6 +411,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
@@ -464,7 +466,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()
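
Note: below is a minimal sketch of the fp16-branch pattern that all five example scripts converge on after this patch, shown outside any of the scripts for reference. The constants (learning_rate, warmup_proportion, num_train_optimization_steps) are hypothetical stand-ins for the args.* flags parsed in the examples, and no real model or FP16_Optimizer is constructed here, so the param-group update is left as comments; the WarmupLinearSchedule construction and the get_lr call mirror the patched lines verbatim.

# Sketch only: mirrors the schedule usage introduced by this patch, not a full
# training script. Assumes a pytorch_pretrained_bert version that exposes
# WarmupLinearSchedule; the numeric values are placeholders for args.* flags.
from pytorch_pretrained_bert.optimization import WarmupLinearSchedule

learning_rate = 3e-5                  # stands in for args.learning_rate
warmup_proportion = 0.1               # stands in for args.warmup_proportion
num_train_optimization_steps = 1000   # computed from the dataset in the examples

# The schedule object replaces the old module-level warmup_linear() function.
warmup_linear = WarmupLinearSchedule(warmup=warmup_proportion,
                                     t_total=num_train_optimization_steps)

for global_step in range(num_train_optimization_steps):
    # Same call as in the patched scripts: scale the base learning rate by the
    # schedule's multiplier for the current progress through training.
    lr_this_step = learning_rate * warmup_linear.get_lr(
        global_step / num_train_optimization_steps, warmup_proportion)
    # In the real scripts the value is then written into every param group of
    # the FP16-wrapped optimizer before stepping (no optimizer exists here):
    # for param_group in optimizer.param_groups:
    #     param_group['lr'] = lr_this_step
    # optimizer.step()
    # optimizer.zero_grad()

In the non-fp16 path the scripts keep using BertAdam which, as the in-loop comments note, applies the warmup schedule internally, so only the fp16 branch needs this manual learning-rate update.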