Loads the tokenizer for each checkpoint, to solve the reproducibility issue

This commit is contained in:
Rabeeh KARIMI 2019-08-14 10:58:26 +02:00
parent f63ff536ad
commit 3d47a7f8ab

View File

@ -463,6 +463,7 @@ def main():
for checkpoint in checkpoints:
global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
model = model_class.from_pretrained(checkpoint)
tokenizer = tokenizer_class.from_pretrained(checkpoint)
model.to(args.device)
result = evaluate(args, model, tokenizer, prefix=global_step)
result = dict((k + '_{}'.format(global_step), v) for k, v in result.items())