fix tokenizer_type to avoid error when loading checkpoint back (#20062)

This commit is contained in:
Sourab Mangrulkar 2022-11-04 19:04:01 +05:30 committed by GitHub
parent 3502c202f9
commit 19067711e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -648,7 +648,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
"data_parallel_size": args.target_data_parallel_size,
"make_vocab_size_divisible_by": args.make_vocab_size_divisible_by,
"rank": 0,
"tokenizer_type": None,
"tokenizer_type": "GPT2BPETokenizer",
}
if config.activation_function == "gelu":