Fix tokenizers training in notebook (#10110)

Anthony MOI 2021-02-09 21:48:22 -05:00 committed by GitHub
parent 85395e4901
commit 1fbaa3c117


@@ -229,7 +229,7 @@
     "\n",
     "# We initialize our trainer, giving it the details about the vocabulary we want to generate\n",
     "trainer = BpeTrainer(vocab_size=25000, show_progress=True, initial_alphabet=ByteLevel.alphabet())\n",
-    "tokenizer.train(trainer, [\"big.txt\"])\n",
+    "tokenizer.train(files=[\"big.txt\"], trainer=trainer)\n",
     "\n",
     "print(\"Trained vocab size: {}\".format(tokenizer.get_vocab_size()))"
 ]
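
For context, the cell after this change could be run as a standalone script along the following lines. This is a minimal sketch, assuming tokenizers >= 0.10, where Tokenizer.train takes the training files as its first argument and the trainer by keyword; the tokenizer construction (BPE model plus ByteLevel pre-tokenizer) is not shown in the hunk and is inferred from the trainer's initial_alphabet, and "big.txt" stands in for whatever corpus file the notebook uses.

from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.pre_tokenizers import ByteLevel
from tokenizers.trainers import BpeTrainer

# Build a byte-level BPE tokenizer (assumed setup, not shown in the hunk)
tokenizer = Tokenizer(BPE())
tokenizer.pre_tokenizer = ByteLevel()

# Configure the trainer with the target vocabulary details
trainer = BpeTrainer(
    vocab_size=25000,
    show_progress=True,
    initial_alphabet=ByteLevel.alphabet(),
)

# Updated call order: training files first, trainer passed by keyword
tokenizer.train(files=["big.txt"], trainer=trainer)

print("Trained vocab size: {}".format(tokenizer.get_vocab_size()))

With the old call order, tokenizer.train(trainer, ["big.txt"]), newer versions of tokenizers raise a type error because the first positional argument is now the list of files, which is what this commit fixes in the notebook.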