mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 12:50:06 +06:00
Fix tokenizers training in notebook (#10110)
This commit is contained in:
parent
85395e4901
commit
1fbaa3c117
@@ -229,7 +229,7 @@
|
||||
"\n",
|
||||
"# We initialize our trainer, giving him the details about the vocabulary we want to generate\n",
|
||||
"trainer = BpeTrainer(vocab_size=25000, show_progress=True, initial_alphabet=ByteLevel.alphabet())\n",
|
||||
"tokenizer.train(trainer, [\"big.txt\"])\n",
|
||||
"tokenizer.train(files=[\"big.txt\"], trainer=trainer)\n",
|
||||
"\n",
|
||||
"print(\"Trained vocab size: {}\".format(tokenizer.get_vocab_size()))"
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user