Remove 50k limits bug

This commit is contained in:
peterandluc 2020-04-23 17:10:57 +02:00 committed by Julien Chaumond
parent 6af5a54c28
commit 8e093e5981

View File

@@ -90,7 +90,6 @@ class LineByLineTextDataset(Dataset):
with open(file_path, encoding="utf-8") as f:
lines = [line for line in f.read().splitlines() if (len(line) > 0 and not line.isspace())]
lines = lines[:50_000]
batch_encoding = tokenizer.batch_encode_plus(lines, add_special_tokens=True, max_length=block_size)
self.examples = batch_encoding["input_ids"]