mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-03 03:31:05 +06:00
Remove 50k limits bug
This commit is contained in:
parent
6af5a54c28
commit
8e093e5981
@@ -90,7 +90,6 @@ class LineByLineTextDataset(Dataset):
|
||||
with open(file_path, encoding="utf-8") as f:
|
||||
lines = [line for line in f.read().splitlines() if (len(line) > 0 and not line.isspace())]
|
||||
|
||||
lines = lines[:50_000]
|
||||
batch_encoding = tokenizer.batch_encode_plus(lines, add_special_tokens=True, max_length=block_size)
|
||||
self.examples = batch_encoding["input_ids"]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user