fix dataset shuffling for Distributed training (huggingface#3721) (#3766)

elk-cloner 2020-04-13 18:41:18 +04:30 committed by GitHub
parent 7972a4019f
commit 5ebd898953

@@ -317,8 +317,12 @@ def train(args, train_dataset, model: PreTrainedModel, tokenizer: PreTrainedTokenizer
         epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
     )
     set_seed(args)  # Added here for reproducibility
-    for _ in train_iterator:
+    for epoch in train_iterator:
         epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
+        if args.local_rank != -1:
+            train_sampler.set_epoch(epoch)
         for step, batch in enumerate(epoch_iterator):
             # Skip past any already trained steps if resuming training
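
For context, the change works because torch.utils.data.distributed.DistributedSampler derives its shuffle order from an internal epoch counter; if set_epoch() is never called, every epoch (and every resumed run) replays the identical sample order on each rank. The snippet below is a minimal standalone sketch of that behaviour, not code from this repository; the toy dataset, world size, and rank values are illustrative.

# Minimal sketch (illustrative values, not repository code): the sampler seeds its
# shuffle with the epoch number, so skipping set_epoch() repeats the same data order
# every epoch on every rank.
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(16))          # toy stand-in for train_dataset
sampler = DistributedSampler(dataset, num_replicas=2, rank=0, shuffle=True)
loader = DataLoader(dataset, sampler=sampler, batch_size=4)

for epoch in range(3):
    sampler.set_epoch(epoch)                       # reseed the shuffle for this epoch
    order = [int(x) for batch in loader for x in batch[0]]
    print(f"epoch {epoch}: {order}")               # order now differs between epochs

Commenting out the set_epoch() call makes the printed order identical across all three epochs, which is the behaviour this commit fixes for the distributed case (args.local_rank != -1).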