Fix distributed evaluation (#10795)

* Fix distributed evaluation * Use logger
2025-07-31 02:02:21 +06:00 · 2021-03-18 13:12:04 -04:00 · 2021-03-18 13:12:04 -04:00 · 008672e6e5
commit 008672e6e5
parent 9352b5151a
2 changed files with 13 additions and 3 deletions
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@ -690,7 +690,7 @@ class Trainer:
        """
        Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.

-        Will raise an exception if the underlying dataset dese not implement method :obj:`__len__`
+        Will raise an exception if the underlying dataset does not implement method :obj:`__len__`
        """
        return len(dataloader.dataset)

@ -1812,8 +1812,13 @@ class Trainer:

        eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
        if not prediction_loss_only:
-            preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
-            labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
+            # The actual number of eval_sample can be greater than num_examples in distributed settings (when we pass
+            # a batch size to the sampler)
+            make_multiple_of = None
+            if hasattr(dataloader, "sampler") and isinstance(dataloader.sampler, SequentialDistributedSampler):
+                make_multiple_of = dataloader.sampler.batch_size
+            preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
+            labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)

        model.eval()

--- a/tests/test_trainer_distributed.py
+++ b/tests/test_trainer_distributed.py
@ -97,6 +97,11 @@ if __name__ == "__main__":
        def compute_metrics(p: EvalPrediction) -> Dict:
            sequential = list(range(len(dataset)))
            success = p.predictions.tolist() == sequential and p.label_ids.tolist() == sequential
+            if not success and training_args.local_rank == 0:
+                logger.warning(
+                    "Predictions and/or labels do not match expected results:\n  - predictions: "
+                    f"{p.predictions.tolist()}\n  - labels: {p.label_ids.tolist()}\n  - expected: {sequential}"
+                )
            return {"success": success}

        trainer = Trainer(