TFTrainer dataset doc & fix evaluation bug (#6618)

* TFTrainer dataset doc & fix evaluation bug

discussed in #6551

* add docstring to test/eval datasets
Joe Davison 2020-08-20 12:11:36 -04:00 committed by GitHub
parent 573bdb0a5d
commit f9d280a959

@@ -38,9 +38,17 @@ class TFTrainer:
args (:class:`~transformers.TFTrainingArguments`):
The arguments to tweak training.
train_dataset (:class:`~tf.data.Dataset`, `optional`):
The dataset to use for training.
The dataset to use for training. The dataset should yield tuples of ``(features, labels)`` where
``features`` is a dict of input features and ``labels`` is the labels. If ``labels`` is a tensor, the loss is
calculated by the model by calling ``model(features, labels=labels)``. If ``labels`` is a dict, such as when
using a QuestionAnswering head model with multiple targets, the loss is instead calculated by calling
``model(features, **labels)``.
eval_dataset (:class:`~tf.data.Dataset`, `optional`):
The dataset to use for evaluation.
The dataset to use for evaluation. The dataset should yield tuples of ``(features, labels)`` where
``features`` is a dict of input features and ``labels`` is the labels. If ``labels`` is a tensor, the loss is
calculated by the model by calling ``model(features, labels=labels)``. If ``labels`` is a dict, such as when
using a QuestionAnswering head model with multiple targets, the loss is instead calculated by calling
``model(features, **labels)``.
compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
The function that will be used to compute metrics at evaluation. Must take a
:class:`~transformers.EvalPrediction` and return a dictionary mapping metric names (strings) to metric values.
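
To illustrate the ``(features, labels)`` format described in the docstrings above, here is a minimal sketch with one label tensor per example. The feature names follow the usual transformers convention, but the toy values and the two-example batch are assumptions made only for illustration, not part of this commit.

import tensorflow as tf

# Toy encoded inputs for a sequence-classification setup. The feature names
# ("input_ids", "attention_mask") follow the usual transformers convention;
# the values themselves are made up for illustration.
features = {
    "input_ids": tf.constant([[101, 7592, 2088, 102], [101, 2307, 3185, 102]], dtype=tf.int32),
    "attention_mask": tf.constant([[1, 1, 1, 1], [1, 1, 1, 1]], dtype=tf.int32),
}
labels = tf.constant([0, 1], dtype=tf.int32)  # one class id per example

# The dataset yields (features, labels) tuples: a dict of input tensors plus a
# label tensor. With tensor labels, the loss is computed via
# model(features, labels=labels), as described above.
train_dataset = tf.data.Dataset.from_tensor_slices((features, labels))

for feats, labs in train_dataset.take(1):
    print(feats["input_ids"].shape, labs.numpy())
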
@@ -145,7 +153,11 @@ class TFTrainer:
Args:
eval_dataset (:class:`~tf.data.Dataset`, `optional`):
If provided, will override `self.eval_dataset`.
If provided, will override `self.eval_dataset`. The dataset should yield tuples of ``(features,
labels)`` where ``features`` is a dict of input features and ``labels`` is the labels. If ``labels``
is a tensor, the loss is calculated by the model by calling ``model(features, labels=labels)``. If
``labels`` is a dict, such as when using a QuestionAnswering head model with multiple targets, the
loss is instead calculated by calling ``model(features, **labels)``.
Subclass and override this method if you want to inject some custom behavior.
"""
@@ -173,7 +185,12 @@ class TFTrainer:
Returns a test :class:`~tf.data.Dataset`.
Args:
test_dataset (:class:`~tf.data.Dataset`): The dataset to use.
test_dataset (:class:`~tf.data.Dataset`):
The dataset to use. The dataset should yield tuples of ``(features, labels)`` where ``features`` is
a dict of input features and ``labels`` is the labels. If ``labels`` is a tensor, the loss is
calculated by the model by calling ``model(features, labels=labels)``. If ``labels`` is a dict, such
as when using a QuestionAnswering head model with multiple targets, the loss is instead calculated
by calling ``model(features, **labels)``.
Subclass and override this method if you want to inject some custom behavior.
"""
@@ -405,14 +422,18 @@ class TFTrainer:
Args:
eval_dataset (:class:`~tf.data.Dataset`, `optional`):
Pass a dataset if you wish to override :obj:`self.eval_dataset`.
Pass a dataset if you wish to override :obj:`self.eval_dataset`. The dataset should yield tuples of
``(features, labels)`` where ``features`` is a dict of input features and ``labels`` is the labels.
If ``labels`` is a tensor, the loss is calculated by the model by calling ``model(features,
labels=labels)``. If ``labels`` is a dict, such as when using a QuestionAnswering head model with
multiple targets, the loss is instead calculated by calling ``model(features, **labels)``.
Returns:
A dictionary containing the evaluation loss and the potential metrics computed from the predictions.
"""
eval_ds, steps, num_examples = self.get_eval_tfdataset(eval_dataset)
output = self._prediction_loop(eval_ds, steps, num_examples, description="Evaluation")
output = self.prediction_loop(eval_ds, steps, num_examples, description="Evaluation")
logs = {**output.metrics}
logs["epoch"] = self.epoch_logging
@@ -666,7 +687,11 @@ class TFTrainer:
Args:
test_dataset (:class:`~tf.data.Dataset`):
Dataset to run the predictions on.
Dataset to run the predictions on. The dataset should yield tuples of ``(features, labels)`` where
``features`` is a dict of input features and ``labels`` is the labels. If ``labels`` is a tensor,
the loss is calculated by the model by calling ``model(features, labels=labels)``. If ``labels`` is
a dict, such as when using a QuestionAnswering head model with multiple targets, the loss is instead
calculated by calling ``model(features, **labels)``.
Returns:
`NamedTuple`:
predictions (:obj:`np.ndarray`):
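
And a similarly hedged sketch of ``predict``: it returns a ``NamedTuple`` whose ``predictions`` field is an ``np.ndarray`` of raw model outputs, along with ``label_ids`` and ``metrics`` when the dataset contains labels. ``trainer`` and ``test_dataset`` are assumed to be set up as in the sketches above.

# Run predictions on a dataset in the (features, labels) format documented above.
output = trainer.predict(test_dataset)
print(output.predictions.shape)  # raw model outputs as an np.ndarray
print(output.metrics)            # loss (and compute_metrics results, if labels were provided)
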