mirror of https://github.com/huggingface/transformers.git
Slightly alter Keras dummy loss (#20232)
* Slightly alter Keras dummy loss
* Slightly alter Keras dummy loss
* Add sample weight to test_keras_fit
* Fix test_keras_fit for datasets
* Skip the sample_weight stuff for models where the model tester has no batch_size
This commit is contained in:
parent 7f74433814
commit 26ec7928d0
@@ -94,7 +94,11 @@ TFModelInputType = Union[
 def dummy_loss(y_true, y_pred):
-    return tf.reduce_mean(y_pred)
+    if y_pred.shape.rank <= 1:
+        return y_pred
+    else:
+        reduction_axes = list(range(1, y_pred.shape.rank))
+        return tf.reduce_mean(y_pred, axis=reduction_axes)
 
 
 class TFModelUtilsMixin:
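For context (not part of the diff): the old definition collapsed everything to a single scalar, so a Keras sample_weight had nothing per-sample to scale. A minimal sketch of the new behaviour, assuming a hypothetical rank-2 per-token loss tensor:

    import tensorflow as tf

    def dummy_loss(y_true, y_pred):
        # Same body as the new definition above: keep the batch axis and only
        # average over the remaining axes, producing one loss value per sample.
        if y_pred.shape.rank <= 1:
            return y_pred
        else:
            reduction_axes = list(range(1, y_pred.shape.rank))
            return tf.reduce_mean(y_pred, axis=reduction_axes)

    per_token_loss = tf.ones((4, 7))                 # hypothetical (batch, seq_len) loss
    print(dummy_loss(None, per_token_loss).shape)    # (4,) - one value per sample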
@@ -1544,6 +1544,11 @@ class TFModelTesterMixin:
             else:
                 metrics = []
 
+            if hasattr(self.model_tester, "batch_size"):
+                sample_weight = tf.convert_to_tensor([0.5] * self.model_tester.batch_size, dtype=tf.float32)
+            else:
+                sample_weight = None
+
             model(model.dummy_inputs)  # Build the model so we can get some constant weights
             model_weights = model.get_weights()
 
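A small sketch of what this guard produces, assuming the model tester exposes batch_size = 4 (hypothetical value). A uniform 0.5 weight exercises the sample-weighting code path while keeping the comparison across the different fit calls simple:

    import tensorflow as tf

    batch_size = 4  # stand-in for self.model_tester.batch_size (hypothetical value)
    sample_weight = tf.convert_to_tensor([0.5] * batch_size, dtype=tf.float32)
    print(sample_weight)  # tf.Tensor([0.5 0.5 0.5 0.5], shape=(4,), dtype=float32)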
@@ -1553,6 +1558,7 @@ class TFModelTesterMixin:
             history1 = model.fit(
                 prepared_for_class,
                 validation_data=prepared_for_class,
+                sample_weight=sample_weight,
                 steps_per_epoch=1,
                 validation_steps=1,
                 shuffle=False,
@@ -1588,6 +1594,7 @@ class TFModelTesterMixin:
                 inputs_minus_labels,
                 labels,
                 validation_data=(inputs_minus_labels, labels),
+                sample_weight=sample_weight,
                 steps_per_epoch=1,
                 validation_steps=1,
                 shuffle=False,
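Why the per-sample loss from the first hunk matters for these fit calls: when sample_weight is passed, Keras multiplies each sample's loss by its weight before reducing. A rough sketch of that reduction (assuming Keras' default sum-over-batch-size behaviour), shown only to illustrate the interaction:

    import tensorflow as tf

    per_sample_loss = tf.constant([1.0, 2.0, 3.0, 4.0])  # what the new dummy_loss returns
    sample_weight = tf.constant([0.5, 0.5, 0.5, 0.5])     # the weights passed to fit above

    # Approximation of the default weighted reduction:
    # sum(weight * loss) / number_of_samples.
    weighted = tf.reduce_sum(per_sample_loss * sample_weight) / tf.cast(tf.size(per_sample_loss), tf.float32)
    print(float(weighted))  # 1.25, i.e. half of the unweighted mean (2.5)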
@@ -1605,14 +1612,22 @@ class TFModelTesterMixin:
 
             # Make sure fit works with tf.data.Dataset and results are consistent
             dataset = tf.data.Dataset.from_tensor_slices(prepared_for_class)
 
+            if sample_weight is not None:
+                # Add in the sample weight
+                weighted_dataset = dataset.map(lambda x: (x, None, tf.convert_to_tensor(0.5, dtype=tf.float32)))
+            else:
+                weighted_dataset = dataset
             # Pass in all samples as a batch to match other `fit` calls
+            weighted_dataset = weighted_dataset.batch(len(dataset))
             dataset = dataset.batch(len(dataset))
 
             # Reinitialize to fix batchnorm again
             model.set_weights(model_weights)
 
+            # To match the other calls, don't pass sample weights in the validation data
             history3 = model.fit(
-                dataset,
+                weighted_dataset,
                 validation_data=dataset,
                 steps_per_epoch=1,
                 validation_steps=1,
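For reference, Keras fit also accepts sample weights as the third element of an (inputs, labels, sample_weight) tuple yielded by a tf.data.Dataset, which is what the map call in this hunk relies on. A minimal, self-contained sketch of that dataset shape (the feature dict is a hypothetical stand-in for prepared_for_class):

    import tensorflow as tf

    features = {"input_ids": tf.ones((4, 7), dtype=tf.int32)}   # hypothetical stand-in
    dataset = tf.data.Dataset.from_tensor_slices(features)

    # (inputs, labels, sample_weight) tuples; labels are None because the model
    # is assumed to compute its loss from labels embedded in the input dict.
    weighted_dataset = dataset.map(lambda x: (x, None, tf.convert_to_tensor(0.5, dtype=tf.float32)))
    weighted_dataset = weighted_dataset.batch(len(dataset))     # one batch, matching the test
    # model.fit(weighted_dataset, ...) then applies the 0.5 weight to every sample.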