From 469384a777131e8c6d6bf7851ab0f77f2fbe3cb0 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Thu, 20 May 2021 09:55:13 -0400
Subject: [PATCH] Fix regression in regression (#11785)

* Fix regression in regression

* Add test
---
 src/transformers/models/albert/modeling_albert.py     |  5 ++++-
 src/transformers/models/bert/modeling_bert.py         |  5 ++++-
 src/transformers/models/big_bird/modeling_big_bird.py |  5 ++++-
 src/transformers/models/convbert/modeling_convbert.py |  5 ++++-
 .../models/distilbert/modeling_distilbert.py          |  5 ++++-
 src/transformers/models/electra/modeling_electra.py   |  5 ++++-
 src/transformers/models/funnel/modeling_funnel.py     |  5 ++++-
 .../models/longformer/modeling_longformer.py          |  5 ++++-
 .../models/mobilebert/modeling_mobilebert.py          |  5 ++++-
 src/transformers/models/reformer/modeling_reformer.py |  5 ++++-
 src/transformers/models/roberta/modeling_roberta.py   |  5 ++++-
 .../models/squeezebert/modeling_squeezebert.py        |  5 ++++-
 src/transformers/models/xlm/modeling_xlm.py           |  5 ++++-
 src/transformers/models/xlnet/modeling_xlnet.py       |  5 ++++-
 tests/test_modeling_common.py                         | 10 +++++++++-
 15 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py
index b33691d6462..ca41ec2a22d 100755
--- a/src/transformers/models/albert/modeling_albert.py
+++ b/src/transformers/models/albert/modeling_albert.py
@@ -1037,7 +1037,10 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py
index 21a6eaab595..75aadf2d90a 100755
--- a/src/transformers/models/bert/modeling_bert.py
+++ b/src/transformers/models/bert/modeling_bert.py
@@ -1528,7 +1528,10 @@ class BertForSequenceClassification(BertPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py
index 45a4ad76b57..3d5e443e1cf 100755
--- a/src/transformers/models/big_bird/modeling_big_bird.py
+++ b/src/transformers/models/big_bird/modeling_big_bird.py
@@ -2671,7 +2671,10 @@ class BigBirdForSequenceClassification(BigBirdPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py
index f5b23e46005..b6ac5abc028 100755
--- a/src/transformers/models/convbert/modeling_convbert.py
+++ b/src/transformers/models/convbert/modeling_convbert.py
@@ -1023,7 +1023,10 @@ class ConvBertForSequenceClassification(ConvBertPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/distilbert/modeling_distilbert.py b/src/transformers/models/distilbert/modeling_distilbert.py
index b3cb1a93cce..3dc968cdf04 100755
--- a/src/transformers/models/distilbert/modeling_distilbert.py
+++ b/src/transformers/models/distilbert/modeling_distilbert.py
@@ -642,7 +642,10 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py
index 5229054ff76..4d8479942ed 100644
--- a/src/transformers/models/electra/modeling_electra.py
+++ b/src/transformers/models/electra/modeling_electra.py
@@ -964,7 +964,10 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/funnel/modeling_funnel.py b/src/transformers/models/funnel/modeling_funnel.py
index 890a620ed41..428ce54fff4 100644
--- a/src/transformers/models/funnel/modeling_funnel.py
+++ b/src/transformers/models/funnel/modeling_funnel.py
@@ -1298,7 +1298,10 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py
index d1ab71bb7ad..4aa6f556871 100755
--- a/src/transformers/models/longformer/modeling_longformer.py
+++ b/src/transformers/models/longformer/modeling_longformer.py
@@ -1872,7 +1872,10 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py
index 8f50c6d6f0f..a37f3e28334 100644
--- a/src/transformers/models/mobilebert/modeling_mobilebert.py
+++ b/src/transformers/models/mobilebert/modeling_mobilebert.py
@@ -1279,7 +1279,10 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py
index 4beca117a68..c19ac5265a2 100755
--- a/src/transformers/models/reformer/modeling_reformer.py
+++ b/src/transformers/models/reformer/modeling_reformer.py
@@ -2445,7 +2445,10 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py
index cf535a719c8..c3503c292a1 100644
--- a/src/transformers/models/roberta/modeling_roberta.py
+++ b/src/transformers/models/roberta/modeling_roberta.py
@@ -1178,7 +1178,10 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py
index 462c8fb3762..7fb76f0328d 100644
--- a/src/transformers/models/squeezebert/modeling_squeezebert.py
+++ b/src/transformers/models/squeezebert/modeling_squeezebert.py
@@ -798,7 +798,10 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py
index 8dc0d208d16..bcf08ae4109 100755
--- a/src/transformers/models/xlm/modeling_xlm.py
+++ b/src/transformers/models/xlm/modeling_xlm.py
@@ -847,7 +847,10 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py
index fa562c5f344..6f0eaa3f8ce 100755
--- a/src/transformers/models/xlnet/modeling_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_xlnet.py
@@ -1562,7 +1562,10 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
 
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 837e267bdda..493cf7d5553 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -20,6 +20,7 @@ import os.path
 import random
 import tempfile
 import unittest
+import warnings
 from typing import List, Tuple
 
 from huggingface_hub import HfApi
@@ -1462,7 +1463,14 @@ class ModelTesterMixin:
 
                 inputs["labels"] = inputs["labels"].to(problem_type["dtype"])
 
-                loss = model(**inputs).loss
+                # This tests that we do not trigger the warning from PyTorch "Using a target size that is different
+                # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
+                # they have the same size." which is a symptom something is wrong for the regression problem.
+                # See https://github.com/huggingface/transformers/issues/11780
+                with warnings.catch_warnings(record=True) as warning_list:
+                    loss = model(**inputs).loss
+                self.assertListEqual(warning_list, [])
+
                 loss.backward()
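
For context on the bug this patch fixes: with config.problem_type == "regression" and num_labels == 1, the old logits.view(-1, self.num_labels) call produces a (batch, 1) tensor while the labels tensor is typically (batch,). MSELoss then broadcasts the pair to (batch, batch), emits the PyTorch target-size warning the new test checks for, and computes a meaningless loss. Below is a minimal standalone sketch of the pre- and post-patch behavior; the tensor sizes are illustrative, not taken from the patch.

import torch
from torch.nn import MSELoss

batch_size, num_labels = 4, 1  # illustrative sizes
logits = torch.randn(batch_size, num_labels)  # model output: shape (4, 1)
labels = torch.randn(batch_size)              # regression targets: shape (4,)

loss_fct = MSELoss()

# Pre-patch: shapes (4, 1) vs (4,) broadcast to (4, 4); PyTorch emits the
# "Using a target size that is different to the input size" UserWarning
# and the resulting loss averages over all 16 broadcast pairs.
buggy_loss = loss_fct(logits.view(-1, num_labels), labels)

# Post-patch: both sides squeezed to shape (4,), so the loss is computed
# element-wise over matching positions, with no warning.
fixed_loss = loss_fct(logits.squeeze(), labels.squeeze())

print(buggy_loss.item(), fixed_loss.item())  # the two values differ

For num_labels > 1 (multi-target regression), the patch instead passes logits and labels through unchanged, since both already share the shape (batch, num_labels) and the old view(-1, self.num_labels) call was a no-op in that case.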