Replace assert by ValueError in src/transformers/models/electra/modeling_{electra,tf_electra}.py and all other models that had copies (#13955)
* Replace all assert by ValueError in src/transformers/models/electra
* Reformat with black to pass check_code_quality test
* Change some assert to ValueError of modeling_bert & modeling_tf_albert
* Change some assert in multiples models
* Change multiples models assertion to ValueError in order to validate check_code_style test and models template test.
* Black reformat
* Change some more asserts in multiples models
* Change assert to ValueError in modeling_layoutlm.py to fix copy error in code_style_check
* Add proper message to ValueError in modeling_tf_albert.py
* Simplify logic in models/bert/modeling_bert.py
* Add ValueError message to models/convbert/modeling_tf_convbert.py
* Add error message for ValueError to modeling_tf_electra.py
* Simplify logic in models/tapas/modeling_tapas.py
* Simplify logic in models/electra/modeling_electra.py
* Add ValueError message in src/transformers/models/bert/modeling_tf_bert.py
* Simplify logic in src/transformers/models/rembert/modeling_rembert.py
* Simplify logic in src/transformers/models/albert/modeling_albert.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in: parent 64743d0abe, commit 3499728dc4
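Every hunk below applies the same refactoring: an `assert` (which is stripped out under `python -O` and only ever raises a bare `AssertionError`) is replaced by an explicit `if` check that raises a `ValueError` with the same message. A minimal sketch of the pattern, using hypothetical `pointer`/`array` shapes in the spirit of the TF weight loaders, not the library code itself:

```python
import numpy as np

# Hypothetical stand-ins for a model parameter and a TF checkpoint array.
pointer = np.zeros((3, 4))
array = np.zeros((4, 3))

# Before (the style removed in this PR): skipped entirely under `python -O`,
# and only ever raises a bare AssertionError.
# assert pointer.shape == array.shape, f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"

# After: always executed, and raises a ValueError that callers can catch explicitly.
try:
    if pointer.shape != array.shape:
        raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
except ValueError as e:
    print(e)  # Pointer shape (3, 4) and array shape (4, 3) mismatched
```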
@@ -186,9 +186,8 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
             elif m_name == "kernel":
                 array = np.transpose(array)
         try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+            if pointer.shape != array.shape:
+                raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
@@ -165,7 +165,8 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
         Returns:
             final_embeddings (:obj:`tf.Tensor`): output embedding tensor.
         """
-        assert not (input_ids is None and inputs_embeds is None)
+        if input_ids is None and inputs_embeds is None:
+            raise ValueError("Need to provide either `input_ids` or `input_embeds`.")
 
         if input_ids is not None:
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
@@ -153,9 +153,8 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
             elif m_name == "kernel":
                 array = np.transpose(array)
         try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+            if pointer.shape != array.shape:
+                raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
@@ -450,7 +449,8 @@ class BertLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = BertAttention(config)
         self.intermediate = BertIntermediate(config)
         self.output = BertOutput(config)
@@ -485,9 +485,10 @@ class BertLayer(nn.Module):
 
         cross_attn_present_key_value = None
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise ValueError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
             cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
@@ -182,7 +182,8 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
         Returns:
             final_embeddings (:obj:`tf.Tensor`): output embedding tensor.
         """
-        assert not (input_ids is None and inputs_embeds is None)
+        if input_ids is None and inputs_embeds is None:
+            raise ValueError("Need to provide either `input_ids` or `input_embeds`.")
 
         if input_ids is not None:
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
@@ -118,7 +118,8 @@ class TFConvBertEmbeddings(tf.keras.layers.Layer):
         Returns:
             final_embeddings (:obj:`tf.Tensor`): output embedding tensor.
         """
-        assert not (input_ids is None and inputs_embeds is None)
+        if input_ids is None and inputs_embeds is None:
+            raise ValueError("Need to provide either `input_ids` or `input_embeds`.")
 
         if input_ids is not None:
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
@@ -139,9 +139,8 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
             elif m_name == "kernel":
                 array = np.transpose(array)
         try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+            if pointer.shape != array.shape:
+                raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
@@ -447,7 +446,8 @@ class ElectraLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = ElectraAttention(config)
         self.intermediate = ElectraIntermediate(config)
         self.output = ElectraOutput(config)
@@ -482,9 +482,10 @@ class ElectraLayer(nn.Module):
 
         cross_attn_present_key_value = None
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise ValueError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
             cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
@@ -404,7 +404,8 @@ class TFElectraEmbeddings(tf.keras.layers.Layer):
         Returns:
             final_embeddings (:obj:`tf.Tensor`): output embedding tensor.
         """
-        assert not (input_ids is None and inputs_embeds is None)
+        if input_ids is None and inputs_embeds is None:
+            raise ValueError("Need to provide either `input_ids` or `input_embeds`.")
 
         if input_ids is not None:
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
@@ -362,7 +362,8 @@ class LayoutLMLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = LayoutLMAttention(config)
         self.intermediate = LayoutLMIntermediate(config)
         self.output = LayoutLMOutput(config)
@@ -397,9 +398,10 @@ class LayoutLMLayer(nn.Module):
 
         cross_attn_present_key_value = None
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise ValueError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
             cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
@@ -135,9 +135,8 @@ def load_tf_weights_in_rembert(model, config, tf_checkpoint_path):
             elif m_name == "kernel":
                 array = np.transpose(array)
         try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+            if pointer.shape != array.shape:
+                raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
@@ -420,7 +419,8 @@ class RemBertLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = RemBertAttention(config)
         self.intermediate = RemBertIntermediate(config)
         self.output = RemBertOutput(config)
@@ -455,9 +455,10 @@ class RemBertLayer(nn.Module):
 
         cross_attn_present_key_value = None
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise ValueError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
             cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
@@ -389,7 +389,8 @@ class RobertaLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = RobertaAttention(config)
         self.intermediate = RobertaIntermediate(config)
         self.output = RobertaOutput(config)
@@ -424,9 +425,10 @@ class RobertaLayer(nn.Module):
 
         cross_attn_present_key_value = None
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise ValueError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
             cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
@@ -167,9 +167,8 @@ def load_tf_weights_in_roformer(model, config, tf_checkpoint_path):
             elif m_name == "kernel":
                 array = np.transpose(array)
         try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+            if not pointer.shape == array.shape:
+                raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
@@ -463,7 +462,8 @@ class RoFormerLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = RoFormerAttention(config)
         self.intermediate = RoFormerIntermediate(config)
         self.output = RoFormerOutput(config)
@@ -329,7 +329,8 @@ class SplinterLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = SplinterAttention(config)
         self.intermediate = SplinterIntermediate(config)
         self.output = SplinterOutput(config)
@@ -364,9 +365,10 @@ class SplinterLayer(nn.Module):
 
         cross_attn_present_key_value = None
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise ValueError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
             cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
@@ -252,9 +252,8 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
             elif m_name == "kernel":
                 array = np.transpose(array)
         try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+            if pointer.shape != array.shape:
+                raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
@@ -548,7 +547,8 @@ class TapasLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = TapasAttention(config)
         self.intermediate = TapasIntermediate(config)
         self.output = TapasOutput(config)
@@ -583,9 +583,10 @@ class TapasLayer(nn.Module):
 
         cross_attn_present_key_value = None
        if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise ValueError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
             cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None