Mirror of https://github.com/huggingface/transformers.git, synced 2025-08-02 19:21:31 +06:00
Replace assertions with value errors on distilbert model (#20463)
* Changed assert into 7-8 exceptions
* Updated syntax error
* Updated error
* Updated file (Co-author: Batese2001)
* Successful test on test_modeling_distilbert.py: the revised code raises errors and exceptions as expected. Co-credit: @batese2001
* Delete test_modeling_distilbert.ipynb
* Update modeling_distilbert.py
* Successfully raise exceptions when the conditions checked by the former assert statements are violated (Co-author: Batese2001)
* Successfully raise exceptions when the conditions checked by the former assert statements are violated (Co-author: Batese2001)
* Committed the reformatted distilbert model
* Reformatted distilbert model
* Reformatted distilbert model
* Reformatted distilbert model
* Reformatted distilbert model with black
* Changed comments to better explain that an exception is raised when the number of heads does not evenly divide the dimensions
* Changed comments to better explain that an exception is raised when the number of heads does not evenly divide the dimensions
* Changed based on the feedback
* Changed line 833 based on the suggestion made by @younesbelkada
* Changed line 833 based on the suggestion made by @younesbelkada, draft 2
* Reformatted file
* Update src/transformers/models/distilbert/modeling_distilbert.py
* Update src/transformers/models/distilbert/modeling_distilbert.py

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
This commit is contained in:
parent 134a8e21ae · commit 98122794d4
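The diff below applies one pattern throughout: each bare assert is replaced by an explicit condition check that raises a ValueError (or, for the sa_output check, a TypeError) carrying an f-string message. A minimal, self-contained sketch of that pattern (the helper name and call sites are illustrative only, not part of the commit):

def _check_heads_divide_dim(dim: int, n_heads: int) -> None:
    # Before: `assert dim % n_heads == 0` raises an uninformative AssertionError
    # and is skipped entirely when Python runs with optimizations (python -O).
    # After: the same condition raises a ValueError with a descriptive message.
    if dim % n_heads != 0:
        raise ValueError(f"n_heads: {n_heads} must divide dim: {dim} evenly")


_check_heads_divide_dim(768, 12)  # passes: 768 / 12 == 64

try:
    _check_heads_divide_dim(768, 7)
except ValueError as err:
    print(err)  # n_heads: 7 must divide dim: 768 evenly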
src/transformers/models/distilbert/modeling_distilbert.py

@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 """
  PyTorch DistilBERT model adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) and in
  part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert)
@@ -141,7 +142,10 @@ class MultiHeadSelfAttention(nn.Module):
         self.dim = config.dim
         self.dropout = nn.Dropout(p=config.attention_dropout)
 
-        assert self.dim % self.n_heads == 0
+        # Have an even number of multi heads that divide the dimensions
+        if self.dim % self.n_heads != 0:
+            # Raise value errors for even multi-head attention nodes
+            raise ValueError(f"self.n_heads: {self.n_heads} must divide self.dim: {self.dim} evenly")
 
         self.q_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
         self.k_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
@@ -255,7 +259,9 @@ class TransformerBlock(nn.Module):
     def __init__(self, config: PretrainedConfig):
         super().__init__()
 
-        assert config.dim % config.n_heads == 0
+        # Have an even number of Configure multi-heads
+        if config.dim % config.n_heads != 0:
+            raise ValueError(f"config.n_heads {config.n_heads} must divide config.dim {config.dim} evenly")
 
         self.attention = MultiHeadSelfAttention(config)
         self.sa_layer_norm = nn.LayerNorm(normalized_shape=config.dim, eps=1e-12)
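For context, a usage sketch of how the new TransformerBlock check surfaces when a model is built with a head count that does not divide the hidden size (assuming an installed transformers version that includes this change; the printed message follows the f-string above):

from transformers import DistilBertConfig, DistilBertModel

# 768 is not divisible by 7, so TransformerBlock.__init__ now rejects the
# config with a descriptive ValueError instead of a bare AssertionError.
config = DistilBertConfig(dim=768, n_heads=7)
try:
    DistilBertModel(config)
except ValueError as err:
    print(err)  # config.n_heads 7 must divide config.dim 768 evenly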
@@ -291,7 +297,9 @@ class TransformerBlock(nn.Module):
         if output_attentions:
             sa_output, sa_weights = sa_output  # (bs, seq_length, dim), (bs, n_heads, seq_length, seq_length)
         else:  # To handle these `output_attentions` or `output_hidden_states` cases returning tuples
-            assert type(sa_output) == tuple
+            if type(sa_output) != tuple:
+                raise TypeError(f"sa_output must be a tuple but it is {type(sa_output)} type")
+
             sa_output = sa_output[0]
         sa_output = self.sa_layer_norm(sa_output + x)  # (bs, seq_length, dim)
@@ -320,6 +328,7 @@ class Transformer(nn.Module):
         output_hidden_states: bool = False,
         return_dict: Optional[bool] = None,
     ) -> Union[BaseModelOutput, Tuple[torch.Tensor, ...]]:  # docstyle-ignore
+
         """
         Parameters:
             x: torch.tensor(bs, seq_length, dim) Input sequence embedded.
@@ -348,11 +357,14 @@ class Transformer(nn.Module):
             hidden_state = layer_outputs[-1]
 
             if output_attentions:
-                assert len(layer_outputs) == 2
+                if len(layer_outputs) != 2:
+                    raise ValueError(f"The length of the layer_outputs should be 2, but it is {len(layer_outputs)}")
+
                 attentions = layer_outputs[0]
                 all_attentions = all_attentions + (attentions,)
             else:
-                assert len(layer_outputs) == 1
+                if len(layer_outputs) != 1:
+                    raise ValueError(f"The length of the layer_outputs should be 1, but it is {len(layer_outputs)}")
 
         # Add last layer
         if output_hidden_states:
@@ -810,7 +822,9 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
 
         self.distilbert = DistilBertModel(config)
         self.qa_outputs = nn.Linear(config.dim, config.num_labels)
-        assert config.num_labels == 2
+        if config.num_labels != 2:
+            raise ValueError(f"config.num_labels should be 2, but it is {config.num_labels}")
+
         self.dropout = nn.Dropout(config.qa_dropout)
 
         # Initialize weights and apply final processing
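Similarly, a usage sketch for the question-answering head, which emits exactly two sets of logits (start and end positions) and therefore rejects any other num_labels (again assuming a build that includes this change):

from transformers import DistilBertConfig, DistilBertForQuestionAnswering

# num_labels defaults to 2 for this head; any other value is now rejected
# with the ValueError added above rather than a bare AssertionError.
config = DistilBertConfig(num_labels=3)
try:
    DistilBertForQuestionAnswering(config)
except ValueError as err:
    print(err)  # config.num_labels should be 2, but it is 3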