mirror of https://github.com/huggingface/transformers.git
Remove padding_masks from gpt_bigcode. (#27348)
Update modeling_gpt_bigcode.py

parent 8c91f15ae5
commit cc9f27bb1e
@@ -235,16 +235,10 @@ class GPTBigCodeAttention(nn.Module):
         encoder_attention_mask: Optional[torch.Tensor] = None,
         use_cache: Optional[bool] = False,
         output_attentions: Optional[bool] = False,
-        **kwargs,
     ) -> Union[
         Tuple[torch.Tensor, Optional[torch.Tensor]],
         Tuple[torch.Tensor, Optional[torch.Tensor], Tuple[torch.Tensor, ...]],
     ]:
-        if "padding_mask" in kwargs:
-            logger.warning_once(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
         if encoder_hidden_states is not None:
             if not hasattr(self, "q_attn") or not self.is_cross_attention:
                 raise ValueError(
@@ -308,19 +302,10 @@ class GPTBigCodeFlashAttention2(GPTBigCodeAttention):
         encoder_attention_mask: Optional[torch.Tensor] = None,
         use_cache: Optional[bool] = False,
         output_attentions: Optional[bool] = False,
-        **kwargs,
     ) -> Union[
         Tuple[torch.Tensor, Optional[torch.Tensor]],
         Tuple[torch.Tensor, Optional[torch.Tensor], Tuple[torch.Tensor, ...]],
     ]:
-        if "padding_mask" in kwargs:
-            logger.warning_once(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
-            # overwrite attention_mask with padding_mask
-            attention_mask = kwargs.pop("padding_mask")
-
         if encoder_hidden_states is not None:
             if not hasattr(self, "q_attn") or not self.is_cross_attention:
                 raise ValueError(
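
With `**kwargs` and the `padding_mask` shim removed from both attention classes, the mask is expected to arrive through the standard `attention_mask` argument. Below is a minimal caller-side sketch, not part of the commit: the checkpoint name is only an illustrative GPTBigCode checkpoint, and the pad-token fallback is an assumption for tokenizers that ship without one.

# Minimal usage sketch: supply the mask via `attention_mask` rather than the
# removed `padding_mask` keyword. Checkpoint name is illustrative only.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bigcode/gpt_bigcode-santacoder")
model = AutoModelForCausalLM.from_pretrained("bigcode/gpt_bigcode-santacoder")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # assumed fallback when no pad token is defined

inputs = tokenizer(
    ["def hello_world():", "print('hi')"],
    return_tensors="pt",
    padding=True,
)

# `inputs` already carries `attention_mask`; after this commit the attention
# layers no longer accept extra **kwargs, so `padding_mask` cannot be passed.
with torch.no_grad():
    outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )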