diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index a28a0942508..ee5b32886b0 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -312,10 +312,12 @@ def booleans_processing(config, **kwargs):
     final_booleans = {}
 
     if tf.executing_eagerly():
-        # Pure conv models (such as ConvNext) do not have `output_attentions`
-        final_booleans["output_attentions"] = kwargs.get("output_attentions", None)
-        if final_booleans["output_attentions"] is None:
-            final_booleans["output_attentions"] = config.output_attentions
+        # Pure conv models (such as ConvNext) do not have `output_attentions`. If the signature has
+        # `output_attentions`, it will be present here in `kwargs`, even if unset (in that case, as `None`)
+        if "output_attentions" in kwargs:
+            final_booleans["output_attentions"] = (
+                kwargs["output_attentions"] if kwargs["output_attentions"] is not None else config.output_attentions
+            )
         final_booleans["output_hidden_states"] = (
             kwargs["output_hidden_states"]
             if kwargs["output_hidden_states"] is not None
@@ -330,7 +332,10 @@ def booleans_processing(config, **kwargs):
             kwargs["use_cache"] if kwargs["use_cache"] is not None else getattr(config, "use_cache", None)
         )
     else:
-        final_booleans["output_attentions"] = config.output_attentions
+        # Pure conv models (such as ConvNext) do not have `output_attentions`. If the signature has
+        # `output_attentions`, it will be present here in `kwargs`, even if unset (in that case, as `None`)
+        if "output_attentions" in kwargs:
+            final_booleans["output_attentions"] = config.output_attentions
         final_booleans["output_hidden_states"] = config.output_hidden_states
 
         if kwargs.get("return_dict", None) not in (None, True):
@@ -403,7 +408,7 @@ def input_processing(func, config, input_ids, **kwargs):
         Two lists, one for the missing layers, and another one for the unexpected layers.
     """
     signature = dict(inspect.signature(func).parameters)
-    signature.pop("kwargs", None)
+    has_kwargs = bool(signature.pop("kwargs", None))
     signature.pop("self", None)
     parameter_names = list(signature.keys())
     output = {}
@@ -433,12 +438,14 @@ def input_processing(func, config, input_ids, **kwargs):
         elif "past_key_values" in kwargs["kwargs_call"] and "past" in parameter_names:
             kwargs["past"] = kwargs["kwargs_call"].pop("past_key_values")
 
-    if len(kwargs["kwargs_call"]) > 0:
-        raise ValueError(
-            f"The following keyword arguments are not supported by this model: {list(kwargs['kwargs_call'].keys())}."
-        )
-
-    kwargs.pop("kwargs_call")
+    if has_kwargs:
+        output["kwargs"] = kwargs.pop("kwargs_call", {})
+    else:
+        if len(kwargs["kwargs_call"]) > 0:
+            raise ValueError(
+                f"The following keyword arguments are not supported by this model: {list(kwargs['kwargs_call'].keys())}."
+ ) + kwargs.pop("kwargs_call") for k, v in kwargs.items(): if isinstance(v, allowed_types) or v is None: diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 51bc5c0ae77..ae325558cd7 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -551,7 +551,6 @@ class TFAlbertMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: if input_ids is not None and inputs_embeds is not None: @@ -785,7 +784,6 @@ class TFAlbertModel(TFAlbertPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: outputs = self.albert( input_ids=input_ids, @@ -854,7 +852,6 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss): labels: Optional[Union[np.ndarray, tf.Tensor]] = None, sentence_order_label: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFAlbertForPreTrainingOutput, Tuple[tf.Tensor]]: r""" Return: @@ -976,7 +973,6 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss) return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1064,7 +1060,6 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1158,7 +1153,6 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1244,7 +1238,6 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1355,7 +1348,6 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 106a87c043c..9cf3e04054e 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -679,7 +679,6 @@ class TFBartEncoder(tf.keras.layers.Layer): output_hidden_states: 
Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: """ Args: @@ -834,7 +833,6 @@ class TFBartDecoder(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: r""" Args: @@ -1273,7 +1271,6 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel, TFCausalLanguageMode return_dict: Optional[bool] = None, labels: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 6dfae3d5fb6..5e8775ab0de 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -737,7 +737,6 @@ class TFBertMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: if not self.config.is_decoder: @@ -1067,7 +1066,6 @@ class TFBertModel(TFBertPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1174,7 +1172,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss): labels: Optional[Union[np.ndarray, tf.Tensor]] = None, next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBertForPreTrainingOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1302,7 +1299,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1520,7 +1516,6 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi return_dict: Optional[bool] = None, next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFNextSentencePredictorOutput, Tuple[tf.Tensor]]: r""" Return: @@ -1628,7 +1623,6 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1723,7 +1717,6 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape 
`(batch_size,)`, *optional*): @@ -1857,7 +1850,6 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1949,7 +1941,6 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss) start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index 80236fab021..4225f8e14e5 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -662,7 +662,6 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): """ Args: @@ -823,7 +822,6 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Args: @@ -1276,7 +1274,6 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal return_dict: Optional[bool] = None, labels: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: r""" labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index af575e6418b..2d7fe2af613 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -667,7 +667,6 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): """ Args: @@ -827,7 +826,6 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Args: @@ -1253,7 +1251,6 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel return_dict: Optional[bool] = None, labels: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: r""" labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index f8192ac7aa0..366d0a9eb1d 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -504,7 +504,6 @@ class TFCLIPTextTransformer(tf.keras.layers.Layer): output_hidden_states: bool, return_dict: bool, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: input_shape = shape_list(input_ids) @@ -593,7 +592,6 @@ class TFCLIPTextMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> 
Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: if input_ids is None: raise ValueError("You have to specify input_ids") @@ -632,7 +630,6 @@ class TFCLIPVisionTransformer(tf.keras.layers.Layer): output_hidden_states: bool, return_dict: bool, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: embedding_output = self.embeddings(pixel_values=pixel_values) @@ -683,7 +680,6 @@ class TFCLIPVisionMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: if pixel_values is None: @@ -762,7 +758,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> tf.Tensor: if input_ids is None: @@ -796,7 +791,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> tf.Tensor: if pixel_values is None: raise ValueError("You have to specify pixel_values") @@ -826,7 +820,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFCLIPOutput, Tuple[tf.Tensor]]: if input_ids is None: @@ -1058,7 +1051,6 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: r""" Returns: @@ -1153,7 +1145,6 @@ class TFCLIPVisionModel(TFCLIPPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: r""" Returns: @@ -1258,7 +1249,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> tf.Tensor: r""" Returns: @@ -1297,7 +1287,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> tf.Tensor: r""" Returns: @@ -1345,7 +1334,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFCLIPOutput, Tuple[tf.Tensor]]: r""" Returns: diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index f167325527b..8ec1b18ae74 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -581,7 +581,6 @@ class TFConvBertMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") @@ -751,7 +750,6 @@ class TFConvBertModel(TFConvBertPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.convbert( input_ids=input_ids, @@ -870,7 +868,6 @@ class TFConvBertForMaskedLM(TFConvBertPreTrainedModel, TFMaskedLanguageModelingL return_dict: Optional[bool] = None, labels: 
Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFMaskedLMOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -979,7 +976,6 @@ class TFConvBertForSequenceClassification(TFConvBertPreTrainedModel, TFSequenceC return_dict: Optional[bool] = None, labels: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1073,7 +1069,6 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos return_dict: Optional[bool] = None, labels: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFMultipleChoiceModelOutput]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1188,7 +1183,6 @@ class TFConvBertForTokenClassification(TFConvBertPreTrainedModel, TFTokenClassif return_dict: Optional[bool] = None, labels: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFTokenClassifierOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1268,7 +1262,6 @@ class TFConvBertForQuestionAnswering(TFConvBertPreTrainedModel, TFQuestionAnswer start_positions: Optional[tf.Tensor] = None, end_positions: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFQuestionAnsweringModelOutput]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py index b952b677524..1cb1b71b613 100644 --- a/src/transformers/models/convnext/modeling_tf_convnext.py +++ b/src/transformers/models/convnext/modeling_tf_convnext.py @@ -293,7 +293,6 @@ class TFConvNextMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states @@ -439,7 +438,6 @@ class TFConvNextModel(TFConvNextPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: r""" Returns: @@ -518,7 +516,6 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index 89d3ef56114..2a58467119a 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -268,7 +268,6 @@ class TFCTRLMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): # If using past key value states, only the last tokens @@ -541,7 +540,6 @@ class TFCTRLModel(TFCTRLPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.transformer( input_ids=input_ids, @@ -653,7 +651,6 @@ class 
TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss): return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -765,7 +762,6 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index c97b676596f..90ec5ca2c89 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -928,7 +928,6 @@ class TFDebertaMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: if input_ids is not None and inputs_embeds is not None: @@ -1096,7 +1095,6 @@ class TFDebertaModel(TFDebertaPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: outputs = self.deberta( input_ids=input_ids, @@ -1156,7 +1154,6 @@ class TFDebertaForMaskedLM(TFDebertaPreTrainedModel, TFMaskedLanguageModelingLos return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1242,7 +1239,6 @@ class TFDebertaForSequenceClassification(TFDebertaPreTrainedModel, TFSequenceCla return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1325,7 +1321,6 @@ class TFDebertaForTokenClassification(TFDebertaPreTrainedModel, TFTokenClassific return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1404,7 +1399,6 @@ class TFDebertaForQuestionAnswering(TFDebertaPreTrainedModel, TFQuestionAnswerin start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index 0a77a6057d9..39cf57a146f 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -1028,7 +1028,6 @@ class TFDebertaV2MainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: if input_ids is not None and inputs_embeds is not None: @@ -1198,7 +1197,6 @@ class 
TFDebertaV2Model(TFDebertaV2PreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: outputs = self.deberta( input_ids=input_ids, @@ -1259,7 +1257,6 @@ class TFDebertaV2ForMaskedLM(TFDebertaV2PreTrainedModel, TFMaskedLanguageModelin return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1346,7 +1343,6 @@ class TFDebertaV2ForSequenceClassification(TFDebertaV2PreTrainedModel, TFSequenc return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1430,7 +1426,6 @@ class TFDebertaV2ForTokenClassification(TFDebertaV2PreTrainedModel, TFTokenClass return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1510,7 +1505,6 @@ class TFDebertaV2ForQuestionAnswering(TFDebertaV2PreTrainedModel, TFQuestionAnsw start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index ccae454ebe0..07aeee9e1f9 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -372,7 +372,6 @@ class TFDistilBertMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") @@ -543,7 +542,6 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: outputs = self.distilbert( input_ids=input_ids, @@ -647,7 +645,6 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -735,7 +732,6 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -817,7 +813,6 @@ class 
TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -911,7 +906,6 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1021,7 +1015,6 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index f2b1a1606e4..df290f6f5d7 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -174,7 +174,6 @@ class TFDPREncoderLayer(tf.keras.layers.Layer): output_hidden_states: bool = None, return_dict: bool = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor, ...]]: outputs = self.bert_model( input_ids=input_ids, @@ -235,7 +234,6 @@ class TFDPRSpanPredictorLayer(tf.keras.layers.Layer): output_hidden_states: bool = False, return_dict: bool = False, training: bool = False, - **kwargs, ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: # notations: N - number of questions in a batch, M - number of passages per questions, L - sequence length n_passages, sequence_length = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:2] @@ -294,7 +292,6 @@ class TFDPRSpanPredictor(TFPreTrainedModel): output_hidden_states: bool = False, return_dict: bool = False, training: bool = False, - **kwargs, ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: outputs = self.encoder( input_ids=input_ids, @@ -328,7 +325,6 @@ class TFDPREncoder(TFPreTrainedModel): output_hidden_states: bool = False, return_dict: bool = False, training: bool = False, - **kwargs, ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: outputs = self.encoder( input_ids=input_ids, @@ -560,7 +556,6 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder): output_hidden_states=None, return_dict=None, training: bool = False, - **kwargs, ) -> Union[TFDPRContextEncoderOutput, Tuple[tf.Tensor, ...]]: r""" Return: @@ -648,7 +643,6 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder): output_hidden_states=None, return_dict=None, training: bool = False, - **kwargs, ) -> Union[TFDPRQuestionEncoderOutput, Tuple[tf.Tensor, ...]]: r""" Return: @@ -734,7 +728,6 @@ class TFDPRReader(TFDPRPretrainedReader): output_hidden_states: bool = None, return_dict=None, training: bool = False, - **kwargs, ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: r""" Return: diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 9cbbd4b7e1e..eccb321f100 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ 
b/src/transformers/models/electra/modeling_tf_electra.py @@ -719,7 +719,6 @@ class TFElectraMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: if not self.config.is_decoder: use_cache = False @@ -953,7 +952,6 @@ class TFElectraModel(TFElectraPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1043,7 +1041,6 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFElectraForPreTrainingOutput, Tuple[tf.Tensor]]: r""" Returns: @@ -1180,7 +1177,6 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1290,7 +1286,6 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1383,7 +1378,6 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss) return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1501,7 +1495,6 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1583,7 +1576,6 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 1c59493e1bf..9e92e767b1b 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -23,7 +23,7 @@ import tensorflow as tf from ...configuration_utils import PretrainedConfig from ...modeling_tf_outputs import TFBaseModelOutput, TFSeq2SeqLMOutput -from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, get_initializer, input_processing +from 
...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, get_initializer, unpack_inputs from ...tf_utils import shape_list from ...utils import ( DUMMY_INPUTS, @@ -491,6 +491,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs) return cls(encoder=encoder, decoder=decoder, config=config) + @unpack_inputs @add_start_docstrings_to_model_forward(ENCODER_DECODER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( @@ -559,9 +560,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): if encoder_outputs is None: - encoder_processing_inputs = { - "func": self.encoder.call, - "config": self.encoder.config, + encoder_inputs = { "input_ids": input_ids, "attention_mask": attention_mask, "inputs_embeds": inputs_embeds, @@ -569,14 +568,10 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): "output_hidden_states": output_hidden_states, "return_dict": return_dict, "training": training, - "kwargs_call": {}, } # Add arguments to encoder from `kwargs_encoder` - for k, v in kwargs_encoder.items(): - encoder_processing_inputs[k] = v - - encoder_inputs = input_processing(**encoder_processing_inputs) + encoder_inputs.update(kwargs_encoder) # Handle the case where the inputs are passed as a single dict which contains `labels`. # The `labels` shouldn't be passed to `self.encoder` below, because it is a based model without this @@ -607,9 +602,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): labels, self.config.pad_token_id, self.config.decoder_start_token_id ) - decoder_processing_inputs = { - "func": self.decoder.call, - "config": self.decoder.config, + decoder_inputs = { "input_ids": decoder_input_ids, "attention_mask": decoder_attention_mask, "encoder_hidden_states": encoder_hidden_states, @@ -621,14 +614,11 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): "past_key_values": past_key_values, "return_dict": return_dict, "training": training, - "kwargs_call": {}, } # Add arguments to decoder from `kwargs_decoder` - for k, v in kwargs_decoder.items(): - decoder_processing_inputs[k] = v + decoder_inputs.update(kwargs_decoder) - decoder_inputs = input_processing(**decoder_processing_inputs) decoder_outputs = self.decoder(**decoder_inputs) logits = decoder_outputs[0] diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index 8441e180173..f751c0f2250 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -258,7 +258,6 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFBaseModelOutput]: outputs = self.transformer( input_ids=input_ids, @@ -490,7 +489,6 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFBaseModelOutput]: # removed: src_enc=None, src_len=None @@ -808,7 +806,6 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] 
= None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFFlaubertWithLMHeadModelOutput]: transformer_outputs = self.transformer( diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index 56e6bf13b49..c1ddef0ad9c 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -761,7 +761,6 @@ class TFFunnelBaseLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: @@ -835,7 +834,6 @@ class TFFunnelMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") @@ -1117,7 +1115,6 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]: return self.funnel( input_ids=input_ids, @@ -1165,7 +1162,6 @@ class TFFunnelModel(TFFunnelPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]: return self.funnel( @@ -1293,7 +1289,6 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss) return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: bool = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFMaskedLMOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1369,7 +1364,6 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: bool = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFSequenceClassifierOutput]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1455,7 +1449,6 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: bool = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFMultipleChoiceModelOutput]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1566,7 +1559,6 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: bool = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFTokenClassifierOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1645,7 +1637,6 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: bool = False, - **kwargs, ) -> Union[Tuple[tf.Tensor], TFQuestionAnsweringModelOutput]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index 88b4fb5ed60..8a35208b52e 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -367,7 +367,6 @@ 
class TFGPT2MainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: if input_ids is not None and inputs_embeds is not None: @@ -730,7 +729,6 @@ class TFGPT2Model(TFGPT2PreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -920,7 +918,6 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1038,7 +1035,6 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFGPT2DoubleHeadsModelOutput, Tuple[tf.Tensor]]: r""" mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input): @@ -1195,7 +1191,6 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutputWithPast, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index ce5c5d78e5a..702b163f471 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -390,7 +390,6 @@ class TFGPTJMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: @@ -672,7 +671,6 @@ class TFGPTJModel(TFGPTJPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ): r""" use_cache (`bool`, *optional*, defaults to `True`): @@ -781,7 +779,6 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ): r""" labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -886,7 +883,6 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ): r""" labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1011,7 +1007,6 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss) output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ): r""" start_positions (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*): diff 
--git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index e6fd771d37e..86b2fc5a38a 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -706,7 +706,6 @@ class TFLayoutLMMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: if input_ids is not None and inputs_embeds is not None: @@ -928,7 +927,6 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: r""" Returns: @@ -1048,7 +1046,6 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1172,7 +1169,6 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1303,7 +1299,6 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 4519f5df980..8381d81afb4 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1666,7 +1666,6 @@ class TFLEDEncoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): """ Args: @@ -1911,7 +1910,6 @@ class TFLEDDecoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Args: @@ -2333,7 +2331,6 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel): return_dict=None, labels=None, training=False, - **kwargs, ): """ Returns: @@ -2429,7 +2426,7 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel): decoder_head_mask=None, use_cache=None, encoder_outputs=None, - **kwargs, + **kwargs ): # cut decoder_input_ids if past is used if past is not None: diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 762f872ee70..850a8113f6a 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -1676,7 +1676,6 @@ class TFLongformerMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: @@ -2023,7 +2022,6 @@ class 
TFLongformerModel(TFLongformerPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFLongformerBaseModelOutputWithPooling, Tuple[tf.Tensor]]: outputs = self.longformer( @@ -2100,7 +2098,6 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFLongformerMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -2194,7 +2191,6 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFLongformerQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -2340,7 +2336,6 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSeque return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFLongformerSequenceClassifierOutput, Tuple[tf.Tensor]]: if global_attention_mask is None and input_ids is not None: @@ -2450,7 +2445,6 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFLongformerMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -2580,7 +2574,6 @@ class TFLongformerForTokenClassification(TFLongformerPreTrainedModel, TFTokenCla return_dict: Optional[bool] = None, labels: Optional[Union[np.array, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFLongformerTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index efa812a5965..2101b7cf1f5 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -685,7 +685,6 @@ class TFLxmertMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: @@ -946,7 +945,6 @@ class TFLxmertModel(TFLxmertPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.lxmert( input_ids, @@ -1282,7 +1280,6 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" masked_lm_labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index aa766e68154..a696a5648fe 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -707,7 +707,6 @@ class TFMarianEncoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): """ Args: @@ -866,7 +865,6 @@ class 
TFMarianDecoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Args: @@ -1296,7 +1294,6 @@ class TFMarianMTModel(TFMarianPreTrainedModel, TFCausalLanguageModelingLoss): return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 3f2ea655f45..021dc21f21a 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -684,7 +684,6 @@ class TFMBartEncoder(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: """ Args: @@ -848,7 +847,6 @@ class TFMBartDecoder(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[ TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor] ]: @@ -1278,7 +1276,7 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo decoder_head_mask: Optional[tf.Tensor] = None, cross_attn_head_mask: Optional[tf.Tensor] = None, encoder_outputs: Optional[TFBaseModelOutput] = None, - past_key_values: [Tuple[Tuple[tf.Tensor]]] = None, + past_key_values: Tuple[Tuple[tf.Tensor]] = None, inputs_embeds: Optional[tf.Tensor] = None, decoder_inputs_embeds: Optional[tf.Tensor] = None, use_cache: Optional[bool] = None, @@ -1287,7 +1285,6 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo return_dict: Optional[bool] = None, labels: Optional[tf.Tensor] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]: """ labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 007be43f5f0..5d1c74252e9 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -692,7 +692,6 @@ class TFMobileBertMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") @@ -928,7 +927,6 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.mobilebert( input_ids=input_ids, @@ -993,7 +991,6 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Return: @@ -1092,7 +1089,6 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1176,7 +1172,6 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel, TFNextS return_dict=None, next_sentence_label=None, training=False, - **kwargs, ): r""" Return: @@ -1287,7 +1282,6 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque 
return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1381,7 +1375,6 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn start_positions=None, end_positions=None, training=False, - **kwargs, ): r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1498,7 +1491,6 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1626,7 +1618,6 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 5edd73c4170..0e8c61e3403 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -497,7 +497,6 @@ class TFMPNetMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: @@ -686,7 +685,6 @@ class TFMPNetModel(TFMPNetPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.mpnet( input_ids=input_ids, @@ -803,7 +801,6 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss): return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -909,7 +906,6 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1000,7 +996,6 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss): return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1112,7 +1107,6 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 80d7a9abd19..40a94c18815 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -249,7 +249,6 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if input_ids is not None and inputs_embeds is not None: @@ -522,7 +521,6 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFBaseModelOutput]: outputs = self.transformer( @@ -586,7 +584,6 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFCausalLMOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -669,7 
+666,6 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFOpenAIGPTDoubleHeadsModelOutput]: r""" mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input): @@ -813,7 +809,6 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 26f3ef46119..d7eea1660a4 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -710,7 +710,6 @@ class TFPegasusEncoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): """ Args: @@ -872,7 +871,6 @@ class TFPegasusDecoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Args: @@ -1305,7 +1303,6 @@ class TFPegasusForConditionalGeneration(TFPegasusPreTrainedModel, TFCausalLangua return_dict=None, labels=None, training=False, - **kwargs, ): """ labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index 9a3892f409f..f40ea6f6f1c 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -660,7 +660,6 @@ class TFRemBertMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: if not self.config.is_decoder: @@ -959,7 +958,6 @@ class TFRemBertModel(TFRemBertPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1060,7 +1058,6 @@ class TFRemBertForMaskedLM(TFRemBertPreTrainedModel, TFMaskedLanguageModelingLos return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1155,7 +1152,6 @@ class TFRemBertForCausalLM(TFRemBertPreTrainedModel, TFCausalLanguageModelingLos return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1283,7 +1279,6 @@ class TFRemBertForSequenceClassification(TFRemBertPreTrainedModel, TFSequenceCla return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = 
None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1374,7 +1369,6 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss) return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1494,7 +1488,6 @@ class TFRemBertForTokenClassification(TFRemBertPreTrainedModel, TFTokenClassific return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1575,7 +1568,6 @@ class TFRemBertForQuestionAnswering(TFRemBertPreTrainedModel, TFQuestionAnswerin start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index a62659582b7..b63d99a901b 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -624,7 +624,6 @@ class TFRobertaMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: if not self.config.is_decoder: @@ -936,7 +935,6 @@ class TFRobertaModel(TFRobertaPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1093,7 +1091,6 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1196,7 +1193,6 @@ class TFRobertaForCausalLM(TFRobertaPreTrainedModel, TFCausalLanguageModelingLos return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1353,7 +1349,6 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1449,7 +1444,6 @@ class 
TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss) return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1567,7 +1561,6 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1655,7 +1648,6 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index bed8ecf975c..020824bb37e 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -614,7 +614,6 @@ class TFRoFormerMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: if input_ids is not None and inputs_embeds is not None: @@ -817,7 +816,6 @@ class TFRoFormerModel(TFRoFormerPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: outputs = self.roformer( input_ids=input_ids, @@ -877,7 +875,6 @@ class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingL return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -953,7 +950,6 @@ class TFRoFormerForCausalLM(TFRoFormerPreTrainedModel, TFCausalLanguageModelingL return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFCausalLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1064,7 +1060,6 @@ class TFRoFormerForSequenceClassification(TFRoFormerPreTrainedModel, TFSequenceC return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1155,7 +1150,6 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1269,7 +1263,6 @@ 
class TFRoFormerForTokenClassification(TFRoFormerPreTrainedModel, TFTokenClassif return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1348,7 +1341,6 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 6c78ab1b58f..7848630314d 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -791,7 +791,6 @@ class TFSpeech2TextEncoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): """ Args: @@ -957,7 +956,6 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Args: diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 133103f3e85..d7fd5b30145 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -654,7 +654,6 @@ class TFT5MainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ) -> Tuple: if input_ids is not None and inputs_embeds is not None: @@ -1152,7 +1151,6 @@ class TFT5Model(TFT5PreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFSeq2SeqModelOutput]: r""" Returns: @@ -1329,7 +1327,6 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFSeq2SeqLMOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1611,6 +1608,10 @@ class TFT5EncoderModel(TFT5PreTrainedModel): encoder_config.use_cache = False self.encoder = TFT5MainLayer(encoder_config, embed_tokens, name="encoder") + @property + def dummy_inputs(self): + return {"input_ids": tf.constant(DUMMY_INPUTS)} + def get_encoder(self): return self.encoder @@ -1627,7 +1628,6 @@ class TFT5EncoderModel(TFT5PreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFBaseModelOutput]: r""" Returns: @@ -1670,6 +1670,19 @@ class TFT5EncoderModel(TFT5PreTrainedModel): attentions=encoder_outputs.attentions, ) + @tf.function( + input_signature=[ + { + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + } + ] + ) + def serving(self, inputs): + output = self.call(inputs) + + return self.serving_output(output) +
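The `TFT5EncoderModel` hunks above also add encoder-only `dummy_inputs` (no decoder ids) and an explicit `serving` signature with dynamic batch and sequence dimensions, which makes the encoder-only model exportable as a SavedModel. A usage sketch, assuming the public `t5-small` checkpoint; illustrative only, not part of the patch.

```python
import tensorflow as tf

from transformers import T5Tokenizer, TFT5EncoderModel

model = TFT5EncoderModel.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Export with the explicit serving signature, then call the reloaded function.
tf.saved_model.save(model, "/tmp/tf_t5_encoder", signatures=model.serving)
serving_fn = tf.saved_model.load("/tmp/tf_t5_encoder").signatures["serving_default"]

inputs = tokenizer("Studies have shown that owning a dog is good for you", return_tensors="tf")
outputs = serving_fn(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
```

# Copied from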
transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output def serving_output(self, output): hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index 8f2138f2fba..b6a2f10d120 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -770,7 +770,6 @@ class TFTapasMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: if input_ids is not None and inputs_embeds is not None: @@ -980,7 +979,6 @@ class TFTapasModel(TFTapasPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: r""" Returns: @@ -1067,7 +1065,6 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1285,7 +1282,6 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTableQuestionAnsweringOutput, Tuple[tf.Tensor]]: r""" table_mask (`tf.Tensor` of shape `(batch_size, seq_length)`, *optional*): @@ -1602,7 +1598,6 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index d5dc28c3650..8ad931150ed 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -550,7 +550,6 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): # the original code for Transformer-XL used shapes [len, bsz] but we want a unified interface in the library @@ -898,7 +897,6 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.transformer( input_ids=input_ids, @@ -979,7 +977,6 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): return_dict=None, labels=None, training=False, - **kwargs, ): if input_ids is not None: bsz, tgt_len = shape_list(input_ids)[:2] @@ -1088,7 +1085,6 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[Tuple, TFTransfoXLSequenceClassifierOutputWithPast]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): diff --git 
a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index eeaca58c5a0..edc2973a073 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -23,7 +23,7 @@ import tensorflow as tf from ...configuration_utils import PretrainedConfig from ...modeling_tf_outputs import TFBaseModelOutput, TFSeq2SeqLMOutput -from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, get_initializer, input_processing +from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, get_initializer, unpack_inputs from ...tf_utils import shape_list from ...utils import ( DUMMY_INPUTS, @@ -510,6 +510,7 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos config = VisionEncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs) return cls(encoder=encoder, decoder=decoder, config=config) + @unpack_inputs @add_start_docstrings_to_model_forward( VISION_ENCODER_DECODER_INPUTS_DOCSTRING.format("batch_size, sequence_length") ) @@ -585,21 +586,16 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos if encoder_outputs is None: - encoder_processing_inputs = { - "func": self.encoder.call, - "config": self.encoder.config, + encoder_inputs = { "input_ids": pixel_values, "output_attentions": output_attentions, "output_hidden_states": output_hidden_states, "return_dict": return_dict, "training": training, - "kwargs_call": {}, } # Add arguments to encoder from `kwargs_encoder` - encoder_processing_inputs.update(kwargs_encoder) - - encoder_inputs = input_processing(**encoder_processing_inputs) + encoder_inputs.update(kwargs_encoder) if "input_ids" in encoder_inputs: encoder_inputs["pixel_values"] = encoder_inputs.pop("input_ids") @@ -639,9 +635,7 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos batch_size, sequence_length = shape_list(encoder_hidden_states)[:2] encoder_attention_mask = tf.ones(shape=(batch_size, sequence_length), dtype=tf.int32) - decoder_processing_inputs = { - "func": self.decoder.call, - "config": self.decoder.config, + decoder_inputs = { "input_ids": decoder_input_ids, "attention_mask": decoder_attention_mask, "encoder_hidden_states": encoder_hidden_states, @@ -653,13 +647,11 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos "past_key_values": past_key_values, "return_dict": return_dict, "training": training, - "kwargs_call": {}, } # Add arguments to decoder from `kwargs_decoder` - decoder_processing_inputs.update(kwargs_decoder) + decoder_inputs.update(kwargs_decoder) - decoder_inputs = input_processing(**decoder_processing_inputs) decoder_outputs = self.decoder(**decoder_inputs) logits = decoder_outputs[0]
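The `vision_encoder_decoder` hunk above replaces the manual `input_processing(func=..., config=..., kwargs_call={})` plumbing with the `@unpack_inputs` decorator, so the inner encoder and decoder calls receive plain keyword dicts. A much-simplified conceptual sketch of the decorator pattern follows; the real implementation lives in `modeling_tf_utils` and additionally resolves config-driven boolean defaults and TF symbolic inputs.

```python
import functools
import inspect


def unpack_inputs_sketch(func):
    """Expand keras-style packed inputs (a single dict as the first positional
    argument) against `func`'s explicit signature, so the wrapped body only
    ever sees named arguments."""

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        if len(args) == 1 and isinstance(args[0], dict):
            kwargs = {**args[0], **kwargs}
            args = ()
        bound = inspect.signature(func).bind(self, *args, **kwargs)
        bound.apply_defaults()
        return func(**bound.arguments)

    return wrapper
```

diff --git a/src/transformers/models/vit/modeling_tf_vit.py b/src/transformers/models/vit/modeling_tf_vit.py index e2e946d8c9f..cbf935f4f74 100644 --- a/src/transformers/models/vit/modeling_tf_vit.py +++ b/src/transformers/models/vit/modeling_tf_vit.py @@ -486,7 +486,6 @@ class TFViTMainLayer(tf.keras.layers.Layer): interpolate_pos_encoding: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: if pixel_values is None: @@ -656,7 +655,6 @@ class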
TFViTModel(TFViTPreTrainedModel): interpolate_pos_encoding: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: r""" Returns: @@ -757,7 +755,6 @@ class TFViTForImageClassification(TFViTPreTrainedModel, TFSequenceClassification return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py index 40f100b64ff..6ff588fce3d 100644 --- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py @@ -647,7 +647,6 @@ class TFViTMAEMainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFViTMAEModelOutput, Tuple[tf.Tensor]]: embedding_output, mask, ids_restore = self.embeddings( pixel_values=pixel_values, training=training, noise=noise @@ -811,7 +810,6 @@ class TFViTMAEModel(TFViTMAEPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFViTMAEModelOutput, Tuple[tf.Tensor]]: r""" Returns: @@ -1028,7 +1026,6 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFViTMAEForPreTrainingOutput, Tuple[tf.Tensor]]: r""" Returns: diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index dbb994ed47c..46b41fba3ae 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -360,7 +360,6 @@ class TFXLMMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): # removed: src_enc=None, src_len=None @@ -707,7 +706,6 @@ class TFXLMModel(TFXLMPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.transformer( input_ids=input_ids, @@ -843,7 +841,6 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): transformer_outputs = self.transformer( input_ids=input_ids, @@ -917,7 +914,6 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1025,7 +1021,6 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss): return_dict=None, labels=None, training=False, - **kwargs, ): if input_ids is not None: num_choices = shape_list(input_ids)[1] @@ -1150,7 +1145,6 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos return_dict=None, labels=None, training=False, - **kwargs, ): r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1237,7 +1231,6 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL start_positions=None, end_positions=None, training=False, - **kwargs, ): r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git 
a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 3a77c4845df..d81924d3451 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -597,7 +597,6 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): if training and use_mems is None: @@ -1152,7 +1151,6 @@ class TFXLNetModel(TFXLNetPreTrainedModel): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): outputs = self.transformer( input_ids=input_ids, @@ -1262,7 +1260,6 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFXLNetLMHeadModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1394,7 +1391,6 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFXLNetForSequenceClassificationOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1501,7 +1497,6 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss): return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFXLNetForMultipleChoiceOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): @@ -1623,7 +1618,6 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFXLNetForTokenClassificationOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1711,7 +1705,6 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFXLNetForQuestionAnsweringSimpleOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index da2a0a3828f..2d9914eebd6 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -653,7 +653,6 @@ class TF{{cookiecutter.camelcase_modelname}}MainLayer(tf.keras.layers.Layer): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: if not self.config.is_decoder: @@ -949,7 +948,6 @@ class TF{{cookiecutter.camelcase_modelname}}Model(TF{{cookiecutter.camelcase_mod 
output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1049,7 +1047,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(TF{{cookiecutter.camelca return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1146,7 +1143,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForCausalLM(TF{{cookiecutter.camelca return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): @@ -1289,7 +1285,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification(TF{{cookie return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1379,7 +1374,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(TF{{cookiecutter.c return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -1506,7 +1500,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(TF{{cookiecut return_dict: Optional[bool] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): @@ -1588,7 +1581,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(TF{{cookiecutte start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, training: Optional[bool] = False, - **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): @@ -2262,7 +2254,6 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): """ Args: @@ -2421,7 +2412,6 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer): output_hidden_states=None, return_dict=None, training=False, - **kwargs, ): r""" Args: @@ -2876,7 +2866,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec return_dict=None, labels=None, training=False, - **kwargs, ): """ Returns: diff --git a/tests/convbert/test_modeling_tf_convbert.py b/tests/convbert/test_modeling_tf_convbert.py index e2d68876263..2ae29c3e4a5 100644 --- a/tests/convbert/test_modeling_tf_convbert.py +++ b/tests/convbert/test_modeling_tf_convbert.py @@ -355,7 +355,6 @@ class TFConvBertModelTest(TFModelTesterMixin, 
unittest.TestCase): for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True - inputs_dict["use_cache"] = False config.output_hidden_states = False model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) diff --git a/tests/t5/test_modeling_tf_t5.py b/tests/t5/test_modeling_tf_t5.py index c6585f83b18..7ac0b33e426 100644 --- a/tests/t5/test_modeling_tf_t5.py +++ b/tests/t5/test_modeling_tf_t5.py @@ -346,6 +346,11 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase): self.assertEqual(model.get_input_embeddings().weight.shape[0], len(tokenizer)) self.assertNotEqual(model.get_input_embeddings().weight.shape[0], original_vocab_size) + # This test is run in `TFT5EncoderOnlyModelTest`, where the main layer has the same inputs as the model + @unittest.skip(reason="The inputs of the Main Layer are different.") + def test_keras_save_load(self): + pass + class TFT5EncoderOnlyModelTester: def __init__( diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 9473a50f53a..b72034de695 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -573,7 +573,12 @@ class TFModelTesterMixin: pt_model = pt_model_class(config) tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - tf_inputs_dict_maybe_with_labels = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + tf_inputs_dict_maybe_with_labels = self._prepare_for_class( + inputs_dict, + model_class, + # Not all models accept "labels" in the forward pass (yet :) ) + return_labels=True if "labels" in inspect.signature(model_class.call).parameters.keys() else False, + ) # Check we can load pt model in tf and vice-versa with model => model functions tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict) @@ -722,7 +727,6 @@ class TFModelTesterMixin: for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True - inputs_dict["use_cache"] = False config.output_hidden_states = False model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) @@ -944,10 +948,6 @@ class TFModelTesterMixin: dict_inputs = self._prepare_for_class(inputs_dict, model_class) check_equivalence(model, tuple_inputs, dict_inputs) - tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - check_equivalence(model, tuple_inputs, dict_inputs) - tuple_inputs = self._prepare_for_class(inputs_dict, model_class) dict_inputs = self._prepare_for_class(inputs_dict, model_class) check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True}) @@ -956,19 +956,25 @@ class TFModelTesterMixin: dict_inputs = self._prepare_for_class(inputs_dict, model_class) check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True}) - tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True}) + # Not all models accept "labels" in the forward pass (yet :) ) + if "labels" in inspect.signature(model.call).parameters.keys(): + tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + 
check_equivalence(model, tuple_inputs, dict_inputs) - tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True}) + tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True}) - tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - check_equivalence( - model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True} - ) + tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True}) + + tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + check_equivalence( + model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True} + ) def test_inputs_embeds(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
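With the `**kwargs` catch-all gone, the common test mixin above can no longer pass `labels` to every model blindly; it gates label-bearing inputs on the model's actual `call` signature. A minimal sketch of that check; `wants_labels` is a hypothetical name used here for illustration.

```python
import inspect


def wants_labels(model_class) -> bool:
    """True if the model's `call` explicitly declares a `labels` argument."""
    return "labels" in inspect.signature(model_class.call).parameters


# Hypothetical usage inside a test loop:
#     return_labels = wants_labels(model_class)
#     inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
```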