Remove token_type_ids from default TF GPT-2 signature (#26962)

Remove token_type_ids from default GPT-2 signature
2025-07-31 02:02:21 +06:00 · 2023-10-23 16:18:02 +01:00 · 2023-10-23 16:18:02 +01:00 · f7354a3bd6
commit f7354a3bd6
parent c0b5ad9473
1 changed file with 10 additions and 0 deletions
--- a/src/transformers/models/gpt2/modeling_tf_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py
@@ -521,6 +521,16 @@ class TFGPT2PreTrainedModel(TFPreTrainedModel):
    # names with a '.' represent the authorized unexpected/missing layers when a TF model is loaded from a PT model
    _keys_to_ignore_on_load_unexpected = [r"h.\d+.attn.bias", r"h.\d+.crossattention.bias"]

+    @property
+    def input_signature(self):
+        # Default signature: int32 input_ids and attention_mask only. GPT-2 supports token_type_ids in theory, but
+        # they are rarely used in practice, and the implementation means that passing token_type_ids=0 yields
+        # different outputs from token_type_ids=None. We therefore omit it, even though it would usually be included.
+        return {
+            "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+            "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+        }
+

@dataclass
 class TFGPT2DoubleHeadsModelOutput(ModelOutput):