diff --git a/tests/models/albert/test_modeling_tf_albert.py b/tests/models/albert/test_modeling_tf_albert.py index eaa6791f928..7314eb4749a 100644 --- a/tests/models/albert/test_modeling_tf_albert.py +++ b/tests/models/albert/test_modeling_tf_albert.py @@ -56,7 +56,7 @@ class TFAlbertModelTester: vocab_size=99, embedding_size=16, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", @@ -80,7 +80,7 @@ class TFAlbertModelTester: self.vocab_size = 99 self.embedding_size = 16 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py index 60b18267299..05720f29780 100644 --- a/tests/models/bart/test_modeling_tf_bart.py +++ b/tests/models/bart/test_modeling_tf_bart.py @@ -52,7 +52,7 @@ class TFBartModelTester: use_labels=False, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_dropout_prob=0.1, diff --git a/tests/models/bert/test_modeling_tf_bert.py b/tests/models/bert/test_modeling_tf_bert.py index b68a81a1030..335a184d292 100644 --- a/tests/models/bert/test_modeling_tf_bert.py +++ b/tests/models/bert/test_modeling_tf_bert.py @@ -57,7 +57,7 @@ class TFBertModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", @@ -80,7 +80,7 @@ class TFBertModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/blenderbot/test_modeling_tf_blenderbot.py b/tests/models/blenderbot/test_modeling_tf_blenderbot.py index 7553bb908ea..26b03a5d6a3 100644 --- a/tests/models/blenderbot/test_modeling_tf_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_tf_blenderbot.py @@ -48,7 +48,7 @@ class TFBlenderbotModelTester: use_labels=False, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_dropout_prob=0.1, diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py index 2118ec6837a..021f171789e 100644 --- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py @@ -48,7 +48,7 @@ class TFBlenderbotSmallModelTester: use_labels=False, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_dropout_prob=0.1, diff --git a/tests/models/blip/test_modeling_tf_blip.py b/tests/models/blip/test_modeling_tf_blip.py index a58939c09d9..ac6f8e3a67c 100644 --- a/tests/models/blip/test_modeling_tf_blip.py +++ b/tests/models/blip/test_modeling_tf_blip.py @@ -64,7 +64,7 @@ class TFBlipVisionModelTester: is_training=True, hidden_size=32, projection_dim=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, dropout=0.1, @@ -207,7 +207,7 @@ class TFBlipTextModelTester: vocab_size=99, hidden_size=32, projection_dim=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, dropout=0.1, diff --git a/tests/models/blip/test_modeling_tf_blip_text.py b/tests/models/blip/test_modeling_tf_blip_text.py index 2733a9fa6a4..a3da1a7f675 100644 --- a/tests/models/blip/test_modeling_tf_blip_text.py +++ b/tests/models/blip/test_modeling_tf_blip_text.py @@ -46,7 +46,7 @@ class BlipTextModelTester: vocab_size=99, hidden_size=32, projection_dim=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, dropout=0.1, diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py index 10b9954fc88..897b89d5c36 100644 --- a/tests/models/clip/test_modeling_tf_clip.py +++ b/tests/models/clip/test_modeling_tf_clip.py @@ -57,7 +57,7 @@ class TFCLIPVisionModelTester: num_channels=3, is_training=True, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, dropout=0.1, @@ -328,7 +328,7 @@ class TFCLIPTextModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, dropout=0.1, diff --git a/tests/models/convbert/test_modeling_tf_convbert.py b/tests/models/convbert/test_modeling_tf_convbert.py index 84ed4de818f..5c5d83de300 100644 --- a/tests/models/convbert/test_modeling_tf_convbert.py +++ b/tests/models/convbert/test_modeling_tf_convbert.py @@ -51,7 +51,7 @@ class TFConvBertModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", @@ -74,7 +74,7 @@ class TFConvBertModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 384 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/ctrl/test_modeling_tf_ctrl.py b/tests/models/ctrl/test_modeling_tf_ctrl.py index 5c9750d4ded..9a5ebbe34a7 100644 --- a/tests/models/ctrl/test_modeling_tf_ctrl.py +++ b/tests/models/ctrl/test_modeling_tf_ctrl.py @@ -52,7 +52,7 @@ class TFCTRLModelTester(object): self.use_mc_token_ids = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py index 9e3701c17ad..fa676434406 100644 --- a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py +++ b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py @@ -61,7 +61,7 @@ class TFData2VecVisionModelTester: is_training=True, use_labels=True, hidden_size=32, - num_hidden_layers=4, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/deberta/test_modeling_tf_deberta.py b/tests/models/deberta/test_modeling_tf_deberta.py index 9b69d55001c..14a99ea947e 100644 --- a/tests/models/deberta/test_modeling_tf_deberta.py +++ b/tests/models/deberta/test_modeling_tf_deberta.py @@ -50,7 +50,7 @@ class TFDebertaModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", @@ -73,7 +73,7 @@ class TFDebertaModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py index 96ebe375d97..8b9bcc15ea2 100644 --- a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py +++ b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py @@ -50,7 +50,7 @@ class TFDebertaV2ModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/deit/test_modeling_tf_deit.py b/tests/models/deit/test_modeling_tf_deit.py index 3987b11d196..0e34f35b60b 100644 --- a/tests/models/deit/test_modeling_tf_deit.py +++ b/tests/models/deit/test_modeling_tf_deit.py @@ -60,7 +60,7 @@ class TFDeiTModelTester: is_training=True, use_labels=True, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/distilbert/test_modeling_tf_distilbert.py b/tests/models/distilbert/test_modeling_tf_distilbert.py index 4e96c909765..937dd24d6d7 100644 --- a/tests/models/distilbert/test_modeling_tf_distilbert.py +++ b/tests/models/distilbert/test_modeling_tf_distilbert.py @@ -54,7 +54,7 @@ class TFDistilBertModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/dpr/test_modeling_tf_dpr.py b/tests/models/dpr/test_modeling_tf_dpr.py index f788a516339..11351408623 100644 --- a/tests/models/dpr/test_modeling_tf_dpr.py +++ b/tests/models/dpr/test_modeling_tf_dpr.py @@ -53,7 +53,7 @@ class TFDPRModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/electra/test_modeling_tf_electra.py b/tests/models/electra/test_modeling_tf_electra.py index fe60c562710..537cb1df2f9 100644 --- a/tests/models/electra/test_modeling_tf_electra.py +++ b/tests/models/electra/test_modeling_tf_electra.py @@ -54,7 +54,7 @@ class TFElectraModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/esm/test_modeling_tf_esm.py b/tests/models/esm/test_modeling_tf_esm.py index d06e3c59ba8..b687da355a3 100644 --- a/tests/models/esm/test_modeling_tf_esm.py +++ b/tests/models/esm/test_modeling_tf_esm.py @@ -53,7 +53,7 @@ class TFEsmModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/flaubert/test_modeling_tf_flaubert.py b/tests/models/flaubert/test_modeling_tf_flaubert.py index b751445d12c..6d74b55ce34 100644 --- a/tests/models/flaubert/test_modeling_tf_flaubert.py +++ b/tests/models/flaubert/test_modeling_tf_flaubert.py @@ -61,7 +61,7 @@ class TFFlaubertModelTester: self.vocab_size = 99 self.n_special = 0 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.hidden_dropout_prob = 0.1 self.attention_probs_dropout_prob = 0.1 diff --git a/tests/models/gpt2/test_modeling_tf_gpt2.py b/tests/models/gpt2/test_modeling_tf_gpt2.py index cce37aa95e6..a88435acba3 100644 --- a/tests/models/gpt2/test_modeling_tf_gpt2.py +++ b/tests/models/gpt2/test_modeling_tf_gpt2.py @@ -55,7 +55,7 @@ class TFGPT2ModelTester: self.use_mc_token_ids = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/gptj/test_modeling_tf_gptj.py b/tests/models/gptj/test_modeling_tf_gptj.py index 649eab2889f..896df148058 100644 --- a/tests/models/gptj/test_modeling_tf_gptj.py +++ b/tests/models/gptj/test_modeling_tf_gptj.py @@ -51,7 +51,7 @@ class TFGPTJModelTester: self.vocab_size = 99 self.hidden_size = 32 self.rotary_dim = 4 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py index 6cd6ae7aaec..1a1a14e3018 100644 --- a/tests/models/groupvit/test_modeling_tf_groupvit.py +++ b/tests/models/groupvit/test_modeling_tf_groupvit.py @@ -150,6 +150,10 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase): test_head_masking = False test_onnx = False + def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-4, name="outputs", attributes=None): + # We override with a slightly higher tol value, as this model tends to diverge a bit more + super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes) + def setUp(self): self.model_tester = TFGroupViTVisionModelTester(self) self.config_tester = ConfigTester( @@ -381,7 +385,7 @@ class TFGroupViTTextModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, dropout=0.1, @@ -459,6 +463,10 @@ class TFGroupViTTextModelTest(TFModelTesterMixin, unittest.TestCase): test_head_masking = False test_onnx = False + def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-4, name="outputs", attributes=None): + # We override with a slightly higher tol value, as this model tends to diverge a bit more + super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes) + def setUp(self): self.model_tester = TFGroupViTTextModelTester(self) self.config_tester = ConfigTester(self, config_class=GroupViTTextConfig, hidden_size=37) @@ -581,6 +589,10 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test test_attention_outputs = False test_onnx = False + def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-4, name="outputs", attributes=None): + # We override with a slightly higher tol value, as this model tends to diverge a bit more + super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes) + def setUp(self): self.model_tester = TFGroupViTModelTester(self) diff --git a/tests/models/hubert/test_modeling_tf_hubert.py b/tests/models/hubert/test_modeling_tf_hubert.py index 0b8e1e2df94..3685e659874 100644 --- a/tests/models/hubert/test_modeling_tf_hubert.py +++ b/tests/models/hubert/test_modeling_tf_hubert.py @@ -59,7 +59,7 @@ class TFHubertModelTester: conv_bias=False, num_conv_pos_embeddings=16, num_conv_pos_embedding_groups=2, - num_hidden_layers=4, + num_hidden_layers=2, num_attention_heads=2, hidden_dropout_prob=0.1, # this is most likely not correctly set yet intermediate_size=20, diff --git a/tests/models/layoutlm/test_modeling_tf_layoutlm.py b/tests/models/layoutlm/test_modeling_tf_layoutlm.py index 2d134f23d42..96ce692a668 100644 --- a/tests/models/layoutlm/test_modeling_tf_layoutlm.py +++ b/tests/models/layoutlm/test_modeling_tf_layoutlm.py @@ -52,7 +52,7 @@ class TFLayoutLMModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py index 3a6f34185b5..5ea4cb625c4 100644 --- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py @@ -69,7 +69,7 @@ class TFLayoutLMv3ModelTester: use_labels=True, vocab_size=99, hidden_size=36, - num_hidden_layers=3, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/led/test_modeling_tf_led.py b/tests/models/led/test_modeling_tf_led.py index a06a29fd383..a4f8ad6a9c5 100644 --- a/tests/models/led/test_modeling_tf_led.py +++ b/tests/models/led/test_modeling_tf_led.py @@ -47,7 +47,7 @@ class TFLEDModelTester: use_labels=False, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_dropout_prob=0.1, diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py index 67d6d234c1c..0eda0652268 100644 --- a/tests/models/longformer/test_modeling_tf_longformer.py +++ b/tests/models/longformer/test_modeling_tf_longformer.py @@ -56,7 +56,7 @@ class TFLongformerModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/marian/test_modeling_tf_marian.py b/tests/models/marian/test_modeling_tf_marian.py index 50cff4219a1..9cb9d0061f0 100644 --- a/tests/models/marian/test_modeling_tf_marian.py +++ b/tests/models/marian/test_modeling_tf_marian.py @@ -49,7 +49,7 @@ class TFMarianModelTester: use_labels=False, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_dropout_prob=0.1, diff --git a/tests/models/mbart/test_modeling_tf_mbart.py b/tests/models/mbart/test_modeling_tf_mbart.py index 9ca4a7d6932..04aad6cfc63 100644 --- a/tests/models/mbart/test_modeling_tf_mbart.py +++ b/tests/models/mbart/test_modeling_tf_mbart.py @@ -47,7 +47,7 @@ class TFMBartModelTester: use_labels=False, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_dropout_prob=0.1, diff --git a/tests/models/mobilebert/test_modeling_tf_mobilebert.py b/tests/models/mobilebert/test_modeling_tf_mobilebert.py index 607ba5b88a9..b2b1e58ec0b 100644 --- a/tests/models/mobilebert/test_modeling_tf_mobilebert.py +++ b/tests/models/mobilebert/test_modeling_tf_mobilebert.py @@ -97,7 +97,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te vocab_size=99, hidden_size=32, embedding_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/mpnet/test_modeling_tf_mpnet.py b/tests/models/mpnet/test_modeling_tf_mpnet.py index 381b6e81dd8..b27b33d8103 100644 --- a/tests/models/mpnet/test_modeling_tf_mpnet.py +++ b/tests/models/mpnet/test_modeling_tf_mpnet.py @@ -51,7 +51,7 @@ class TFMPNetModelTester: use_labels=True, vocab_size=99, hidden_size=64, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=64, hidden_act="gelu", diff --git a/tests/models/openai/test_modeling_tf_openai.py b/tests/models/openai/test_modeling_tf_openai.py index cf4c81dd647..231758064f2 100644 --- a/tests/models/openai/test_modeling_tf_openai.py +++ b/tests/models/openai/test_modeling_tf_openai.py @@ -53,7 +53,7 @@ class TFOpenAIGPTModelTester: self.use_mc_token_ids = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/pegasus/test_modeling_tf_pegasus.py b/tests/models/pegasus/test_modeling_tf_pegasus.py index 33e908ab23e..21ddaa9f345 100644 --- a/tests/models/pegasus/test_modeling_tf_pegasus.py +++ b/tests/models/pegasus/test_modeling_tf_pegasus.py @@ -47,7 +47,7 @@ class TFPegasusModelTester: use_labels=False, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_dropout_prob=0.1, diff --git a/tests/models/rembert/test_modeling_tf_rembert.py b/tests/models/rembert/test_modeling_tf_rembert.py index e70bd7033fc..8698d3febc8 100644 --- a/tests/models/rembert/test_modeling_tf_rembert.py +++ b/tests/models/rembert/test_modeling_tf_rembert.py @@ -54,7 +54,7 @@ class TFRemBertModelTester: hidden_size=32, input_embedding_size=18, output_embedding_size=43, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", @@ -79,7 +79,7 @@ class TFRemBertModelTester: self.hidden_size = 32 self.input_embedding_size = input_embedding_size self.output_embedding_size = output_embedding_size - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/roberta/test_modeling_tf_roberta.py b/tests/models/roberta/test_modeling_tf_roberta.py index 3d7b6953c08..2f2859391ad 100644 --- a/tests/models/roberta/test_modeling_tf_roberta.py +++ b/tests/models/roberta/test_modeling_tf_roberta.py @@ -56,7 +56,7 @@ class TFRobertaModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py b/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py index 4e1bd2e319a..9c1a25ccb98 100644 --- a/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py +++ b/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py @@ -57,7 +57,7 @@ class TFRobertaPreLayerNormModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/roformer/test_modeling_tf_roformer.py b/tests/models/roformer/test_modeling_tf_roformer.py index 52c630e2bea..d0e795b6dd9 100644 --- a/tests/models/roformer/test_modeling_tf_roformer.py +++ b/tests/models/roformer/test_modeling_tf_roformer.py @@ -56,7 +56,7 @@ class TFRoFormerModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", @@ -79,7 +79,7 @@ class TFRoFormerModelTester: self.use_labels = True self.vocab_size = 99 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.intermediate_size = 37 self.hidden_act = "gelu" diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py index 46b0f4c5911..201bcdd7aed 100644 --- a/tests/models/t5/test_modeling_tf_t5.py +++ b/tests/models/t5/test_modeling_tf_t5.py @@ -46,7 +46,7 @@ class TFT5ModelTester: self.vocab_size = 99 self.n_positions = 14 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.d_ff = 37 self.relative_attention_num_buckets = 8 @@ -325,7 +325,7 @@ class TFT5EncoderOnlyModelTester: # For common tests use_attention_mask=True, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, d_ff=37, relative_attention_num_buckets=8, diff --git a/tests/models/tapas/test_modeling_tf_tapas.py b/tests/models/tapas/test_modeling_tf_tapas.py index ce98394cb86..7687144eaf2 100644 --- a/tests/models/tapas/test_modeling_tf_tapas.py +++ b/tests/models/tapas/test_modeling_tf_tapas.py @@ -77,7 +77,7 @@ class TFTapasModelTester: use_labels=True, vocab_size=99, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py b/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py index ac820ea8fab..88e759307ed 100644 --- a/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py +++ b/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py @@ -59,7 +59,7 @@ class TFTransfoXLModelTester: self.d_head = 8 self.d_inner = 128 self.div_val = 2 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.scope = None self.seed = 1 self.eos_token_id = 0 diff --git a/tests/models/vit/test_modeling_tf_vit.py b/tests/models/vit/test_modeling_tf_vit.py index 53862da916e..0db27dfb2eb 100644 --- a/tests/models/vit/test_modeling_tf_vit.py +++ b/tests/models/vit/test_modeling_tf_vit.py @@ -52,7 +52,7 @@ class TFViTModelTester: is_training=True, use_labels=True, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/vit_mae/test_modeling_tf_vit_mae.py b/tests/models/vit_mae/test_modeling_tf_vit_mae.py index bc7d481f5f5..8f6064e0165 100644 --- a/tests/models/vit_mae/test_modeling_tf_vit_mae.py +++ b/tests/models/vit_mae/test_modeling_tf_vit_mae.py @@ -60,7 +60,7 @@ class TFViTMAEModelTester: is_training=True, use_labels=True, hidden_size=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py index 3554d18957c..bd931ea8319 100644 --- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py @@ -130,7 +130,7 @@ class TFWav2Vec2ModelTester: conv_bias=False, num_conv_pos_embeddings=16, num_conv_pos_embedding_groups=2, - num_hidden_layers=4, + num_hidden_layers=2, num_attention_heads=2, hidden_dropout_prob=0.1, # this is most likely not correctly set yet intermediate_size=20, diff --git a/tests/models/xglm/test_modeling_tf_xglm.py b/tests/models/xglm/test_modeling_tf_xglm.py index 3c9e4770f73..54641693c77 100644 --- a/tests/models/xglm/test_modeling_tf_xglm.py +++ b/tests/models/xglm/test_modeling_tf_xglm.py @@ -51,7 +51,7 @@ class TFXGLMModelTester: use_labels=True, vocab_size=99, d_model=32, - num_hidden_layers=5, + num_hidden_layers=2, num_attention_heads=4, ffn_dim=37, activation_function="gelu", diff --git a/tests/models/xlm/test_modeling_tf_xlm.py b/tests/models/xlm/test_modeling_tf_xlm.py index 5b576f02c91..7bfa33828f7 100644 --- a/tests/models/xlm/test_modeling_tf_xlm.py +++ b/tests/models/xlm/test_modeling_tf_xlm.py @@ -61,7 +61,7 @@ class TFXLMModelTester: self.vocab_size = 99 self.n_special = 0 self.hidden_size = 32 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.num_attention_heads = 4 self.hidden_dropout_prob = 0.1 self.attention_probs_dropout_prob = 0.1 diff --git a/tests/models/xlnet/test_modeling_tf_xlnet.py b/tests/models/xlnet/test_modeling_tf_xlnet.py index c33579392dc..327bfea6d0a 100644 --- a/tests/models/xlnet/test_modeling_tf_xlnet.py +++ b/tests/models/xlnet/test_modeling_tf_xlnet.py @@ -61,7 +61,7 @@ class TFXLNetModelTester: self.hidden_size = 32 self.num_attention_heads = 4 self.d_inner = 128 - self.num_hidden_layers = 5 + self.num_hidden_layers = 2 self.type_sequence_label_size = 2 self.untie_r = True self.bi_data = False diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index e22635182e5..6e5fd6ce6ca 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -1527,36 +1527,6 @@ class TFModelTesterMixin: if metrics: self.assertTrue(len(accuracy1) == len(accuracy2) > 0, "Missing metrics!") - # Make sure fit works with tf.data.Dataset and results are consistent - dataset = tf.data.Dataset.from_tensor_slices(prepared_for_class) - - if sample_weight is not None: - # Add in the sample weight - weighted_dataset = dataset.map(lambda x: (x, None, tf.convert_to_tensor(0.5, dtype=tf.float32))) - else: - weighted_dataset = dataset - # Pass in all samples as a batch to match other `fit` calls - weighted_dataset = weighted_dataset.batch(len(dataset)) - dataset = dataset.batch(len(dataset)) - # Reinitialize to fix batchnorm again - model.set_weights(model_weights) - - # To match the other calls, don't pass sample weights in the validation data - history3 = model.fit( - weighted_dataset, - validation_data=dataset, - steps_per_epoch=1, - validation_steps=1, - shuffle=False, - ) - val_loss3 = history3.history["val_loss"][0] - self.assertTrue(not isnan(val_loss3)) - accuracy3 = {key: val[0] for key, val in history3.history.items() if key.endswith("accuracy")} - self.check_keras_fit_results(val_loss1, val_loss3) - self.assertEqual(history1.history.keys(), history3.history.keys()) - if metrics: - self.assertTrue(len(accuracy1) == len(accuracy3) > 0, "Missing metrics!") - def test_int_support(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: