diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 8d4a28aa625..38160fa5422 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -283,7 +283,6 @@ def booleans_processing(config, **kwargs): if "use_cache" in kwargs: final_booleans["use_cache"] = kwargs["use_cache"] if kwargs["use_cache"] is not None else config.use_cache - else: if ( kwargs["output_attentions"] is not None diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index f1eb083cfea..0405192a6aa 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -202,6 +202,54 @@ class TFModelTesterMixin: saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") self.assertTrue(os.path.exists(saved_model_dir)) + @slow + def test_saved_model_creation_extended(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.output_hidden_states = True + config.output_attentions = True + + if hasattr(config, "use_cache"): + config.use_cache = True + + encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) + encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) + + for model_class in self.all_model_classes: + class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + num_out = len(model(class_inputs_dict)) + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, saved_model=True) + saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") + model = tf.keras.models.load_model(saved_model_dir) + outputs = model(class_inputs_dict) + + if self.is_encoder_decoder: + output_hidden_states = outputs["encoder_hidden_states"] + output_attentions = outputs["encoder_attentions"] + else: + output_hidden_states = outputs["hidden_states"] + output_attentions = outputs["attentions"] + + self.assertEqual(len(outputs), num_out) + + expected_num_layers = getattr( + self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 + ) + + self.assertEqual(len(output_hidden_states), expected_num_layers) + self.assertListEqual( + list(output_hidden_states[0].shape[-2:]), + [self.model_tester.seq_length, self.model_tester.hidden_size], + ) + + self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers) + self.assertListEqual( + list(output_attentions[0].shape[-3:]), + [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], + ) + def test_onnx_compliancy(self): if not self.test_onnx: return @@ -263,98 +311,6 @@ class TFModelTesterMixin: onnxruntime.InferenceSession(onnx_model.SerializeToString()) - @slow - def test_saved_model_creation_extended(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = True - - if hasattr(config, "use_cache"): - config.use_cache = True - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - - model(class_inputs_dict) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=True) - saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") - self.assertTrue(os.path.exists(saved_model_dir)) - - @slow - def test_saved_model_with_hidden_states_output(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = False - - if hasattr(config, "use_cache"): - config.use_cache = False - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - num_out = len(model(class_inputs_dict)) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=True) - saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") - model = tf.keras.models.load_model(saved_model_dir) - outputs = model(class_inputs_dict) - - if self.is_encoder_decoder: - output = outputs["encoder_hidden_states"] - else: - output = outputs["hidden_states"] - - self.assertEqual(len(outputs), num_out) - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - - self.assertEqual(len(output), expected_num_layers) - self.assertListEqual( - list(output[0].shape[-2:]), - [self.model_tester.seq_length, self.model_tester.hidden_size], - ) - - @slow - def test_saved_model_with_attentions_output(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_attentions = True - config.output_hidden_states = False - - if hasattr(config, "use_cache"): - config.use_cache = False - - encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) - encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - num_out = len(model(class_inputs_dict)) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=True) - saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") - model = tf.keras.models.load_model(saved_model_dir) - outputs = model(class_inputs_dict) - - if self.is_encoder_decoder: - output = outputs["encoder_attentions"] - else: - output = outputs["attentions"] - - self.assertEqual(len(outputs), num_out) - self.assertEqual(len(output), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(output[0].shape[-3:]), - [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], - ) - def test_mixed_precision(self): tf.keras.mixed_precision.experimental.set_policy("mixed_float16") @@ -554,7 +510,6 @@ class TFModelTesterMixin: shared = TFSharedEmbeddings(self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared") config.use_cache = False main_layer = main_layer_class(config, embed_tokens=shared) - del inputs_dict["use_cache"] else: main_layer = main_layer_class(config) diff --git a/tests/test_modeling_tf_convbert.py b/tests/test_modeling_tf_convbert.py index 587f3e1dff9..1a7768e7001 100644 --- a/tests/test_modeling_tf_convbert.py +++ b/tests/test_modeling_tf_convbert.py @@ -273,13 +273,13 @@ class TFConvBertModelTest(TFModelTesterMixin, unittest.TestCase): self.model_tester.create_and_check_for_token_classification(*config_and_inputs) @slow - def test_saved_model_with_attentions_output(self): + def test_saved_model_creation_extended(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.output_hidden_states = True config.output_attentions = True - config.output_hidden_states = False if hasattr(config, "use_cache"): - config.use_cache = False + config.use_cache = True encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) @@ -291,14 +291,32 @@ class TFConvBertModelTest(TFModelTesterMixin, unittest.TestCase): with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname, saved_model=True) - model = tf.keras.models.load_model(os.path.join(tmpdirname, "saved_model", "1")) + saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") + model = tf.keras.models.load_model(saved_model_dir) outputs = model(class_inputs_dict) - output = outputs["attentions"] + + if self.is_encoder_decoder: + output_hidden_states = outputs["encoder_hidden_states"] + output_attentions = outputs["encoder_attentions"] + else: + output_hidden_states = outputs["hidden_states"] + output_attentions = outputs["attentions"] self.assertEqual(len(outputs), num_out) - self.assertEqual(len(output), self.model_tester.num_hidden_layers) + + expected_num_layers = getattr( + self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 + ) + + self.assertEqual(len(output_hidden_states), expected_num_layers) self.assertListEqual( - list(output[0].shape[-3:]), + list(output_hidden_states[0].shape[-2:]), + [self.model_tester.seq_length, self.model_tester.hidden_size], + ) + + self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers) + self.assertListEqual( + list(output_attentions[0].shape[-3:]), [self.model_tester.num_attention_heads / 2, encoder_seq_length, encoder_key_length], ) diff --git a/tests/test_modeling_tf_led.py b/tests/test_modeling_tf_led.py index 2bfc2f78b86..eccba105ff3 100644 --- a/tests/test_modeling_tf_led.py +++ b/tests/test_modeling_tf_led.py @@ -370,27 +370,10 @@ class TFLEDModelTest(TFModelTesterMixin, unittest.TestCase): # TODO JP: Make LED XLA compliant pass - def test_saved_model_with_attentions_output(self): - # Temporarily disable this test in order to find - # how to better handle it without timing out the CI - pass - - @slow - def test_saved_model_with_hidden_states_output(self): - # Temporarily disable this test in order to find - # how to better handle it without timing out the CI - pass - def test_saved_model_creation(self): # This test is too long (>30sec) and makes fail the CI pass - @slow - def test_saved_model_creation_extended(self): - # Temporarily disable this test in order to find - # how to better handle it without timing out the CI - pass - def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): """If tensors not close, or a and b arent both tensors, raise a nice Assertion error.""" diff --git a/tests/test_modeling_tf_longformer.py b/tests/test_modeling_tf_longformer.py index bb8d94dfdf2..6260153ad24 100644 --- a/tests/test_modeling_tf_longformer.py +++ b/tests/test_modeling_tf_longformer.py @@ -339,28 +339,10 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) - @slow - def test_saved_model_with_attentions_output(self): - # Temporarily disable this test in order to find - # how to better handle it without timing out the CI - pass - - @slow - def test_saved_model_with_hidden_states_output(self): - # Temporarily disable this test in order to find - # how to better handle it without timing out the CI - pass - def test_saved_model_creation(self): # This test is too long (>30sec) and makes fail the CI pass - @slow - def test_saved_model_creation_extended(self): - # Temporarily disable this test in order to find - # how to better handle it without timing out the CI - pass - def test_mixed_precision(self): # TODO JP: Make Longformer float16 compliant pass diff --git a/tests/test_modeling_tf_lxmert.py b/tests/test_modeling_tf_lxmert.py index f2555acaf42..3615117c232 100644 --- a/tests/test_modeling_tf_lxmert.py +++ b/tests/test_modeling_tf_lxmert.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import tempfile import unittest @@ -710,23 +711,34 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase): pass @slow - def test_saved_model_with_hidden_states_output(self): + def test_saved_model_creation_extended(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config.output_hidden_states = True + config.output_attentions = True + + if hasattr(config, "use_cache"): + config.use_cache = True + + encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) + encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) for model_class in self.all_model_classes: class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) model = model_class(config) - model._saved_model_inputs_spec = None - model._set_save_spec(class_inputs_dict) + num_out = len(model(class_inputs_dict)) with tempfile.TemporaryDirectory() as tmpdirname: - tf.saved_model.save(model, tmpdirname) - model = tf.keras.models.load_model(tmpdirname) + model.save_pretrained(tmpdirname, saved_model=True) + saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") + model = tf.keras.models.load_model(saved_model_dir) outputs = model(class_inputs_dict) - language_hidden_states = outputs["language_hidden_states"] vision_hidden_states = outputs["vision_hidden_states"] + language_attentions = outputs["language_attentions"] + vision_attentions = outputs["vision_attentions"] + cross_encoder_attentions = outputs["cross_encoder_attentions"] + + self.assertEqual(len(outputs), num_out) self.assertEqual(len(language_hidden_states), self.model_tester.num_hidden_layers["language"] + 1) self.assertEqual(len(vision_hidden_states), self.model_tester.num_hidden_layers["vision"] + 1) @@ -743,29 +755,6 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase): [num_visual_features, self.model_tester.hidden_size], ) - @slow - def test_saved_model_with_attentions_output(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_attentions = True - - encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) - encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - model._saved_model_inputs_spec = None - model._set_save_spec(class_inputs_dict) - - with tempfile.TemporaryDirectory() as tmpdirname: - tf.saved_model.save(model, tmpdirname) - model = tf.keras.models.load_model(tmpdirname) - outputs = model(class_inputs_dict) - - language_attentions = outputs["language_attentions"] - vision_attentions = outputs["vision_attentions"] - cross_encoder_attentions = outputs["cross_encoder_attentions"] - self.assertEqual(len(language_attentions), self.model_tester.num_hidden_layers["language"]) self.assertEqual(len(vision_attentions), self.model_tester.num_hidden_layers["vision"]) self.assertEqual(len(cross_encoder_attentions), self.model_tester.num_hidden_layers["cross_encoder"]) diff --git a/tests/test_modeling_tf_t5.py b/tests/test_modeling_tf_t5.py index fb215a3880f..395dd95197d 100644 --- a/tests/test_modeling_tf_t5.py +++ b/tests/test_modeling_tf_t5.py @@ -237,7 +237,6 @@ class TFT5ModelTester: "input_ids": input_ids, "decoder_input_ids": input_ids, "decoder_attention_mask": input_mask, - "use_cache": False, } return config, inputs_dict