diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py
index 16accc1b944..9bf6ba6edee 100644
--- a/src/transformers/models/rembert/modeling_tf_rembert.py
+++ b/src/transformers/models/rembert/modeling_tf_rembert.py
@@ -477,7 +477,7 @@ class TFRemBertEncoder(tf.keras.layers.Layer):
         training: bool = False,
     ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]:
         hidden_states = self.embedding_hidden_mapping_in(inputs=hidden_states)
-        all_hidden_states = (hidden_states,) if output_hidden_states else None
+        all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None

diff --git a/tests/test_modeling_tf_rembert.py b/tests/test_modeling_tf_rembert.py
index 8908e6d02bd..a6d7d35cf98 100644
--- a/tests/test_modeling_tf_rembert.py
+++ b/tests/test_modeling_tf_rembert.py
@@ -90,9 +90,6 @@ class TFRemBertModelTester:
         self.num_choices = 4
         self.scope = None

-        # RemBERT also returns the upprojected word embeddings as an hidden layers
-        self.expected_num_hidden_layers = self.num_hidden_layers + 2
-
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
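
Note on the behavioral change (not part of the diff itself): `TFRemBertEncoder` previously seeded `all_hidden_states` with the up-projected embeddings, so calling the model with `output_hidden_states=True` returned `num_hidden_layers + 2` entries, which is why the test class overrode `expected_num_hidden_layers`. With the fix, the count follows the usual convention of `num_hidden_layers + 1` (one entry for the embedding output plus one per layer), and the override can be dropped. Below is a minimal sketch of the expected behavior after this change; it assumes the encoder loop appends `hidden_states` once per layer and once after the loop (as in the surrounding file), and the tiny config values are illustrative only:

```python
# Sketch, not part of the patch: verify the hidden-states count after the fix.
import tensorflow as tf
from transformers import RemBertConfig, TFRemBertModel

# Illustrative tiny config; RemBERT embeds at `input_embedding_size` and then
# up-projects to `hidden_size` via `embedding_hidden_mapping_in`.
config = RemBertConfig(
    vocab_size=99,
    hidden_size=32,
    input_embedding_size=16,
    num_hidden_layers=2,
    num_attention_heads=4,
    intermediate_size=37,
)
model = TFRemBertModel(config)

input_ids = tf.constant([[5, 6, 7, 8]])
outputs = model(input_ids, output_hidden_states=True)

# With this fix the tuple has num_hidden_layers + 1 entries
# (embedding output + one per layer), not num_hidden_layers + 2 as before.
assert len(outputs.hidden_states) == config.num_hidden_layers + 1
```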