diff --git a/src/transformers/modeling_tf_flaubert.py b/src/transformers/modeling_tf_flaubert.py index 9a0cc9c26c5..792d5d3c734 100644 --- a/src/transformers/modeling_tf_flaubert.py +++ b/src/transformers/modeling_tf_flaubert.py @@ -296,7 +296,7 @@ class TFFlaubertMainLayer(TFXLMMainLayer): else: tensor_normalized = self.layer_norm1[i](tensor) attn_outputs = self.attentions[i]( - tensor_normalized, attn_mask, None, cache, head_mask[i], training=training + tensor_normalized, attn_mask, None, cache, head_mask[i], output_attentions, training=training ) attn = attn_outputs[0] if output_attentions: