diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index b1e47131c75..3b04ccbb79a 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -1213,7 +1213,10 @@ class TFWav2Vec2MainLayer(tf.keras.layers.Layer):
         if inputs["attention_mask"] is not None:
             # compute real output lengths according to convolution formula
             output_lengths = self._get_feat_extract_output_lengths(tf.reduce_sum(inputs["attention_mask"], -1))
-            attention_mask = tf.sequence_mask(output_lengths, dtype=hidden_states.dtype)
+
+            attention_mask = tf.sequence_mask(
+                output_lengths, maxlen=shape_list(hidden_states)[1], dtype=hidden_states.dtype
+            )
 
         hidden_states = self.feature_projection(hidden_states, training=inputs["training"])
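
For context, here is a minimal standalone sketch (not part of the patch; the tensor shapes and values are illustrative assumptions) of the behavior the added `maxlen` argument addresses. Without `maxlen`, `tf.sequence_mask` sizes the mask to the longest length in the batch, which can be shorter than the padded time dimension of `hidden_states`; pinning `maxlen` to that dimension keeps the mask aligned with the tensor it is applied to. The patch uses the library's `shape_list` helper for the same purpose; the sketch uses plain `tf.shape`:

```python
import tensorflow as tf

# Hypothetical shapes: a batch of 3 feature sequences padded to 10
# frames, with true (post-convolution) lengths 4, 7, and 5.
hidden_states = tf.zeros((3, 10, 768))
output_lengths = tf.constant([4, 7, 5])

# Old behavior: maxlen defaults to max(output_lengths) == 7, so the
# mask's time dimension (7) disagrees with hidden_states' (10).
mask_old = tf.sequence_mask(output_lengths, dtype=hidden_states.dtype)
print(mask_old.shape)  # (3, 7)

# Fixed behavior: maxlen is pinned to the time dimension of
# hidden_states, so the mask always matches, padding included.
mask_new = tf.sequence_mask(
    output_lengths, maxlen=tf.shape(hidden_states)[1], dtype=hidden_states.dtype
)
print(mask_new.shape)  # (3, 10)
```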