diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index ff14ac5b7bf..2c4d4debeac 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -417,8 +417,10 @@ class TFHubertWeightNormConv1D(tf.keras.layers.Conv1D):
     def build(self, input_shape):
         if not self.built:
             input_shape = input_shape.as_list()
-            # Conv1D output shapes are checked at build time since TF 2.7, so we need to account for padding
-            input_shape[-2] += self.explicit_padding * 2
+            # If a specific input shape is passed in, we need to modify it to account for padding
+            # Not necessary if those portions of the shape are None
+            if input_shape[-2] is not None:
+                input_shape[-2] += self.explicit_padding * 2
             super().build(input_shape)
 
             self.kernel = tf.Variable(tf.transpose(self.kernel), name="weight_v", trainable=True)
diff --git a/src/transformers/models/sam/modeling_tf_sam.py b/src/transformers/models/sam/modeling_tf_sam.py
index 0d64b825d0d..a47b091a09f 100644
--- a/src/transformers/models/sam/modeling_tf_sam.py
+++ b/src/transformers/models/sam/modeling_tf_sam.py
@@ -810,6 +810,7 @@ class TFSamVisionAttention(tf.keras.layers.Layer):
         if self.use_rel_pos:
             if input_size is None:
                 raise ValueError("Input size must be provided if using relative positional encoding.")
+        self.config = config
 
     def build(self, input_shape):
         if self.input_size is not None:
@@ -928,7 +929,7 @@ class TFSamVisionAttention(tf.keras.layers.Layer):
 
         attn_output = tf.reshape(attn_probs @ value, (batch_size, self.num_attention_heads, height, width, -1))
         attn_output = tf.transpose(attn_output, perm=(0, 2, 3, 1, 4))
-        attn_output = tf.reshape(attn_output, (batch_size, height, width, -1))
+        attn_output = tf.reshape(attn_output, (batch_size, height, width, self.config.hidden_size))
 
         attn_output = self.proj(attn_output)
 
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index 97174301ccb..7ac4bc10586 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -451,8 +451,10 @@ class TFWav2Vec2WeightNormConv1D(tf.keras.layers.Conv1D):
     def build(self, input_shape):
         if not self.built:
             input_shape = input_shape.as_list()
-            # Conv1D output shapes are checked at build time since TF 2.7, so we need to account for padding
-            input_shape[-2] += self.explicit_padding * 2
+            # If a specific input shape is passed in, we need to modify it to account for padding
+            # Not necessary if those portions of the shape are None
+            if input_shape[-2] is not None:
+                input_shape[-2] += self.explicit_padding * 2
             super().build(input_shape)
 
             self.kernel = tf.Variable(tf.transpose(self.kernel), name="weight_v", trainable=True)
@@ -1646,7 +1648,7 @@ class TFWav2Vec2ForSequenceClassification(TFWav2Vec2PreTrainedModel):
         if attention_mask is None:
             pooled_output = tf.reduce_mean(hidden_states, axis=1)
         else:
-            padding_mask = self._get_feature_vector_attention_mask(hidden_states.shape[1], attention_mask)
+            padding_mask = self._get_feature_vector_attention_mask(shape_list(hidden_states)[1], attention_mask)
             padding_mask_float = tf.cast(padding_mask, hidden_states.dtype)
             hidden_states = tf.multiply(hidden_states, tf.expand_dims(padding_mask_float, axis=-1))
             pooled_output = tf.divide(
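The modeling changes above share one theme: reading `tensor.shape` directly fails once a model is traced with symbolic (`None`) dimensions, e.g. under `tf.function` or during SavedModel export. The wav2vec2 classification head therefore switches to `shape_list`, which returns static dimensions where they are known and dynamic `tf.shape()` slices where they are not. A minimal sketch of that pattern (the helper body below is illustrative, not the exact library implementation):

```python
import tensorflow as tf


def shape_list_sketch(tensor):
    # Prefer static (Python int) dimensions where known; fall back to the
    # dynamic tf.shape() tensor for axes that are None at trace time.
    static = tensor.shape.as_list()
    dynamic = tf.shape(tensor)
    return [dynamic[i] if dim is None else dim for i, dim in enumerate(static)]


@tf.function(input_signature=[tf.TensorSpec([None, None, 8], tf.float32)])
def mean_pool(hidden_states):
    # hidden_states.shape[1] is None during tracing, which breaks any op
    # that needs a concrete value; the mixed static/dynamic lookup does not.
    seq_len = shape_list_sketch(hidden_states)[1]
    return tf.reduce_sum(hidden_states, axis=1) / tf.cast(seq_len, tf.float32)


print(mean_pool(tf.ones((2, 5, 8))))  # averages over the real sequence length
```

The SAM hunk is the complementary case: reshaping with `-1` while batch/height/width are symbolic leaves the channel axis statically unknown, so spelling out `self.config.hidden_size` keeps static the one dimension the following projection layer actually needs in order to build.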
diff --git a/tests/models/blip/test_modeling_tf_blip.py b/tests/models/blip/test_modeling_tf_blip.py
index af7533c6989..a58939c09d9 100644
--- a/tests/models/blip/test_modeling_tf_blip.py
+++ b/tests/models/blip/test_modeling_tf_blip.py
@@ -434,6 +434,13 @@ class TFBlipModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
     def test_pt_tf_model_equivalence(self, allow_missing_keys=True):
         super().test_pt_tf_model_equivalence(allow_missing_keys=allow_missing_keys)
 
+    @unittest.skip("Matt: Re-enable this test when we have a proper export function for TF models.")
+    def test_saved_model_creation(self):
+        # This fails because the if return_loss: conditional can return None or a Tensor and TF hates that.
+        # We could fix that by setting the bool to a constant when exporting, but that requires a dedicated export
+        # function that we don't have yet.
+        pass
+
 
 class BlipTextRetrievalModelTester:
     def __init__(self, parent, text_kwargs=None, vision_kwargs=None, is_training=True):
diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py
index 1cba14cdb22..67d6d234c1c 100644
--- a/tests/models/longformer/test_modeling_tf_longformer.py
+++ b/tests/models/longformer/test_modeling_tf_longformer.py
@@ -360,6 +360,10 @@ class TFLongformerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te
     def test_saved_model_creation(self):
         pass
 
+    @unittest.skip("Longformer keeps using potentially symbolic tensors in conditionals and breaks tracing.")
+    def test_compile_tf_model(self):
+        pass
+
 
 @require_tf
 @require_sentencepiece
diff --git a/tests/models/xlnet/test_modeling_tf_xlnet.py b/tests/models/xlnet/test_modeling_tf_xlnet.py
index 6d76462fda9..c33579392dc 100644
--- a/tests/models/xlnet/test_modeling_tf_xlnet.py
+++ b/tests/models/xlnet/test_modeling_tf_xlnet.py
@@ -413,6 +413,10 @@ class TFXLNetModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCas
             model = TFXLNetModel.from_pretrained(model_name)
             self.assertIsNotNone(model)
 
+    @unittest.skip("Some of the XLNet models misbehave with flexible input shapes.")
+    def test_compile_tf_model(self):
+        pass
+
     # overwrite since `TFXLNetLMHeadModel` doesn't cut logits/labels
     def test_loss_computation(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
diff --git a/tests/utils/test_modeling_tf_core.py b/tests/utils/test_modeling_tf_core.py
index 135db86d4d5..b4fd805f5a9 100644
--- a/tests/utils/test_modeling_tf_core.py
+++ b/tests/utils/test_modeling_tf_core.py
@@ -217,6 +217,7 @@ class TFCoreModelTesterMixin:
         for model_class in self.all_model_classes:
             class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
             model = model_class(config)
+            model.build()
             num_out = len(model(class_inputs_dict))
 
             for key in list(class_inputs_dict.keys()):
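Back on the hubert/wav2vec2 hunks, the `None` guard matters because, per the comment being replaced, these weight-norm layers apply their explicit padding before convolving: a concrete time dimension handed to `build()` has to be widened to match what the convolution will actually see, while a symbolic dimension can pass through untouched. A self-contained sketch of the pattern (the class and the usage below are illustrative, not the library code):

```python
import tensorflow as tf


class PaddedConv1DSketch(tf.keras.layers.Conv1D):
    """Illustrative stand-in for the weight-norm conv layers patched above."""

    def __init__(self, explicit_padding, **kwargs):
        super().__init__(**kwargs)
        self.explicit_padding = explicit_padding

    def build(self, input_shape):
        input_shape = tf.TensorShape(input_shape).as_list()
        # call() pads the time axis before convolving, so a known time
        # dimension must be widened here; None means "unknown", leave it be.
        if input_shape[-2] is not None:
            input_shape[-2] += self.explicit_padding * 2
        super().build(input_shape)

    def call(self, inputs):
        pad = self.explicit_padding
        padded = tf.pad(inputs, [[0, 0], [pad, pad], [0, 0]])
        return super().call(padded)


layer = PaddedConv1DSketch(explicit_padding=2, filters=4, kernel_size=5)
layer.build(tf.TensorShape([None, None, 8]))  # symbolic time axis: no adjustment
print(layer(tf.ones((1, 10, 8))).shape)       # (1, 10, 4) once the padding is applied
```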