diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py
index 078e825ef6b..9565aa631ee 100644
--- a/tests/pipelines/test_pipelines_video_classification.py
+++ b/tests/pipelines/test_pipelines_video_classification.py
@@ -19,6 +19,7 @@ from huggingface_hub import VideoClassificationOutputElement, hf_hub_download
 from transformers import MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, VideoMAEFeatureExtractor
 from transformers.pipelines import VideoClassificationPipeline, pipeline
 from transformers.testing_utils import (
+    _run_pipeline_tests,
     compare_pipeline_output_to_hub_spec,
     is_pipeline_test,
     nested_simplify,
@@ -39,6 +40,11 @@ from .test_pipelines_common import ANY
 class VideoClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
 
+    if _run_pipeline_tests:
+        example_video_filepath = hf_hub_download(
+            repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
+        )
+
     def get_test_pipeline(
         self,
         model,
@@ -48,9 +54,6 @@ class VideoClassificationPipelineTests(unittest.TestCase):
         processor=None,
         torch_dtype="float32",
     ):
-        example_video_filepath = hf_hub_download(
-            repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
-        )
         video_classifier = VideoClassificationPipeline(
             model=model,
             tokenizer=tokenizer,
@@ -61,8 +64,9 @@ class VideoClassificationPipelineTests(unittest.TestCase):
             top_k=2,
         )
         examples = [
-            example_video_filepath,
-            "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",
+            self.example_video_filepath,
+            # TODO: re-enable the URL example below once CI has a stable way to fetch files from the Hub
+            # "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",
         ]
         return video_classifier, examples
 
diff --git a/tests/utils/test_audio_utils.py b/tests/utils/test_audio_utils.py
index 9ece033d064..f8d88b602b1 100644
--- a/tests/utils/test_audio_utils.py
+++ b/tests/utils/test_audio_utils.py
@@ -39,6 +39,9 @@ if is_librosa_available():
 
 
 class AudioUtilsFunctionTester(unittest.TestCase):
+    # cache for the dummy LibriSpeech dataset; stays `None` until `_load_datasamples` loads it
+    _dataset = None
+
     def test_hertz_to_mel(self):
         self.assertEqual(hertz_to_mel(0.0), 0.0)
         self.assertAlmostEqual(hertz_to_mel(100), 150.48910241)
@@ -274,8 +277,13 @@ class AudioUtilsFunctionTester(unittest.TestCase):
     def _load_datasamples(self, num_samples):
+        """Return the audio arrays of the first `num_samples` samples of the validation split, sorted by "id"."""
         from datasets import load_dataset
 
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
-        speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
+        # `unittest` creates a new test case instance for every test method, so the cache must be stored on
+        # the class: assigning through `self` would only shadow the class attribute for the current test and
+        # every other test would load the dataset again.
+        if self._dataset is None:
+            type(self)._dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        speech_samples = self._dataset.sort("id").select(range(num_samples))[:num_samples]["audio"]
         return [x["array"] for x in speech_samples]
 
     def test_spectrogram_impulse(self):
diff --git a/utils/fetch_hub_objects_for_ci.py b/utils/fetch_hub_objects_for_ci.py
index 3975921a84a..b7f2f06bebd 100644
--- a/utils/fetch_hub_objects_for_ci.py
+++ b/utils/fetch_hub_objects_for_ci.py
@@ -1,3 +1,5 @@
+from huggingface_hub import hf_hub_download
+
 from transformers.testing_utils import _run_pipeline_tests
 
 
@@ -7,3 +9,4 @@ if __name__ == "__main__":
 
         _ = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         _ = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1")
+        _ = hf_hub_download(repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset")