again

2025-08-02 03:01:07 +06:00 · 2025-06-20 13:19:50 +02:00 · 2025-06-20 13:19:50 +02:00 · 1fdb9f3908
commit 1fdb9f3908
parent 3dfebf2fc0
4 changed files with 9 additions and 14 deletions
--- a/tests/models/perceiver/test_modeling_perceiver.py
+++ b/tests/models/perceiver/test_modeling_perceiver.py
@@ -842,11 +842,8 @@ def prepare_img():

 # Helper functions for optical flow integration test
 def prepare_optical_flow_images():
-    dataset = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
-    image1 = Image.open(dataset[0]["file"]).convert("RGB")
-    image2 = Image.open(dataset[0]["file"]).convert("RGB")
-
-    return image1, image2
+    ds = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
+    return list(ds["image"][:2])


 def normalize(img):
--- a/tests/models/vilt/test_modeling_vilt.py
+++ b/tests/models/vilt/test_modeling_vilt.py
@@ -637,9 +637,9 @@ class ViltModelIntegrationTest(unittest.TestCase):

        processor = self.default_processor

-        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test")
-        image1 = Image.open(dataset[0]["file"]).convert("RGB")
-        image2 = Image.open(dataset[1]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="train")
+        image1 = dataset[0]["image"]
+        image2 = dataset[1]["image"]

        text = (
            "The left image contains twice the number of dogs as the right image, and at least two dogs in total are"
--- a/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py
+++ b/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py
@@ -1149,8 +1149,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
    def test_inference_handwritten(self):
        model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
-        image = Image.open(dataset[0]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
+        image = dataset[0]["image"]

        processor = self.default_processor
        pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
@@ -1175,7 +1175,7 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
        model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)

        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
-        image = Image.open(dataset[1]["file"]).convert("RGB")
+        image = dataset[0]["image"]

        processor = self.default_processor
        pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -265,9 +265,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
    @require_torch
    @require_pyctcdecode
    def test_large_model_pt_with_lm(self):
-        dataset = load_dataset("Narsil/asr_dummy", streaming=True)
-        third_item = next(iter(dataset["test"].skip(3)))
-        filename = third_item["file"]
+        filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")

        speech_recognizer = pipeline(
            task="automatic-speech-recognition",