diff --git a/docs/source/en/model_doc/seamless_m4t.md b/docs/source/en/model_doc/seamless_m4t.md index 1d42de0a544..d523408f78f 100644 --- a/docs/source/en/model_doc/seamless_m4t.md +++ b/docs/source/en/model_doc/seamless_m4t.md @@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio: ```python >>> # let's load an audio sample from an Arabic speech corpus >>> from datasets import load_dataset ->>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True) +>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True) >>> audio_sample = next(iter(dataset))["audio"] >>> # now, process it diff --git a/docs/source/en/model_doc/seamless_m4t_v2.md b/docs/source/en/model_doc/seamless_m4t_v2.md index 7898799ee44..c98b7b4dd8d 100644 --- a/docs/source/en/model_doc/seamless_m4t_v2.md +++ b/docs/source/en/model_doc/seamless_m4t_v2.md @@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio: ```python >>> # let's load an audio sample from an Arabic speech corpus >>> from datasets import load_dataset ->>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True) +>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True) >>> audio_sample = next(iter(dataset))["audio"] >>> # now, process it diff --git a/examples/flax/test_flax_examples.py b/examples/flax/test_flax_examples.py index 132be94e318..ab1930e001c 100644 --- a/examples/flax/test_flax_examples.py +++ b/examples/flax/test_flax_examples.py @@ -264,7 +264,6 @@ class ExamplesTests(TestCasePlus): --dataset_config clean --train_split_name validation --eval_split_name validation - --trust_remote_code --output_dir {tmp_dir} --overwrite_output_dir --num_train_epochs=2 diff --git a/examples/pytorch/test_accelerate_examples.py b/examples/pytorch/test_accelerate_examples.py index 923803a2da5..14ee36b293f 100644 --- a/examples/pytorch/test_accelerate_examples.py +++ b/examples/pytorch/test_accelerate_examples.py @@ -312,7 +312,6 @@ class ExamplesTestsNoTrainer(TestCasePlus): {self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py --model_name_or_path google/vit-base-patch16-224-in21k --dataset_name hf-internal-testing/cats_vs_dogs_sample - --trust_remote_code --learning_rate 1e-4 --per_device_train_batch_size 2 --per_device_eval_batch_size 1 diff --git a/examples/pytorch/test_pytorch_examples.py b/examples/pytorch/test_pytorch_examples.py index 3992506f513..d27cc305d6a 100644 --- a/examples/pytorch/test_pytorch_examples.py +++ b/examples/pytorch/test_pytorch_examples.py @@ -17,7 +17,6 @@ import json import logging import os import sys -import unittest from unittest.mock import patch from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining @@ -391,7 +390,6 @@ class ExamplesTests(TestCasePlus): --output_dir {tmp_dir} --model_name_or_path google/vit-base-patch16-224-in21k --dataset_name hf-internal-testing/cats_vs_dogs_sample - --trust_remote_code --do_train --do_eval --learning_rate 1e-4 @@ -415,7 +413,6 @@ class ExamplesTests(TestCasePlus): result = get_results(tmp_dir) self.assertGreaterEqual(result["eval_accuracy"], 0.8) - @unittest.skip("temporary to avoid failing on circleci") def test_run_speech_recognition_ctc(self): tmp_dir = self.get_auto_remove_tmp_dir() testargs = f""" @@ -426,7 +423,6 @@ class ExamplesTests(TestCasePlus): --dataset_config_name clean --train_split_name validation --eval_split_name validation - 
--trust_remote_code --do_train --do_eval --learning_rate 1e-4 @@ -447,7 +443,6 @@ class ExamplesTests(TestCasePlus): result = get_results(tmp_dir) self.assertLess(result["eval_loss"], result["train_loss"]) - @unittest.skip("temporary to avoid failing on circleci") def test_run_speech_recognition_ctc_adapter(self): tmp_dir = self.get_auto_remove_tmp_dir() testargs = f""" @@ -458,7 +453,6 @@ class ExamplesTests(TestCasePlus): --dataset_config_name clean --train_split_name validation --eval_split_name validation - --trust_remote_code --do_train --do_eval --learning_rate 1e-4 @@ -481,7 +475,6 @@ class ExamplesTests(TestCasePlus): self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "./adapter.tur.safetensors"))) self.assertLess(result["eval_loss"], result["train_loss"]) - @unittest.skip("temporary to avoid failing on circleci") def test_run_speech_recognition_seq2seq(self): tmp_dir = self.get_auto_remove_tmp_dir() testargs = f""" @@ -492,7 +485,6 @@ class ExamplesTests(TestCasePlus): --dataset_config_name clean --train_split_name validation --eval_split_name validation - --trust_remote_code --do_train --do_eval --learning_rate 1e-4 @@ -520,7 +512,6 @@ class ExamplesTests(TestCasePlus): --output_dir {tmp_dir} --model_name_or_path hf-internal-testing/tiny-random-wav2vec2 --dataset_name anton-l/superb_demo - --trust_remote_code --dataset_config_name ks --train_split_name test --eval_split_name test @@ -555,7 +546,6 @@ class ExamplesTests(TestCasePlus): --dataset_name hf-internal-testing/librispeech_asr_dummy --dataset_config_names clean --dataset_split_names validation - --trust_remote_code --learning_rate 1e-4 --per_device_train_batch_size 4 --per_device_eval_batch_size 4 @@ -576,7 +566,6 @@ class ExamplesTests(TestCasePlus): run_mae.py --output_dir {tmp_dir} --dataset_name hf-internal-testing/cats_vs_dogs_sample - --trust_remote_code --do_train --do_eval --learning_rate 1e-4 diff --git a/examples/tensorflow/test_tensorflow_examples.py b/examples/tensorflow/test_tensorflow_examples.py index 46ed20c021d..03d0e32def0 100644 --- a/examples/tensorflow/test_tensorflow_examples.py +++ b/examples/tensorflow/test_tensorflow_examples.py @@ -315,7 +315,6 @@ class ExamplesTests(TestCasePlus): testargs = f""" run_image_classification.py --dataset_name hf-internal-testing/cats_vs_dogs_sample - --trust_remote_code --model_name_or_path microsoft/resnet-18 --do_train --do_eval diff --git a/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py b/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py index d211ef7ab05..119114033c4 100644 --- a/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py +++ b/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py @@ -206,7 +206,7 @@ def convert_audio_spectrogram_transformer_checkpoint(model_name, pytorch_dump_fo if "speech-commands" in model_name: # TODO: Convert dataset to Parquet - dataset = load_dataset("google/speech_commands", "v0.02", split="validation", trust_remote_code=True) + dataset = load_dataset("google/speech_commands", "v0.02", split="validation") waveform = dataset[0]["audio"]["array"] else: filepath = hf_hub_download( diff --git a/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py b/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py index 46c72a97f49..c2e366d7dd0 100644 --- 
a/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py +++ b/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py @@ -266,7 +266,7 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path): # Check outputs on an image if is_semantic: image_processor = BeitImageProcessor(size=config.image_size, do_center_crop=False) - ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") image = Image.open(ds[0]["file"]) else: image_processor = BeitImageProcessor( diff --git a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py index 4ecc3335514..dfbddef0a05 100644 --- a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py @@ -226,7 +226,7 @@ def convert_wav2vec2_checkpoint( processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-lv60") - ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") input_audio = [x["array"] for x in ds[:4]["audio"]] inputs = processor(input_audio, return_tensors="pt", padding=True) diff --git a/src/transformers/models/layoutlm/modeling_layoutlm.py b/src/transformers/models/layoutlm/modeling_layoutlm.py index 372e4b89e07..87dfed1a8c3 100644 --- a/src/transformers/models/layoutlm/modeling_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_layoutlm.py @@ -1212,7 +1212,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel): >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True) >>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac") - >>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd", split="train") >>> example = dataset[0] >>> question = "what's his name?" >>> words = example["words"] diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 5f35948771e..79c08b46d2a 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -1601,7 +1601,7 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True) >>> model = TFLayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac") - >>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd", split="train") >>> example = dataset[0] >>> question = "what's his name?" 
>>> words = example["words"] diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index fdaa37b9e50..66637bedd8d 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -753,9 +753,8 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel): >>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased") - >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True) - >>> image_path = dataset["test"][0]["file"] - >>> image = Image.open(image_path).convert("RGB") + >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa") + >>> image = dataset["test"][0]["image"] >>> encoding = processor(image, return_tensors="pt") @@ -943,7 +942,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel): >>> set_seed(0) - >>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True, trust_remote_code=True) + >>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True) >>> data = next(iter(dataset)) >>> image = data["image"].convert("RGB") @@ -1145,7 +1144,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel): >>> set_seed(0) - >>> datasets = load_dataset("nielsr/funsd", split="test", trust_remote_code=True) + >>> datasets = load_dataset("nielsr/funsd", split="test") >>> labels = datasets.features["ner_tags"].feature.names >>> id2label = {v: k for v, k in enumerate(labels)} @@ -1302,9 +1301,8 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel): >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased") >>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased") - >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True) - >>> image_path = dataset["test"][0]["file"] - >>> image = Image.open(image_path).convert("RGB") + >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa") + >>> image = dataset["test"][0]["image"] >>> question = "When is coffee break?" 
>>> encoding = processor(image, question, return_tensors="pt") diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index 1b6398a382d..05f662b12a9 100644 --- a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -736,7 +736,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel): >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = AutoModel.from_pretrained("microsoft/layoutlmv3-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] @@ -951,7 +951,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel): >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7) - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] @@ -1052,7 +1052,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel): >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = AutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> question = "what's his name?" 
@@ -1172,7 +1172,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel): >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index d6703420e4a..bac5af8a982 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -1296,7 +1296,7 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel): >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] @@ -1439,7 +1439,7 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] @@ -1566,7 +1566,7 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7) - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] @@ -1703,7 +1703,7 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False) >>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> question = "what's his name?" 
diff --git a/src/transformers/models/lilt/modeling_lilt.py b/src/transformers/models/lilt/modeling_lilt.py index 91664c32fac..d2dd1c75166 100644 --- a/src/transformers/models/lilt/modeling_lilt.py +++ b/src/transformers/models/lilt/modeling_lilt.py @@ -644,7 +644,7 @@ class LiltModel(LiltPreTrainedModel): >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") >>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> words = example["tokens"] >>> boxes = example["bboxes"] @@ -784,7 +784,7 @@ class LiltForSequenceClassification(LiltPreTrainedModel): >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") >>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> words = example["tokens"] >>> boxes = example["bboxes"] @@ -899,7 +899,7 @@ class LiltForTokenClassification(LiltPreTrainedModel): >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") >>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> words = example["tokens"] >>> boxes = example["bboxes"] @@ -1016,7 +1016,7 @@ class LiltForQuestionAnswering(LiltPreTrainedModel): >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") >>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base") - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> words = example["tokens"] >>> boxes = example["bboxes"] diff --git a/src/transformers/models/speecht5/modeling_speecht5.py b/src/transformers/models/speecht5/modeling_speecht5.py index 9dfb2653828..8d26f7d790f 100644 --- a/src/transformers/models/speecht5/modeling_speecht5.py +++ b/src/transformers/models/speecht5/modeling_speecht5.py @@ -2197,7 +2197,7 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel, GenerationMixin): >>> from datasets import load_dataset >>> dataset = load_dataset( - ... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True + ... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation" ... ) # doctest: +IGNORE_RESULT >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -2878,7 +2878,7 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel): >>> import torch >>> dataset = load_dataset( - ... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True + ... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation" ... 
) # doctest: +IGNORE_RESULT >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py index 7a5e0bd5018..8d4e368e945 100644 --- a/src/transformers/models/udop/modeling_udop.py +++ b/src/transformers/models/udop/modeling_udop.py @@ -1608,7 +1608,7 @@ class UdopModel(UdopPreTrainedModel): >>> # load an example image, along with the words and coordinates >>> # which were extracted using an OCR engine - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] @@ -1817,7 +1817,7 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin): >>> # load an example image, along with the words and coordinates >>> # which were extracted using an OCR engine - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] @@ -2029,7 +2029,7 @@ class UdopEncoderModel(UdopPreTrainedModel): >>> # load an example image, along with the words and coordinates >>> # which were extracted using an OCR engine - >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True) + >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train") >>> example = dataset[0] >>> image = example["image"] >>> words = example["tokens"] diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index aebb4f350e1..14e61ec5135 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -590,7 +590,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h") >>> # load first sample of English common_voice - >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True) + >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True) >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000)) >>> dataset_iter = iter(dataset) >>> sample = next(dataset_iter) diff --git a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py index 46cc2211b8c..beb22ca8674 100644 --- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py @@ -546,7 +546,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin): >>> processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm") >>> # load first sample of English common_voice - >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True) + >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True) >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000)) >>> dataset_iter = iter(dataset) >>> sample = next(dataset_iter) diff --git 
a/src/transformers/models/whisper/modeling_flax_whisper.py b/src/transformers/models/whisper/modeling_flax_whisper.py index 8cb98f2385d..63b7f718536 100644 --- a/src/transformers/models/whisper/modeling_flax_whisper.py +++ b/src/transformers/models/whisper/modeling_flax_whisper.py @@ -1670,7 +1670,7 @@ FLAX_WHISPER_AUDIO_CLASSIFICATION_DOCSTRING = r""" >>> model = FlaxWhisperForAudioClassification.from_pretrained( ... "sanchit-gandhi/whisper-medium-fleurs-lang-id", from_pt=True ... ) - >>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True, trust_remote_code=True) + >>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True) >>> sample = next(iter(ds)) diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py index 8a934f657b1..6488c6d16bd 100644 --- a/src/transformers/utils/doc.py +++ b/src/transformers/utils/doc.py @@ -423,7 +423,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r""" >>> import torch >>> from datasets import load_dataset - >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True) + >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -449,7 +449,7 @@ PT_SPEECH_CTC_SAMPLE = r""" >>> from datasets import load_dataset >>> import torch - >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True) + >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -484,7 +484,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r""" >>> from datasets import load_dataset >>> import torch - >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True) + >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -520,7 +520,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r""" >>> from datasets import load_dataset >>> import torch - >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True) + >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -549,7 +549,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r""" >>> from datasets import load_dataset >>> import torch - >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True) + >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -584,7 +584,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r""" >>> import torch >>> from datasets import load_dataset - >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True) + >>> dataset = load_dataset("huggingface/cats-image") >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") @@ -609,7 +609,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r""" >>> import torch >>> from datasets import load_dataset - >>> dataset = 
load_dataset("huggingface/cats-image", trust_remote_code=True) + >>> dataset = load_dataset("huggingface/cats-image") >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") @@ -1194,7 +1194,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r""" >>> from transformers import AutoProcessor, {model_class} >>> from datasets import load_dataset - >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True) + >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -1219,7 +1219,7 @@ TF_SPEECH_CTC_SAMPLE = r""" >>> from datasets import load_dataset >>> import tensorflow as tf - >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True) + >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate @@ -1254,7 +1254,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r""" >>> from transformers import AutoImageProcessor, {model_class} >>> from datasets import load_dataset - >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True) + >>> dataset = load_dataset("huggingface/cats-image") >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") @@ -1277,7 +1277,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r""" >>> import tensorflow as tf >>> from datasets import load_dataset - >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True) + >>> dataset = load_dataset("huggingface/cats-image")) >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") diff --git a/tests/deepspeed/test_model_zoo.py b/tests/deepspeed/test_model_zoo.py index b2c277b8621..2195bee01cc 100644 --- a/tests/deepspeed/test_model_zoo.py +++ b/tests/deepspeed/test_model_zoo.py @@ -270,7 +270,6 @@ def make_task_cmds(): "img_clas": f""" {scripts_dir}/image-classification/run_image_classification.py --dataset_name hf-internal-testing/cats_vs_dogs_sample - --trust_remote_code --remove_unused_columns False --max_steps 10 --image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json diff --git a/tests/models/beit/test_image_processing_beit.py b/tests/models/beit/test_image_processing_beit.py index d9ba788b1f4..51a72beeb5e 100644 --- a/tests/models/beit/test_image_processing_beit.py +++ b/tests/models/beit/test_image_processing_beit.py @@ -27,8 +27,6 @@ if is_torch_available(): import torch if is_vision_available(): - from PIL import Image - from transformers import BeitImageProcessor if is_torchvision_available(): @@ -98,23 +96,14 @@ class BeitImageProcessingTester: def prepare_semantic_single_inputs(): - dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - - image = Image.open(dataset[0]["file"]) - map = Image.open(dataset[1]["file"]) - - return image, map + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + example = ds[0] + return example["image"], example["map"] def prepare_semantic_batch_inputs(): - ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - - image1 = Image.open(ds[0]["file"]) - map1 = Image.open(ds[1]["file"]) - image2 = Image.open(ds[2]["file"]) - map2 
= Image.open(ds[3]["file"]) - - return [image1, image2], [map1, map2] + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + return list(ds["image"][:2]), list(ds["map"][:2]) @require_torch @@ -157,7 +146,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) self.assertEqual(image_processor.do_reduce_labels, True) - @unittest.skip("temporary to avoid failing on circleci") def test_call_segmentation_maps(self): for image_processing_class in self.image_processor_list: # Initialize image_processing @@ -265,7 +253,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): self.assertTrue(encoding["labels"].min().item() >= 0) self.assertTrue(encoding["labels"].max().item() <= 255) - @unittest.skip("temporary to avoid failing on circleci") def test_reduce_labels(self): for image_processing_class in self.image_processor_list: # Initialize image_processing @@ -282,7 +269,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): self.assertTrue(encoding["labels"].min().item() >= 0) self.assertTrue(encoding["labels"].max().item() <= 255) - @unittest.skip("temporary to avoid failing on circleci") def test_slow_fast_equivalence(self): if not self.test_slow_image_processor or not self.test_fast_image_processor: self.skipTest(reason="Skipping slow/fast equivalence test") diff --git a/tests/models/beit/test_modeling_beit.py b/tests/models/beit/test_modeling_beit.py index 10f9c0645b3..4804cb08b66 100644 --- a/tests/models/beit/test_modeling_beit.py +++ b/tests/models/beit/test_modeling_beit.py @@ -16,7 +16,6 @@ import unittest from datasets import load_dataset -from packaging import version from transformers import BeitConfig from transformers.testing_utils import ( @@ -53,7 +52,6 @@ if is_torch_available(): if is_vision_available(): - import PIL from PIL import Image from transformers import BeitImageProcessor @@ -504,8 +502,8 @@ class BeitModelIntegrationTest(unittest.TestCase): image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False) - ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - image = Image.open(ds[0]["file"]) + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + image = ds[0]["image"].convert("RGB") inputs = image_processor(images=image, return_tensors="pt").to(torch_device) # forward pass @@ -517,27 +515,14 @@ class BeitModelIntegrationTest(unittest.TestCase): expected_shape = torch.Size((1, 150, 160, 160)) self.assertEqual(logits.shape, expected_shape) - is_pillow_less_than_9 = version.parse(PIL.__version__) < version.parse("9.0.0") - - if is_pillow_less_than_9: - expected_slice = torch.tensor( - [ - [[-4.9225, -2.3954, -3.0522], [-2.8822, -1.0046, -1.7561], [-2.9549, -1.3228, -2.1347]], - [[-5.8168, -3.4129, -4.0778], [-3.8651, -2.2214, -3.0277], [-3.8356, -2.4643, -3.3535]], - [[-0.0078, 3.9952, 4.0754], [2.9856, 4.6944, 5.0035], [3.2413, 4.7813, 4.9969]], - ], - device=torch_device, - ) - else: - expected_slice = torch.tensor( - [ - [[-4.8960, -2.3688, -3.0355], [-2.8478, -0.9836, -1.7418], [-2.9449, -1.3332, -2.1456]], - [[-5.8081, -3.4124, -4.1006], [-3.8561, -2.2081, -3.0323], [-3.8365, -2.4601, -3.3669]], - [[-0.0309, 3.9868, 4.0540], [2.9640, 4.6877, 4.9976], [3.2081, 4.7690, 4.9942]], - ], - device=torch_device, - ) - + expected_slice = torch.tensor( + [ + [[-4.8963, -2.3696, -3.0359], [-2.8485, -0.9842, -1.7426], [-2.9453, 
-1.3338, -2.1463]], + [[-5.8099, -3.4140, -4.1025], [-3.8578, -2.2100, -3.0337], [-3.8383, -2.4615, -3.3681]], + [[-0.0314, 3.9864, 4.0536], [2.9637, 4.6879, 4.9976], [3.2074, 4.7690, 4.9946]], + ], + device=torch_device, + ) torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=1e-4, atol=1e-4) @slow @@ -547,8 +532,8 @@ class BeitModelIntegrationTest(unittest.TestCase): image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False) - ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - image = Image.open(ds[0]["file"]) + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + image = ds[0]["image"].convert("RGB") inputs = image_processor(images=image, return_tensors="pt").to(torch_device) # forward pass diff --git a/tests/models/data2vec/test_modeling_data2vec_audio.py b/tests/models/data2vec/test_modeling_data2vec_audio.py index 5a8f410a70d..e275b8d681b 100644 --- a/tests/models/data2vec/test_modeling_data2vec_audio.py +++ b/tests/models/data2vec/test_modeling_data2vec_audio.py @@ -669,7 +669,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase): return [x["array"] for x in speech_samples] def _load_superb(self, task, num_samples): - ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True) + ds = load_dataset("anton-l/superb_dummy", task, split="test") return ds[:num_samples] diff --git a/tests/models/dpt/test_image_processing_dpt.py b/tests/models/dpt/test_image_processing_dpt.py index 28bbaa31898..538ec08dc1c 100644 --- a/tests/models/dpt/test_image_processing_dpt.py +++ b/tests/models/dpt/test_image_processing_dpt.py @@ -29,8 +29,6 @@ if is_torch_available(): import torch if is_vision_available(): - from PIL import Image - from transformers import DPTImageProcessor if is_torchvision_available(): @@ -94,24 +92,15 @@ class DPTImageProcessingTester: # Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_single_inputs def prepare_semantic_single_inputs(): - dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - - image = Image.open(dataset[0]["file"]) - map = Image.open(dataset[1]["file"]) - - return image, map + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + example = ds[0] + return example["image"], example["map"] # Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_batch_inputs def prepare_semantic_batch_inputs(): - ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - - image1 = Image.open(ds[0]["file"]) - map1 = Image.open(ds[1]["file"]) - image2 = Image.open(ds[2]["file"]) - map2 = Image.open(ds[3]["file"]) - - return [image1, image2], [map1, map2] + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + return list(ds["image"][:2]), list(ds["map"][:2]) @require_torch @@ -187,7 +176,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): self.assertEqual(list(pixel_values.shape), [1, 3, 512, 672]) - @unittest.skip("temporary to avoid failing on circleci") # Copied from transformers.tests.models.beit.test_image_processing_beit.BeitImageProcessingTest.test_call_segmentation_maps def test_call_segmentation_maps(self): for image_processing_class in self.image_processor_list: @@ -296,7 +284,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): self.assertTrue(encoding["labels"].min().item() >= 0) 
self.assertTrue(encoding["labels"].max().item() <= 255) - @unittest.skip("temporary to avoid failing on circleci") def test_reduce_labels(self): for image_processing_class in self.image_processor_list: image_processor = image_processing_class(**self.image_processor_dict) @@ -319,7 +306,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): # Compare with non-reduced label to see if it's reduced by 1 self.assertEqual(encoding["labels"][first_non_zero_coords].item(), first_non_zero_value - 1) - @unittest.skip("temporary to avoid failing on circleci") def test_slow_fast_equivalence(self): if not self.test_slow_image_processor or not self.test_fast_image_processor: self.skipTest(reason="Skipping slow/fast equivalence test") @@ -341,7 +327,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): ) self.assertTrue(torch.allclose(image_encoding_slow.labels, image_encoding_fast.labels, atol=1e-1)) - @unittest.skip("temporary to avoid failing on circleci") def test_slow_fast_equivalence_batched(self): if not self.test_slow_image_processor or not self.test_fast_image_processor: self.skipTest(reason="Skipping slow/fast equivalence test") diff --git a/tests/models/granite_speech/test_modeling_granite_speech.py b/tests/models/granite_speech/test_modeling_granite_speech.py index cde0779a503..67ef91db785 100644 --- a/tests/models/granite_speech/test_modeling_granite_speech.py +++ b/tests/models/granite_speech/test_modeling_granite_speech.py @@ -391,7 +391,7 @@ class GraniteSpeechForConditionalGenerationIntegrationTest(unittest.TestCase): EXPECTED_DECODED_TEXT = [ "systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantmister quilter is the apostle of the middle classes and we are glad to welcome his gospel", - "systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantnor is mister quilp's manner less interesting than his matter" + "systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. 
You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantnor is mister quilter's manner less interesting than his matter" ] # fmt: skip self.assertEqual( diff --git a/tests/models/hubert/test_modeling_hubert.py b/tests/models/hubert/test_modeling_hubert.py index de26f4c7a4e..905b435bb59 100644 --- a/tests/models/hubert/test_modeling_hubert.py +++ b/tests/models/hubert/test_modeling_hubert.py @@ -767,7 +767,7 @@ class HubertModelIntegrationTest(unittest.TestCase): def _load_superb(self, task, num_samples): from datasets import load_dataset - ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True) + ds = load_dataset("anton-l/superb_dummy", task, split="test") return ds[:num_samples] diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py index 4b8d50489e8..f574f675110 100644 --- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py @@ -123,13 +123,13 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase) def test_layoutlmv2_integration_test(self): from datasets import load_dataset - ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True) + ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test") for image_processing_class in self.image_processor_list: # with apply_OCR = True image_processing = image_processing_class() - image = Image.open(ds[0]["file"]).convert("RGB") + image = ds[0]["image"] encoding = image_processing(image, return_tensors="pt") diff --git a/tests/models/layoutlmv2/test_processor_layoutlmv2.py b/tests/models/layoutlmv2/test_processor_layoutlmv2.py index 28b4d7a232b..e9f76cfbd71 100644 --- a/tests/models/layoutlmv2/test_processor_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_processor_layoutlmv2.py @@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin if is_pytesseract_available(): - from PIL import Image - from transformers import LayoutLMv2ImageProcessor @@ -156,11 +154,11 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase): from datasets import load_dataset # set up - datasets = load_dataset("nielsr/funsd", trust_remote_code=True) + datasets = load_dataset("nielsr/funsd") processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr") def preprocess_data(examples): - images = [Image.open(path).convert("RGB") for path in examples["image_path"]] + images = [image.convert("RGB") for image in examples["image"]] words = examples["words"] boxes = examples["bboxes"] word_labels = examples["ner_tags"] @@ -192,12 +190,8 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # we verify our implementation on 2 document images from the DocVQA dataset from datasets import load_dataset - ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True) - - image_1 = Image.open(ds[0]["file"]).convert("RGB") - image_2 = Image.open(ds[1]["file"]).convert("RGB") - - return image_1, image_2 + ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test") + return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB") @cached_property def get_tokenizers(self): diff --git a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py index 0b1fb79495f..eb4b4f1d9ac 100644 --- 
a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py @@ -22,8 +22,6 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im if is_pytesseract_available(): - from PIL import Image - from transformers import LayoutLMv3ImageProcessor if is_torchvision_available(): @@ -103,17 +101,16 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase) image_processor = image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - @unittest.skip("temporary to avoid failing on circleci") def test_LayoutLMv3_integration_test(self): from datasets import load_dataset - ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True) + ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test") # with apply_OCR = True for image_processing_class in self.image_processor_list: image_processor = image_processing_class() - image = Image.open(ds[0]["file"]).convert("RGB") + image = ds[0]["image"].convert("RGB") encoding = image_processor(image, return_tensors="pt") diff --git a/tests/models/layoutlmv3/test_processor_layoutlmv3.py b/tests/models/layoutlmv3/test_processor_layoutlmv3.py index cb102527632..cf367c615ea 100644 --- a/tests/models/layoutlmv3/test_processor_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_processor_layoutlmv3.py @@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin if is_pytesseract_available(): - from PIL import Image - from transformers import LayoutLMv3ImageProcessor @@ -172,12 +170,8 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # we verify our implementation on 2 document images from the DocVQA dataset from datasets import load_dataset - ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True) - - image_1 = Image.open(ds[0]["file"]).convert("RGB") - image_2 = Image.open(ds[1]["file"]).convert("RGB") - - return image_1, image_2 + ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test") + return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB") @cached_property def get_tokenizers(self): diff --git a/tests/models/layoutxlm/test_processor_layoutxlm.py b/tests/models/layoutxlm/test_processor_layoutxlm.py index 57872eda807..2fc7a273b96 100644 --- a/tests/models/layoutxlm/test_processor_layoutxlm.py +++ b/tests/models/layoutxlm/test_processor_layoutxlm.py @@ -33,8 +33,6 @@ from ...test_processing_common import ProcessorTesterMixin if is_pytesseract_available(): - from PIL import Image - from transformers import LayoutLMv2ImageProcessor @@ -162,11 +160,11 @@ class LayoutXLMProcessorTest(ProcessorTesterMixin, unittest.TestCase): from datasets import load_dataset # set up - datasets = load_dataset("nielsr/funsd", trust_remote_code=True) + datasets = load_dataset("nielsr/funsd") processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base", apply_ocr=False) def preprocess_data(examples): - images = [Image.open(path).convert("RGB") for path in examples["image_path"]] + images = [image.convert("RGB") for image in examples["image"]] words = examples["words"] boxes = examples["bboxes"] word_labels = examples["ner_tags"] @@ -200,12 +198,8 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # we verify our implementation on 2 document images from the DocVQA dataset from datasets import load_dataset - ds = 
load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True) - - image_1 = Image.open(ds[0]["file"]).convert("RGB") - image_2 = Image.open(ds[1]["file"]).convert("RGB") - - return image_1, image_2 + ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test") + return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB") @cached_property def get_tokenizers(self): diff --git a/tests/models/mobilevit/test_image_processing_mobilevit.py b/tests/models/mobilevit/test_image_processing_mobilevit.py index c9bfc360592..7df498176d7 100644 --- a/tests/models/mobilevit/test_image_processing_mobilevit.py +++ b/tests/models/mobilevit/test_image_processing_mobilevit.py @@ -27,8 +27,6 @@ if is_torch_available(): import torch if is_vision_available(): - from PIL import Image - from transformers import MobileViTImageProcessor @@ -86,23 +84,14 @@ class MobileViTImageProcessingTester: def prepare_semantic_single_inputs(): - dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - - image = Image.open(dataset[0]["file"]) - map = Image.open(dataset[1]["file"]) - - return image, map + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + example = ds[0] + return example["image"], example["map"] def prepare_semantic_batch_inputs(): - dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True) - - image1 = Image.open(dataset[0]["file"]) - map1 = Image.open(dataset[1]["file"]) - image2 = Image.open(dataset[2]["file"]) - map2 = Image.open(dataset[3]["file"]) - - return [image1, image2], [map1, map2] + ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") + return list(ds["image"][:2]), list(ds["map"][:2]) @require_torch @@ -135,7 +124,6 @@ class MobileViTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - @unittest.skip("temporary to avoid failing on circleci") def test_call_segmentation_maps(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) diff --git a/tests/models/nougat/test_image_processing_nougat.py b/tests/models/nougat/test_image_processing_nougat.py index 5b28c00a88b..996860da6ed 100644 --- a/tests/models/nougat/test_image_processing_nougat.py +++ b/tests/models/nougat/test_image_processing_nougat.py @@ -86,8 +86,12 @@ class NougatImageProcessingTester: return self.num_channels, self.size["height"], self.size["width"] def prepare_dummy_image(self): + revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db" filepath = hf_hub_download( - repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset" + repo_id="hf-internal-testing/fixtures_docvqa", + filename="nougat_pdf.png", + repo_type="dataset", + revision=revision, ) image = Image.open(filepath).convert("RGB") return image @@ -136,7 +140,6 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - @unittest.skip("temporary to avoid failing on circleci") def test_expected_output(self): dummy_image = self.image_processor_tester.prepare_dummy_image() image_processor = self.image_processor @@ -180,13 +183,16 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, 
unittest.TestCase):
         self.assertEqual((3, 100, 200), aligned_image.shape)
 
     def prepare_dummy_np_image(self):
+        revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
         filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
+            repo_id="hf-internal-testing/fixtures_docvqa",
+            filename="nougat_pdf.png",
+            repo_type="dataset",
+            revision=revision,
         )
         image = Image.open(filepath).convert("RGB")
         return np.array(image)
 
-    @unittest.skip("temporary to avoid failing on circleci")
     def test_crop_margin_equality_cv2_python(self):
         image = self.prepare_dummy_np_image()
         image_processor = self.image_processor
diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py
index fddf1db71a3..6c2aceea53f 100644
--- a/tests/models/perceiver/test_modeling_perceiver.py
+++ b/tests/models/perceiver/test_modeling_perceiver.py
@@ -842,11 +842,8 @@ def prepare_img():
 
 # Helper functions for optical flow integration test
 def prepare_optical_flow_images():
-    dataset = load_dataset("hf-internal-testing/fixtures_sintel", split="test", trust_remote_code=True)
-    image1 = Image.open(dataset[0]["file"]).convert("RGB")
-    image2 = Image.open(dataset[0]["file"]).convert("RGB")
-
-    return image1, image2
+    ds = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
+    return list(ds["image"][:2])
 
 
 def normalize(img):
diff --git a/tests/models/segformer/test_image_processing_segformer.py b/tests/models/segformer/test_image_processing_segformer.py
index 92cf617ee7b..f03d9c4fd60 100644
--- a/tests/models/segformer/test_image_processing_segformer.py
+++ b/tests/models/segformer/test_image_processing_segformer.py
@@ -27,8 +27,6 @@ if is_torch_available():
     import torch
 
 if is_vision_available():
-    from PIL import Image
-
     from transformers import SegformerImageProcessor
 
 
@@ -86,23 +84,14 @@ class SegformerImageProcessingTester:
 
 
 def prepare_semantic_single_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-    image = Image.open(dataset[0]["file"])
-    map = Image.open(dataset[1]["file"])
-
-    return image, map
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    example = ds[0]
+    return example["image"], example["map"]
 
 
 def prepare_semantic_batch_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-    image1 = Image.open(dataset[0]["file"])
-    map1 = Image.open(dataset[1]["file"])
-    image2 = Image.open(dataset[2]["file"])
-    map2 = Image.open(dataset[3]["file"])
-
-    return [image1, image2], [map1, map2]
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    return list(ds["image"][:2]), list(ds["map"][:2])
 
 
 @require_torch
@@ -138,7 +127,6 @@ class SegformerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertEqual(image_processor.size, {"height": 42, "width": 42})
         self.assertEqual(image_processor.do_reduce_labels, True)
 
-    @unittest.skip("temporary to avoid failing on circleci")
     def test_call_segmentation_maps(self):
         # Initialize image_processing
         image_processing = self.image_processing_class(**self.image_processor_dict)
@@ -245,7 +233,6 @@ class SegformerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertTrue(encoding["labels"].min().item() >= 0)
         self.assertTrue(encoding["labels"].max().item() <= 255)
 
-    @unittest.skip("temporary to avoid failing on circleci")
     def test_reduce_labels(self):
         # Initialize image_processing
         image_processing = self.image_processing_class(**self.image_processor_dict)
diff --git a/tests/models/udop/test_modeling_udop.py b/tests/models/udop/test_modeling_udop.py
index 86b7710c176..92dd47c3920 100644
--- a/tests/models/udop/test_modeling_udop.py
+++ b/tests/models/udop/test_modeling_udop.py
@@ -16,9 +16,9 @@ import copy
 import inspect
 import unittest
 
-from huggingface_hub import hf_hub_download
+from datasets import load_dataset
 
-from transformers import UdopConfig, is_torch_available, is_vision_available
+from transformers import UdopConfig, is_torch_available
 from transformers.testing_utils import (
     require_sentencepiece,
     require_tokenizers,
@@ -42,10 +42,6 @@ if is_torch_available():
     from transformers import UdopEncoderModel, UdopForConditionalGeneration, UdopModel, UdopProcessor
 
 
-if is_vision_available():
-    from PIL import Image
-
-
 class UdopModelTester:
     def __init__(
         self,
@@ -618,12 +614,8 @@ class UdopEncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase):
 class UdopModelIntegrationTests(unittest.TestCase):
     @cached_property
     def image(self):
-        filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="document_2.png", repo_type="dataset"
-        )
-        image = Image.open(filepath).convert("RGB")
-
-        return image
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[1]["image"]
 
     @cached_property
     def processor(self):
diff --git a/tests/models/udop/test_processor_udop.py b/tests/models/udop/test_processor_udop.py
index ea08feea41e..2fc3f59d2db 100644
--- a/tests/models/udop/test_processor_udop.py
+++ b/tests/models/udop/test_processor_udop.py
@@ -41,8 +41,6 @@ if is_torch_available():
 
 
 if is_pytesseract_available():
-    from PIL import Image
-
     from transformers import LayoutLMv3ImageProcessor
 
 
@@ -184,11 +182,11 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
         from datasets import load_dataset
 
         # set up
-        datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+        datasets = load_dataset("nielsr/funsd")
         processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)
 
         def preprocess_data(examples):
-            images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+            images = [image.convert("RGB") for image in examples["image"]]
             words = examples["words"]
             boxes = examples["bboxes"]
             word_labels = examples["ner_tags"]
@@ -222,12 +220,8 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
         # we verify our implementation on 2 document images from the DocVQA dataset
         from datasets import load_dataset
 
-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")
 
     @cached_property
     def get_tokenizers(self):
diff --git a/tests/models/unispeech/test_modeling_unispeech.py b/tests/models/unispeech/test_modeling_unispeech.py
index ebc537a4788..37da494a965 100644
--- a/tests/models/unispeech/test_modeling_unispeech.py
+++ b/tests/models/unispeech/test_modeling_unispeech.py
@@ -566,7 +566,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]
 
     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")
 
         return ds[:num_samples]
 
diff --git a/tests/models/unispeech_sat/test_modeling_unispeech_sat.py b/tests/models/unispeech_sat/test_modeling_unispeech_sat.py
index ec438dea96b..1b6a1cb8042 100644
--- a/tests/models/unispeech_sat/test_modeling_unispeech_sat.py
+++ b/tests/models/unispeech_sat/test_modeling_unispeech_sat.py
@@ -820,7 +820,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]
 
     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")
 
         return ds[:num_samples]
 
diff --git a/tests/models/upernet/test_modeling_upernet.py b/tests/models/upernet/test_modeling_upernet.py
index fc62b323252..ed0a982efd8 100644
--- a/tests/models/upernet/test_modeling_upernet.py
+++ b/tests/models/upernet/test_modeling_upernet.py
@@ -15,7 +15,7 @@
 
 import unittest
 
-from huggingface_hub import hf_hub_download
+from datasets import load_dataset
 
 from transformers import ConvNextConfig, UperNetConfig
 from transformers.testing_utils import (
@@ -41,8 +41,6 @@ if is_torch_available():
 
 
 if is_vision_available():
-    from PIL import Image
-
     from transformers import AutoImageProcessor
 
 
@@ -277,11 +275,8 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
 
 # We will verify our results on an image of ADE20k
 def prepare_img():
-    filepath = hf_hub_download(
-        repo_id="hf-internal-testing/fixtures_ade20k", repo_type="dataset", filename="ADE_val_00000001.jpg"
-    )
-    image = Image.open(filepath).convert("RGB")
-    return image
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    return ds[0]["image"].convert("RGB")
 
 
 @require_torch
@@ -302,7 +297,7 @@ class UperNetModelIntegrationTest(unittest.TestCase):
         self.assertEqual(outputs.logits.shape, expected_shape)
 
         expected_slice = torch.tensor(
-            [[-7.5958, -7.5958, -7.4302], [-7.5958, -7.5958, -7.4302], [-7.4797, -7.4797, -7.3068]]
+            [[-7.5969, -7.5969, -7.4313], [-7.5969, -7.5969, -7.4313], [-7.4808, -7.4808, -7.3080]]
         ).to(torch_device)
         torch.testing.assert_close(outputs.logits[0, 0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
diff --git a/tests/models/vilt/test_modeling_vilt.py b/tests/models/vilt/test_modeling_vilt.py
index 4537003b099..ec3fff698be 100644
--- a/tests/models/vilt/test_modeling_vilt.py
+++ b/tests/models/vilt/test_modeling_vilt.py
@@ -637,9 +637,9 @@ class ViltModelIntegrationTest(unittest.TestCase):
 
         processor = self.default_processor
 
-        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)
-        image1 = Image.open(dataset[0]["file"]).convert("RGB")
-        image2 = Image.open(dataset[1]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="train")
+        image1 = dataset[0]["image"]
+        image2 = dataset[1]["image"]
 
         text = (
             "The left image contains twice the number of dogs as the right image, and at least two dogs in total are"
diff --git a/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py b/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py
index ffd08297f14..93264feab2c 100644
--- a/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py
+++ b/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py
@@ -1149,8 +1149,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_handwritten(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)
 
-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
-        image = Image.open(dataset[0]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
+        image = dataset[1]["image"].convert("RGB")
 
         processor = self.default_processor
         pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
@@ -1174,8 +1174,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_printed(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)
 
-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
-        image = Image.open(dataset[1]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
+        image = dataset[0]["image"].convert("RGB")
 
         processor = self.default_processor
         pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py
index 9597d2e6ef2..087664f4d26 100644
--- a/tests/models/wav2vec2/test_modeling_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py
@@ -97,9 +97,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)
 
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))
 
         resampled_audio = torchaudio.functional.resample(
@@ -1470,7 +1468,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]
 
     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")
 
         return ds[:num_samples]
 
@@ -1836,9 +1834,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
    @require_torchaudio
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))
 
         resampled_audio = torchaudio.functional.resample(
@@ -1862,9 +1858,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))
 
         resampled_audio = torchaudio.functional.resample(
@@ -1963,9 +1957,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}
 
         def run_model(lang):
-            ds = load_dataset(
-                "mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
-            )
+            ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
             sample = next(iter(ds))
 
             wav2vec2_lang = LANG_MAP[lang]
diff --git a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
index eaea550ee97..66fc8665cb5 100644
--- a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
+++ b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
@@ -463,9 +463,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
     def test_word_time_stamp_integration(self):
         import torch
 
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         ds_iter = iter(ds)
         sample = next(ds_iter)
diff --git a/tests/models/wavlm/test_modeling_wavlm.py b/tests/models/wavlm/test_modeling_wavlm.py
index 618e8c3ffa1..84855613dd6 100644
--- a/tests/models/wavlm/test_modeling_wavlm.py
+++ b/tests/models/wavlm/test_modeling_wavlm.py
@@ -473,7 +473,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]
 
     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")
 
         return ds[:num_samples]
 
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index ab9c98484b7..3e1b42fde90 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -1645,9 +1645,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")
         model.to(torch_device)
 
-        ds = load_dataset(
-            "facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
 
         input_speech = next(iter(ds))["audio"]["array"]
@@ -1714,11 +1712,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):
 
         token = os.getenv("HF_HUB_READ_TOKEN", True)
         ds = load_dataset(
-            "mozilla-foundation/common_voice_6_1",
+            "hf-internal-testing/fixtures_common_voice",
             "ja",
             split="test",
             streaming=True,
-            trust_remote_code=True,
             token=token,
         )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
@@ -1728,7 +1725,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):
             torch_device
         )
 
-        EXPECTED_TRANSCRIPTS = ["木村さんに電話を貸してもらいました", " Kimura-san called me."]
+        EXPECTED_TRANSCRIPTS = [
+            "夏の時期の時期でした",
+            " It was the time of day and all of the pens left during the summer.",
+        ]
 
         generated_ids = model.generate(
             input_features.repeat(2, 1, 1),
diff --git a/tests/pipelines/test_pipelines_audio_classification.py b/tests/pipelines/test_pipelines_audio_classification.py
index cea317d0eb0..bbad033d138 100644
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -179,7 +179,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         model = "superb/wav2vec2-base-superb-ks"
 
         audio_classifier = pipeline("audio-classification", model=model)
-        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)
+        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
 
         audio = np.array(dataset[3]["speech"], dtype=np.float32)
         output = audio_classifier(audio, top_k=4)
diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
index f18a35b83fe..d48caf16137 100644
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -265,9 +265,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @require_torch
     @require_pyctcdecode
     def test_large_model_pt_with_lm(self):
-        dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
-        third_item = next(iter(dataset["test"].skip(3)))
-        filename = third_item["file"]
+        filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")
 
         speech_recognizer = pipeline(
             task="automatic-speech-recognition",
@@ -388,7 +386,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=8,
             stride_length_s=1,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         sample = next(iter(data))
 
         res = pipe(sample["audio"]["array"])
@@ -434,7 +432,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             stride_length_s=1,
             return_language=True,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         sample = next(iter(data))
 
         res = pipe(sample["audio"]["array"])
@@ -489,7 +487,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             task="automatic-speech-recognition",
             model="openai/whisper-tiny.en",
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         samples = [next(iter(data)) for _ in range(8)]
 
         audio = np.concatenate([sample["audio"]["array"] for sample in samples])
@@ -1125,9 +1123,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_non_distil(self):
         # Load data:
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
         sample = dataset[0]["audio"]
 
         # Load model:
@@ -1169,9 +1165,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_distil(self):
         # Load data:
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
         sample = dataset[0]["audio"]
 
         # Load model:
diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py
index 36e64602312..215a6180379 100644
--- a/tests/pipelines/test_pipelines_image_segmentation.py
+++ b/tests/pipelines/test_pipelines_image_segmentation.py
@@ -601,9 +601,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
 
         image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)
 
-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-        file = image[0]["file"]
-        outputs = image_segmenter(file, threshold=threshold)
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = ds[0]["image"].convert("RGB")
+        outputs = image_segmenter(image, threshold=threshold)
 
         # Shortening by hashing
         for o in outputs:
@@ -655,9 +655,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
     def test_oneformer(self):
         image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")
-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-        file = image[0]["file"]
-        outputs = image_segmenter(file, threshold=0.99)
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = ds[0]["image"].convert("RGB")
+        outputs = image_segmenter(image, threshold=0.99)
 
         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])
@@ -679,7 +679,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         )
 
         # Different task
-        outputs = image_segmenter(file, threshold=0.99, subtask="instance")
+        outputs = image_segmenter(image, threshold=0.99, subtask="instance")
         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])
@@ -701,7 +701,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         )
 
         # Different task
-        outputs = image_segmenter(file, subtask="semantic")
+        outputs = image_segmenter(image, subtask="semantic")
         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])