Remove script datasets in tests (#38940)

* remove trust_remote_code

* again

* Revert "Skip some tests for now (#38931)"

This reverts commit 31d30b7224.

* again

* style

* again

* again

* style

* fix integration test

* fix tests

* style

* fix

* fix

* fix the last ones

* style

* last one

* fix last

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Quentin Lhoest authored 2025-06-25 16:31:20 +02:00, committed by GitHub
commit 858f9b71a8 (parent 3c322c9cdf)
51 changed files with 154 additions and 293 deletions


@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:
```python
>>> # let's load an audio sample from an Arabic speech corpus
>>> from datasets import load_dataset
- >>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
+ >>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
>>> audio_sample = next(iter(dataset))["audio"]
>>> # now, process it


@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:
```python
>>> # let's load an audio sample from an Arabic speech corpus
>>> from datasets import load_dataset
- >>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
+ >>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
>>> audio_sample = next(iter(dataset))["audio"]
>>> # now, process it
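The same change lands in both docs. For context, a minimal sketch of what the updated snippet does, assuming the `halabi2016/arabic_speech_corpus` repo exposes an `audio` column: with `streaming=True`, `load_dataset` returns an `IterableDataset` and no dataset script is executed, which is why `trust_remote_code` can simply be dropped.

```python
# Minimal sketch of the updated docs snippet (assumes an "audio" column).
from datasets import load_dataset

dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
audio_sample = next(iter(dataset))["audio"]  # decoded lazily: {"array", "sampling_rate", ...}
print(audio_sample["sampling_rate"])
```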


@@ -264,7 +264,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config clean
--train_split_name validation
--eval_split_name validation
- --trust_remote_code
--output_dir {tmp_dir}
--overwrite_output_dir
--num_train_epochs=2


@@ -312,7 +312,6 @@ class ExamplesTestsNoTrainer(TestCasePlus):
{self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
--model_name_or_path google/vit-base-patch16-224-in21k
--dataset_name hf-internal-testing/cats_vs_dogs_sample
- --trust_remote_code
--learning_rate 1e-4
--per_device_train_batch_size 2
--per_device_eval_batch_size 1


@@ -17,7 +17,6 @@ import json
import logging
import os
import sys
- import unittest
from unittest.mock import patch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
@@ -391,7 +390,6 @@ class ExamplesTests(TestCasePlus):
--output_dir {tmp_dir}
--model_name_or_path google/vit-base-patch16-224-in21k
--dataset_name hf-internal-testing/cats_vs_dogs_sample
- --trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@@ -415,7 +413,6 @@ class ExamplesTests(TestCasePlus):
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.8)

- @unittest.skip("temporary to avoid failing on circleci")
def test_run_speech_recognition_ctc(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -426,7 +423,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config_name clean
--train_split_name validation
--eval_split_name validation
- --trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@@ -447,7 +443,6 @@ class ExamplesTests(TestCasePlus):
result = get_results(tmp_dir)
self.assertLess(result["eval_loss"], result["train_loss"])

- @unittest.skip("temporary to avoid failing on circleci")
def test_run_speech_recognition_ctc_adapter(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -458,7 +453,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config_name clean
--train_split_name validation
--eval_split_name validation
- --trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@@ -481,7 +475,6 @@ class ExamplesTests(TestCasePlus):
self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "./adapter.tur.safetensors")))
self.assertLess(result["eval_loss"], result["train_loss"])

- @unittest.skip("temporary to avoid failing on circleci")
def test_run_speech_recognition_seq2seq(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -492,7 +485,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config_name clean
--train_split_name validation
--eval_split_name validation
- --trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@@ -520,7 +512,6 @@ class ExamplesTests(TestCasePlus):
--output_dir {tmp_dir}
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
--dataset_name anton-l/superb_demo
- --trust_remote_code
--dataset_config_name ks
--train_split_name test
--eval_split_name test
@@ -555,7 +546,6 @@ class ExamplesTests(TestCasePlus):
--dataset_name hf-internal-testing/librispeech_asr_dummy
--dataset_config_names clean
--dataset_split_names validation
- --trust_remote_code
--learning_rate 1e-4
--per_device_train_batch_size 4
--per_device_eval_batch_size 4
@@ -576,7 +566,6 @@ class ExamplesTests(TestCasePlus):
run_mae.py
--output_dir {tmp_dir}
--dataset_name hf-internal-testing/cats_vs_dogs_sample
- --trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
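Dropping `--trust_remote_code` from these `testargs` strings works because the tests feed the argument list straight to each script's argument parser. A hedged sketch of that harness pattern (the real helpers live in `TestCasePlus`; the module import and paths below are illustrative, not exact):

```python
# Hedged sketch of how the example tests invoke a script: patch sys.argv
# and call the script's main(). Names below are illustrative assumptions.
import sys
from unittest.mock import patch

import run_image_classification  # hypothetical: the example script imported as a module

testargs = """
    run_image_classification.py
    --dataset_name hf-internal-testing/cats_vs_dogs_sample
    --model_name_or_path microsoft/resnet-18
    --output_dir /tmp/test-img-clas
    --do_train
    --do_eval
    """.split()

with patch.object(sys, "argv", testargs):
    run_image_classification.main()  # the dataset now loads without a script
```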


@@ -315,7 +315,6 @@ class ExamplesTests(TestCasePlus):
testargs = f"""
run_image_classification.py
--dataset_name hf-internal-testing/cats_vs_dogs_sample
- --trust_remote_code
--model_name_or_path microsoft/resnet-18
--do_train
--do_eval


@@ -206,7 +206,7 @@ def convert_audio_spectrogram_transformer_checkpoint(model_name, pytorch_dump_fo
if "speech-commands" in model_name:
# TODO: Convert dataset to Parquet
- dataset = load_dataset("google/speech_commands", "v0.02", split="validation", trust_remote_code=True)
+ dataset = load_dataset("google/speech_commands", "v0.02", split="validation")
waveform = dataset[0]["audio"]["array"]
else:
filepath = hf_hub_download(


@@ -266,7 +266,7 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path):
# Check outputs on an image
if is_semantic:
image_processor = BeitImageProcessor(size=config.image_size, do_center_crop=False)
- ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
else:
image_processor = BeitImageProcessor(


@@ -226,7 +226,7 @@ def convert_wav2vec2_checkpoint(
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-lv60")

- ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
input_audio = [x["array"] for x in ds[:4]["audio"]]

inputs = processor(input_audio, return_tensors="pt", padding=True)


@@ -1212,7 +1212,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
>>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

- >>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd", split="train")
>>> example = dataset[0]
>>> question = "what's his name?"
>>> words = example["words"]


@@ -1601,7 +1601,7 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer
>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
>>> model = TFLayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

- >>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd", split="train")
>>> example = dataset[0]
>>> question = "what's his name?"
>>> words = example["words"]


@@ -753,9 +753,8 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
>>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")

- >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
- >>> image_path = dataset["test"][0]["file"]
- >>> image = Image.open(image_path).convert("RGB")
+ >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
+ >>> image = dataset["test"][0]["image"]

>>> encoding = processor(image, return_tensors="pt")
@@ -943,7 +942,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
>>> set_seed(0)

- >>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True, trust_remote_code=True)
+ >>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True)
>>> data = next(iter(dataset))
>>> image = data["image"].convert("RGB")
@@ -1145,7 +1144,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
>>> set_seed(0)

- >>> datasets = load_dataset("nielsr/funsd", split="test", trust_remote_code=True)
+ >>> datasets = load_dataset("nielsr/funsd", split="test")
>>> labels = datasets.features["ner_tags"].feature.names
>>> id2label = {v: k for v, k in enumerate(labels)}
@@ -1302,9 +1301,8 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")

- >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
- >>> image_path = dataset["test"][0]["file"]
- >>> image = Image.open(image_path).convert("RGB")
+ >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
+ >>> image = dataset["test"][0]["image"]
>>> question = "When is coffee break?"
>>> encoding = processor(image, question, return_tensors="pt")
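This is the recurring rewrite across these docstrings: `fixtures_docvqa` now stores images as a datasets `Image` feature, so indexing returns a decoded `PIL.Image` directly and the manual `Image.open(path)` step disappears. A small sketch of the two access patterns side by side:

```python
# Sketch: old path-based access vs. the new Image-feature access.
from datasets import load_dataset

dataset = load_dataset("hf-internal-testing/fixtures_docvqa")

# old: the dataset stored file paths, decoded by hand
#   image = Image.open(dataset["test"][0]["file"]).convert("RGB")

# new: the Image feature decodes to a PIL image on access
image = dataset["test"][0]["image"]
```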


@@ -736,7 +736,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@@ -951,7 +951,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@@ -1052,7 +1052,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> question = "what's his name?"
@@ -1172,7 +1172,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
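All four LayoutLMv3 docstrings share the same setup. As a runnable condensation of it, grounded in the snippets above (`apply_ocr=False` because the dataset already ships OCR words and boxes):

```python
# Condensed, runnable form of the docstring setup shown above.
from datasets import load_dataset
from transformers import AutoModel, AutoProcessor

processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")

dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
example = dataset[0]

# words and boxes come pre-extracted, so OCR inside the processor is disabled
encoding = processor(example["image"], example["tokens"], boxes=example["bboxes"], return_tensors="pt")
outputs = model(**encoding)
```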


@@ -1296,7 +1296,7 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@@ -1439,7 +1439,7 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@@ -1566,7 +1566,7 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@@ -1703,7 +1703,7 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> question = "what's his name?"


@@ -644,7 +644,7 @@ class LiltModel(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]
@@ -784,7 +784,7 @@ class LiltForSequenceClassification(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]
@@ -899,7 +899,7 @@ class LiltForTokenClassification(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]
@@ -1016,7 +1016,7 @@ class LiltForQuestionAnswering(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]


@@ -2197,7 +2197,7 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel, GenerationMixin):
>>> from datasets import load_dataset

>>> dataset = load_dataset(
- ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
+ ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
... )  # doctest: +IGNORE_RESULT
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -2878,7 +2878,7 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel):
>>> import torch

>>> dataset = load_dataset(
- ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
+ ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
... )  # doctest: +IGNORE_RESULT
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate


@@ -1608,7 +1608,7 @@ class UdopModel(UdopPreTrainedModel):
>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@@ -1817,7 +1817,7 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin):
>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@@ -2029,7 +2029,7 @@ class UdopEncoderModel(UdopPreTrainedModel):
>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
- >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+ >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]


@@ -590,7 +590,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")

>>> # load first sample of English common_voice
- >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
+ >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
>>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
>>> dataset_iter = iter(dataset)
>>> sample = next(dataset_iter)


@@ -546,7 +546,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
>>> processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")

>>> # load first sample of English common_voice
- >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
+ >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
>>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
>>> dataset_iter = iter(dataset)
>>> sample = next(dataset_iter)
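Both Wav2Vec2 docstrings keep the `cast_column` line: Common Voice audio is not stored at 16 kHz, and casting the `audio` column to `datasets.Audio(sampling_rate=16_000)` resamples each clip at decode time, which also works on streamed datasets. A sketch, assuming access to the gated Common Voice repo:

```python
# Sketch: stream Common Voice and resample on the fly to the 16 kHz
# rate expected by Wav2Vec2 (the repo is gated, so auth is assumed).
import datasets
from datasets import load_dataset

dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))

sample = next(iter(dataset))
audio = sample["audio"]["array"]  # already resampled to 16 kHz
```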


@@ -1670,7 +1670,7 @@ FLAX_WHISPER_AUDIO_CLASSIFICATION_DOCSTRING = r"""
>>> model = FlaxWhisperForAudioClassification.from_pretrained(
...     "sanchit-gandhi/whisper-medium-fleurs-lang-id", from_pt=True
... )
- >>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True, trust_remote_code=True)
+ >>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True)
>>> sample = next(iter(ds))


@@ -423,7 +423,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset

- >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+ >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -449,7 +449,7 @@ PT_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

- >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+ >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -484,7 +484,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

- >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+ >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -520,7 +520,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

- >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+ >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -549,7 +549,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

- >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+ >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -584,7 +584,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset

- >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+ >>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -609,7 +609,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset

- >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+ >>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -1194,7 +1194,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoProcessor, {model_class}
>>> from datasets import load_dataset

- >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+ >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -1219,7 +1219,7 @@ TF_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import tensorflow as tf

- >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+ >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -1254,7 +1254,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoImageProcessor, {model_class}
>>> from datasets import load_dataset

- >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+ >>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -1277,7 +1277,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import tensorflow as tf
>>> from datasets import load_dataset

- >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+ >>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
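The vision docstring samples all reduce to the same three lines. A sketch with a concrete checkpoint substituted for the `{checkpoint}` placeholder (the checkpoint name here is an assumption; any image-classification checkpoint should behave the same):

```python
# Sketch of the vision docstring sample; "google/vit-base-patch16-224"
# stands in for the {checkpoint} placeholder used in the templates above.
from datasets import load_dataset
from transformers import AutoImageProcessor

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]  # decoded PIL image

image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
inputs = image_processor(image, return_tensors="pt")
```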


@@ -270,7 +270,6 @@ def make_task_cmds():
"img_clas": f"""
{scripts_dir}/image-classification/run_image_classification.py
--dataset_name hf-internal-testing/cats_vs_dogs_sample
- --trust_remote_code
--remove_unused_columns False
--max_steps 10
--image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json


@@ -27,8 +27,6 @@ if is_torch_available():
import torch

if is_vision_available():
- from PIL import Image
-
from transformers import BeitImageProcessor

if is_torchvision_available():
@@ -98,23 +96,14 @@ class BeitImageProcessingTester:
def prepare_semantic_single_inputs():
- dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image = Image.open(dataset[0]["file"])
- map = Image.open(dataset[1]["file"])
- return image, map
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ example = ds[0]
+ return example["image"], example["map"]

def prepare_semantic_batch_inputs():
- ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image1 = Image.open(ds[0]["file"])
- map1 = Image.open(ds[1]["file"])
- image2 = Image.open(ds[2]["file"])
- map2 = Image.open(ds[3]["file"])
- return [image1, image2], [map1, map2]
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ return list(ds["image"][:2]), list(ds["map"][:2])

@require_torch
@@ -157,7 +146,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84})
self.assertEqual(image_processor.do_reduce_labels, True)

- @unittest.skip("temporary to avoid failing on circleci")
def test_call_segmentation_maps(self):
for image_processing_class in self.image_processor_list:
# Initialize image_processing
@@ -265,7 +253,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertTrue(encoding["labels"].min().item() >= 0)
self.assertTrue(encoding["labels"].max().item() <= 255)

- @unittest.skip("temporary to avoid failing on circleci")
def test_reduce_labels(self):
for image_processing_class in self.image_processor_list:
# Initialize image_processing
@@ -282,7 +269,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertTrue(encoding["labels"].min().item() >= 0)
self.assertTrue(encoding["labels"].max().item() <= 255)

- @unittest.skip("temporary to avoid failing on circleci")
def test_slow_fast_equivalence(self):
if not self.test_slow_image_processor or not self.test_fast_image_processor:
self.skipTest(reason="Skipping slow/fast equivalence test")
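Pulled out of the diff above, the rewritten fixture helpers in standalone form; note that batch inputs now come from column slices (`ds["image"][:2]`) instead of four separate `Image.open` calls:

```python
# The rewritten BEiT fixture helpers, shown standalone. The dataset's
# "image" and "map" columns are datasets Image features, decoded on access.
from datasets import load_dataset


def prepare_semantic_single_inputs():
    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
    example = ds[0]
    return example["image"], example["map"]


def prepare_semantic_batch_inputs():
    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
    return list(ds["image"][:2]), list(ds["map"][:2])
```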


@@ -16,7 +16,6 @@
import unittest

from datasets import load_dataset
- from packaging import version

from transformers import BeitConfig
from transformers.testing_utils import (
@@ -53,7 +52,6 @@ if is_torch_available():

if is_vision_available():
- import PIL
from PIL import Image

from transformers import BeitImageProcessor
@@ -504,8 +502,8 @@ class BeitModelIntegrationTest(unittest.TestCase):
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)

- ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image = Image.open(ds[0]["file"])
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ image = ds[0]["image"].convert("RGB")

inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

# forward pass
@@ -517,27 +515,14 @@ class BeitModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 150, 160, 160))
self.assertEqual(logits.shape, expected_shape)

- is_pillow_less_than_9 = version.parse(PIL.__version__) < version.parse("9.0.0")
- if is_pillow_less_than_9:
- expected_slice = torch.tensor(
- [
- [[-4.9225, -2.3954, -3.0522], [-2.8822, -1.0046, -1.7561], [-2.9549, -1.3228, -2.1347]],
- [[-5.8168, -3.4129, -4.0778], [-3.8651, -2.2214, -3.0277], [-3.8356, -2.4643, -3.3535]],
- [[-0.0078, 3.9952, 4.0754], [2.9856, 4.6944, 5.0035], [3.2413, 4.7813, 4.9969]],
- ],
- device=torch_device,
- )
- else:
- expected_slice = torch.tensor(
- [
- [[-4.8960, -2.3688, -3.0355], [-2.8478, -0.9836, -1.7418], [-2.9449, -1.3332, -2.1456]],
- [[-5.8081, -3.4124, -4.1006], [-3.8561, -2.2081, -3.0323], [-3.8365, -2.4601, -3.3669]],
- [[-0.0309, 3.9868, 4.0540], [2.9640, 4.6877, 4.9976], [3.2081, 4.7690, 4.9942]],
- ],
- device=torch_device,
- )
+ expected_slice = torch.tensor(
+ [
+ [[-4.8963, -2.3696, -3.0359], [-2.8485, -0.9842, -1.7426], [-2.9453, -1.3338, -2.1463]],
+ [[-5.8099, -3.4140, -4.1025], [-3.8578, -2.2100, -3.0337], [-3.8383, -2.4615, -3.3681]],
+ [[-0.0314, 3.9864, 4.0536], [2.9637, 4.6879, 4.9976], [3.2074, 4.7690, 4.9946]],
+ ],
+ device=torch_device,
+ )

torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)

@slow
@@ -547,8 +532,8 @@ class BeitModelIntegrationTest(unittest.TestCase):
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)

- ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image = Image.open(ds[0]["file"])
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ image = ds[0]["image"].convert("RGB")

inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

# forward pass


@@ -669,7 +669,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]

def _load_superb(self, task, num_samples):
- ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+ ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]
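`_load_superb` returns `ds[:num_samples]`, and it helps to remember that slicing a `datasets.Dataset` yields a dict of column lists, not a list of rows; the integration tests index columns out of that dict. A quick sketch (the `"ks"` config name is an assumption about the dataset's task configs):

```python
# Sketch: slicing a datasets.Dataset returns {column_name: [values, ...]}.
from datasets import load_dataset

ds = load_dataset("anton-l/superb_dummy", "ks", split="test")  # "ks" is an assumed task config
batch = ds[:4]
print(type(batch))           # <class 'dict'>
print(sorted(batch.keys()))  # column names, each mapped to a list of 4 values
```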


@@ -29,8 +29,6 @@ if is_torch_available():
import torch

if is_vision_available():
- from PIL import Image
-
from transformers import DPTImageProcessor

if is_torchvision_available():
@@ -94,24 +92,15 @@ class DPTImageProcessingTester:
# Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_single_inputs
def prepare_semantic_single_inputs():
- dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image = Image.open(dataset[0]["file"])
- map = Image.open(dataset[1]["file"])
- return image, map
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ example = ds[0]
+ return example["image"], example["map"]

# Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_batch_inputs
def prepare_semantic_batch_inputs():
- ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image1 = Image.open(ds[0]["file"])
- map1 = Image.open(ds[1]["file"])
- image2 = Image.open(ds[2]["file"])
- map2 = Image.open(ds[3]["file"])
- return [image1, image2], [map1, map2]
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ return list(ds["image"][:2]), list(ds["map"][:2])

@require_torch
@@ -187,7 +176,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertEqual(list(pixel_values.shape), [1, 3, 512, 672])

- @unittest.skip("temporary to avoid failing on circleci")
# Copied from transformers.tests.models.beit.test_image_processing_beit.BeitImageProcessingTest.test_call_segmentation_maps
def test_call_segmentation_maps(self):
for image_processing_class in self.image_processor_list:
@@ -296,7 +284,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertTrue(encoding["labels"].min().item() >= 0)
self.assertTrue(encoding["labels"].max().item() <= 255)

- @unittest.skip("temporary to avoid failing on circleci")
def test_reduce_labels(self):
for image_processing_class in self.image_processor_list:
image_processor = image_processing_class(**self.image_processor_dict)
@@ -319,7 +306,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
# Compare with non-reduced label to see if it's reduced by 1
self.assertEqual(encoding["labels"][first_non_zero_coords].item(), first_non_zero_value - 1)

- @unittest.skip("temporary to avoid failing on circleci")
def test_slow_fast_equivalence(self):
if not self.test_slow_image_processor or not self.test_fast_image_processor:
self.skipTest(reason="Skipping slow/fast equivalence test")
@@ -341,7 +327,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
)
self.assertTrue(torch.allclose(image_encoding_slow.labels, image_encoding_fast.labels, atol=1e-1))

- @unittest.skip("temporary to avoid failing on circleci")
def test_slow_fast_equivalence_batched(self):
if not self.test_slow_image_processor or not self.test_fast_image_processor:
self.skipTest(reason="Skipping slow/fast equivalence test")


@@ -391,7 +391,7 @@ class GraniteSpeechForConditionalGenerationIntegrationTest(unittest.TestCase):
EXPECTED_DECODED_TEXT = [
"systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantmister quilter is the apostle of the middle classes and we are glad to welcome his gospel",
- "systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantnor is mister quilp's manner less interesting than his matter"
+ "systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantnor is mister quilter's manner less interesting than his matter"
]  # fmt: skip
self.assertEqual(


@@ -767,7 +767,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
def _load_superb(self, task, num_samples):
from datasets import load_dataset

- ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+ ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]


@@ -123,13 +123,13 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
def test_layoutlmv2_integration_test(self):
from datasets import load_dataset

- ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")

for image_processing_class in self.image_processor_list:
# with apply_OCR = True
image_processing = image_processing_class()

- image = Image.open(ds[0]["file"]).convert("RGB")
+ image = ds[0]["image"]

encoding = image_processing(image, return_tensors="pt")


@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin

if is_pytesseract_available():
- from PIL import Image
-
from transformers import LayoutLMv2ImageProcessor
@@ -156,11 +154,11 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset

# set up
- datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+ datasets = load_dataset("nielsr/funsd")
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")

def preprocess_data(examples):
- images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+ images = [image.convert("RGB") for image in examples["image"]]
words = examples["words"]
boxes = examples["bboxes"]
word_labels = examples["ner_tags"]
@@ -192,12 +190,8 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset

- ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
- image_1 = Image.open(ds[0]["file"]).convert("RGB")
- image_2 = Image.open(ds[1]["file"]).convert("RGB")
- return image_1, image_2
+ ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+ return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

@cached_property
def get_tokenizers(self):
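With the decoded `image` column, the processor test's `preprocess_data` no longer opens files itself. A condensed sketch of that flow, assuming the column names shown in the diff (the `padding`/`truncation` arguments are assumptions; the diff only shows the image handling):

```python
# Condensed sketch of the updated LayoutLMv2 processor-test preprocessing.
from datasets import load_dataset
from transformers import LayoutLMv2Processor

data = load_dataset("nielsr/funsd")
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")


def preprocess_data(examples):
    # images arrive as decoded PIL images, not paths
    images = [image.convert("RGB") for image in examples["image"]]
    return processor(
        images,
        examples["words"],
        boxes=examples["bboxes"],
        word_labels=examples["ner_tags"],
        padding="max_length",  # assumed; not shown in the diff
        truncation=True,       # assumed; not shown in the diff
    )


train = data["train"].map(preprocess_data, batched=True, remove_columns=data["train"].column_names)
```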


@@ -22,8 +22,6 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im

if is_pytesseract_available():
- from PIL import Image
-
from transformers import LayoutLMv3ImageProcessor

if is_torchvision_available():
@@ -103,17 +101,16 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
image_processor = image_processing_class.from_dict(self.image_processor_dict, size=42)
self.assertEqual(image_processor.size, {"height": 42, "width": 42})

- @unittest.skip("temporary to avoid failing on circleci")
def test_LayoutLMv3_integration_test(self):
from datasets import load_dataset

- ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")

# with apply_OCR = True
for image_processing_class in self.image_processor_list:
image_processor = image_processing_class()

- image = Image.open(ds[0]["file"]).convert("RGB")
+ image = ds[0]["image"].convert("RGB")

encoding = image_processor(image, return_tensors="pt")


@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin

if is_pytesseract_available():
- from PIL import Image
-
from transformers import LayoutLMv3ImageProcessor
@@ -172,12 +170,8 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset

- ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
- image_1 = Image.open(ds[0]["file"]).convert("RGB")
- image_2 = Image.open(ds[1]["file"]).convert("RGB")
- return image_1, image_2
+ ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+ return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

@cached_property
def get_tokenizers(self):


@@ -33,8 +33,6 @@ from ...test_processing_common import ProcessorTesterMixin

if is_pytesseract_available():
- from PIL import Image
-
from transformers import LayoutLMv2ImageProcessor
@@ -162,11 +160,11 @@ class LayoutXLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset

# set up
- datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+ datasets = load_dataset("nielsr/funsd")
processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base", apply_ocr=False)

def preprocess_data(examples):
- images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+ images = [image.convert("RGB") for image in examples["image"]]
words = examples["words"]
boxes = examples["bboxes"]
word_labels = examples["ner_tags"]
@@ -200,12 +198,8 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset

- ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
- image_1 = Image.open(ds[0]["file"]).convert("RGB")
- image_2 = Image.open(ds[1]["file"]).convert("RGB")
- return image_1, image_2
+ ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+ return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

@cached_property
def get_tokenizers(self):


@@ -27,8 +27,6 @@ if is_torch_available():
import torch

if is_vision_available():
- from PIL import Image
-
from transformers import MobileViTImageProcessor
@@ -86,23 +84,14 @@ class MobileViTImageProcessingTester:
def prepare_semantic_single_inputs():
- dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image = Image.open(dataset[0]["file"])
- map = Image.open(dataset[1]["file"])
- return image, map
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ example = ds[0]
+ return example["image"], example["map"]

def prepare_semantic_batch_inputs():
- dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
- image1 = Image.open(dataset[0]["file"])
- map1 = Image.open(dataset[1]["file"])
- image2 = Image.open(dataset[2]["file"])
- map2 = Image.open(dataset[3]["file"])
- return [image1, image2], [map1, map2]
+ ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+ return list(ds["image"][:2]), list(ds["map"][:2])

@require_torch
@@ -135,7 +124,6 @@ class MobileViTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertEqual(image_processor.size, {"shortest_edge": 42})
self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84})

- @unittest.skip("temporary to avoid failing on circleci")
def test_call_segmentation_maps(self):
# Initialize image_processing
image_processing = self.image_processing_class(**self.image_processor_dict)

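Both helper rewrites above lean on the two access modes of a `datasets.Dataset`: integer indexing yields a single example as a dict, while column indexing yields a plain list. A small sketch, assuming the ADE20k fixture exposes `image` and `map` columns:

```python
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")

example = ds[0]  # one example: {"image": <PIL.Image>, "map": <PIL.Image>, ...}
single = (example["image"], example["map"])

# column access decodes the whole column; slice it for a batch
batch = (list(ds["image"][:2]), list(ds["map"][:2]))
```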

@@ -86,8 +86,12 @@ class NougatImageProcessingTester:
         return self.num_channels, self.size["height"], self.size["width"]

     def prepare_dummy_image(self):
+        revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
         filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
+            repo_id="hf-internal-testing/fixtures_docvqa",
+            filename="nougat_pdf.png",
+            repo_type="dataset",
+            revision=revision,
         )
         image = Image.open(filepath).convert("RGB")
         return image
@@ -136,7 +140,6 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
         self.assertEqual(image_processor.size, {"height": 42, "width": 42})

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_expected_output(self):
         dummy_image = self.image_processor_tester.prepare_dummy_image()
         image_processor = self.image_processor
@@ -180,13 +183,16 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertEqual((3, 100, 200), aligned_image.shape)

     def prepare_dummy_np_image(self):
+        revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
         filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
+            repo_id="hf-internal-testing/fixtures_docvqa",
+            filename="nougat_pdf.png",
+            repo_type="dataset",
+            revision=revision,
         )
         image = Image.open(filepath).convert("RGB")
         return np.array(image)

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_crop_margin_equality_cv2_python(self):
         image = self.prepare_dummy_np_image()
         image_processor = self.image_processor

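Pinning `revision` in `hf_hub_download` fixes the exact dataset commit, so the expected values hard-coded in the Nougat tests cannot drift if the fixture repo is updated later. The sketch below mirrors the helper above; the hash is the one used in this diff:

```python
from huggingface_hub import hf_hub_download
from PIL import Image

# Resolve one file from a pinned dataset commit, then decode it.
revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
filepath = hf_hub_download(
    repo_id="hf-internal-testing/fixtures_docvqa",
    filename="nougat_pdf.png",
    repo_type="dataset",
    revision=revision,
)
image = Image.open(filepath).convert("RGB")
```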

@@ -842,11 +842,8 @@ def prepare_img():
 # Helper functions for optical flow integration test
 def prepare_optical_flow_images():
-    dataset = load_dataset("hf-internal-testing/fixtures_sintel", split="test", trust_remote_code=True)
-    image1 = Image.open(dataset[0]["file"]).convert("RGB")
-    image2 = Image.open(dataset[0]["file"]).convert("RGB")
-    return image1, image2
+    ds = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
+    return list(ds["image"][:2])


 def normalize(img):


@@ -27,8 +27,6 @@ if is_torch_available():
     import torch

 if is_vision_available():
-    from PIL import Image
-
     from transformers import SegformerImageProcessor
@@ -86,23 +84,14 @@ class SegformerImageProcessingTester:
 def prepare_semantic_single_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-    image = Image.open(dataset[0]["file"])
-    map = Image.open(dataset[1]["file"])
-    return image, map
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    example = ds[0]
+    return example["image"], example["map"]


 def prepare_semantic_batch_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-    image1 = Image.open(dataset[0]["file"])
-    map1 = Image.open(dataset[1]["file"])
-    image2 = Image.open(dataset[2]["file"])
-    map2 = Image.open(dataset[3]["file"])
-    return [image1, image2], [map1, map2]
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    return list(ds["image"][:2]), list(ds["map"][:2])


 @require_torch
@@ -138,7 +127,6 @@ class SegformerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertEqual(image_processor.size, {"height": 42, "width": 42})
         self.assertEqual(image_processor.do_reduce_labels, True)

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_call_segmentation_maps(self):
         # Initialize image_processing
         image_processing = self.image_processing_class(**self.image_processor_dict)
@@ -245,7 +233,6 @@ class SegformerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertTrue(encoding["labels"].min().item() >= 0)
         self.assertTrue(encoding["labels"].max().item() <= 255)

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_reduce_labels(self):
         # Initialize image_processing
         image_processing = self.image_processing_class(**self.image_processor_dict)


@@ -16,9 +16,9 @@ import copy
 import inspect
 import unittest

-from huggingface_hub import hf_hub_download
+from datasets import load_dataset

-from transformers import UdopConfig, is_torch_available, is_vision_available
+from transformers import UdopConfig, is_torch_available
 from transformers.testing_utils import (
     require_sentencepiece,
     require_tokenizers,
@@ -42,10 +42,6 @@ if is_torch_available():
     from transformers import UdopEncoderModel, UdopForConditionalGeneration, UdopModel, UdopProcessor

-if is_vision_available():
-    from PIL import Image
-

 class UdopModelTester:
     def __init__(
         self,
@@ -618,12 +614,8 @@ class UdopModelIntegrationTests(unittest.TestCase):
 class UdopModelIntegrationTests(unittest.TestCase):
     @cached_property
     def image(self):
-        filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="document_2.png", repo_type="dataset"
-        )
-        image = Image.open(filepath).convert("RGB")
-        return image
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[1]["image"]

     @cached_property
     def processor(self):


@@ -41,8 +41,6 @@ if is_torch_available():
 if is_pytesseract_available():
-    from PIL import Image
-
     from transformers import LayoutLMv3ImageProcessor
@@ -184,11 +182,11 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
         from datasets import load_dataset

         # set up
-        datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+        datasets = load_dataset("nielsr/funsd")
         processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)

         def preprocess_data(examples):
-            images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+            images = [image.convert("RGB") for image in examples["image"]]
             words = examples["words"]
             boxes = examples["bboxes"]
             word_labels = examples["ner_tags"]
@@ -222,12 +220,8 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
         # we verify our implementation on 2 document images from the DocVQA dataset
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

     @cached_property
     def get_tokenizers(self):


@@ -566,7 +566,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")

         return ds[:num_samples]

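`_load_superb` relies on slice indexing, which returns a dict of column lists rather than a list of examples; the integration tests unpack that dict downstream. A quick illustration, assuming the `ks` config of `anton-l/superb_dummy`:

```python
from datasets import load_dataset

ds = load_dataset("anton-l/superb_dummy", "ks", split="test")
batch = ds[:4]        # dict: column name -> list of the first 4 values
print(sorted(batch))  # the audio column here is named "speech"
```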

@@ -820,7 +820,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")

         return ds[:num_samples]


@@ -15,7 +15,7 @@
 import unittest

-from huggingface_hub import hf_hub_download
+from datasets import load_dataset

 from transformers import ConvNextConfig, UperNetConfig
 from transformers.testing_utils import (
@@ -41,8 +41,6 @@ if is_torch_available():
 if is_vision_available():
-    from PIL import Image
-
     from transformers import AutoImageProcessor
@@ -277,11 +275,8 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
 # We will verify our results on an image of ADE20k
 def prepare_img():
-    filepath = hf_hub_download(
-        repo_id="hf-internal-testing/fixtures_ade20k", repo_type="dataset", filename="ADE_val_00000001.jpg"
-    )
-    image = Image.open(filepath).convert("RGB")
-    return image
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    return ds[0]["image"].convert("RGB")


 @require_torch
@@ -302,7 +297,7 @@ class UperNetModelIntegrationTest(unittest.TestCase):
         self.assertEqual(outputs.logits.shape, expected_shape)

         expected_slice = torch.tensor(
-            [[-7.5958, -7.5958, -7.4302], [-7.5958, -7.5958, -7.4302], [-7.4797, -7.4797, -7.3068]]
+            [[-7.5969, -7.5969, -7.4313], [-7.5969, -7.5969, -7.4313], [-7.4808, -7.4808, -7.3080]]
         ).to(torch_device)
         torch.testing.assert_close(outputs.logits[0, 0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)

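The UperNet reference slice above was re-computed because the input image now comes from the decoded dataset column, which shifts the logits slightly; `torch.testing.assert_close` then bounds any remaining drift. A self-contained sketch of that check, with a stand-in for the model output:

```python
import torch

expected_slice = torch.tensor(
    [[-7.5969, -7.5969, -7.4313], [-7.5969, -7.5969, -7.4313], [-7.4808, -7.4808, -7.3080]]
)
logits_slice = expected_slice + 1e-5  # stand-in for outputs.logits[0, 0, :3, :3]
# passes as long as |diff| <= atol + rtol * |expected| per element
torch.testing.assert_close(logits_slice, expected_slice, rtol=1e-4, atol=1e-4)
```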

@@ -637,9 +637,9 @@ class ViltModelIntegrationTest(unittest.TestCase):
         processor = self.default_processor

-        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)
-        image1 = Image.open(dataset[0]["file"]).convert("RGB")
-        image2 = Image.open(dataset[1]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="train")
+        image1 = dataset[0]["image"]
+        image2 = dataset[1]["image"]

         text = (
             "The left image contains twice the number of dogs as the right image, and at least two dogs in total are"


@@ -1149,8 +1149,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_handwritten(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
-        image = Image.open(dataset[0]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
+        image = dataset[1]["image"].convert("RGB")

         processor = self.default_processor
         pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
@@ -1174,8 +1174,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_printed(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
-        image = Image.open(dataset[1]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
+        image = dataset[0]["image"].convert("RGB")

         processor = self.default_processor
         pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)


@@ -97,9 +97,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)

-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1470,7 +1468,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")

         return ds[:num_samples]
@@ -1836,9 +1834,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1862,9 +1858,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1963,9 +1957,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}

         def run_model(lang):
-            ds = load_dataset(
-                "mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
-            )
+            ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
             sample = next(iter(ds))

             wav2vec2_lang = LANG_MAP[lang]

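All the Common Voice call sites above keep `streaming=True`, so dropping `trust_remote_code` is the only change and nothing beyond the first example is ever downloaded. A sketch of the pattern, assuming access to the gated `mozilla-foundation/common_voice_11_0` repo:

```python
from datasets import Audio, load_dataset

ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))  # decode/resample to 16 kHz
sample = next(iter(ds))  # pulls a single example over the network
audio_array = sample["audio"]["array"]
```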

@@ -463,9 +463,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
     def test_word_time_stamp_integration(self):
         import torch

-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         ds_iter = iter(ds)
         sample = next(ds_iter)


@@ -473,7 +473,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")

         return ds[:num_samples]


@@ -1645,9 +1645,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")
         model.to(torch_device)

-        ds = load_dataset(
-            "facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))

         input_speech = next(iter(ds))["audio"]["array"]
@@ -1714,11 +1712,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         token = os.getenv("HF_HUB_READ_TOKEN", True)
         ds = load_dataset(
-            "mozilla-foundation/common_voice_6_1",
+            "hf-internal-testing/fixtures_common_voice",
             "ja",
             split="test",
             streaming=True,
-            trust_remote_code=True,
             token=token,
         )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
@@ -1728,7 +1725,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):
             torch_device
         )

-        EXPECTED_TRANSCRIPTS = ["木村さんに電話を貸してもらいました", " Kimura-san called me."]
+        EXPECTED_TRANSCRIPTS = [
+            "夏の時期の時期でした",
+            " It was the time of day and all of the pens left during the summer.",
+        ]

         generated_ids = model.generate(
             input_features.repeat(2, 1, 1),


@@ -179,7 +179,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         model = "superb/wav2vec2-base-superb-ks"
         audio_classifier = pipeline("audio-classification", model=model)

-        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)
+        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
         audio = np.array(dataset[3]["speech"], dtype=np.float32)
         output = audio_classifier(audio, top_k=4)

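The audio-classification test feeds the pipeline a raw waveform; `superb_dummy` stores it as a plain float list under `speech`, so only a dtype cast is needed. A hedged sketch of the full round trip:

```python
import numpy as np
from datasets import load_dataset
from transformers import pipeline

dataset = load_dataset("anton-l/superb_dummy", "ks", split="test")
audio = np.array(dataset[3]["speech"], dtype=np.float32)  # pipelines expect float32
audio_classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-ks")
output = audio_classifier(audio, top_k=4)
```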

@@ -265,9 +265,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @require_torch
     @require_pyctcdecode
     def test_large_model_pt_with_lm(self):
-        dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
-        third_item = next(iter(dataset["test"].skip(3)))
-        filename = third_item["file"]
+        filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")

         speech_recognizer = pipeline(
             task="automatic-speech-recognition",
@@ -388,7 +386,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=8,
             stride_length_s=1,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         sample = next(iter(data))

         res = pipe(sample["audio"]["array"])
@@ -434,7 +432,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             stride_length_s=1,
             return_language=True,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         sample = next(iter(data))

         res = pipe(sample["audio"]["array"])
@@ -489,7 +487,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             task="automatic-speech-recognition",
             model="openai/whisper-tiny.en",
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         samples = [next(iter(data)) for _ in range(8)]

         audio = np.concatenate([sample["audio"]["array"] for sample in samples])
@@ -1125,9 +1123,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_non_distil(self):
         # Load data:
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
         sample = dataset[0]["audio"]

         # Load model:
@@ -1169,9 +1165,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_distil(self):
         # Load data:
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
         sample = dataset[0]["audio"]

         # Load model:

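For `test_large_model_pt_with_lm`, streaming an entire script dataset just to reach the fourth sample is replaced by downloading the one file the test needs; ASR pipelines accept a local audio path directly. The sketch mirrors the new test code:

```python
from huggingface_hub import hf_hub_download

# One cached file instead of iterating a streaming dataset to item 4.
filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")
print(filename)  # local path, ready to hand to an ASR pipeline
```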

@@ -601,9 +601,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)

-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-        file = image[0]["file"]
-        outputs = image_segmenter(file, threshold=threshold)
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = ds[0]["image"].convert("RGB")
+        outputs = image_segmenter(image, threshold=threshold)

         # Shortening by hashing
         for o in outputs:
@@ -655,9 +655,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
     def test_oneformer(self):
         image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")

-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-        file = image[0]["file"]
-        outputs = image_segmenter(file, threshold=0.99)
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = ds[0]["image"].convert("RGB")
+        outputs = image_segmenter(image, threshold=0.99)

         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])
@@ -679,7 +679,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         )

         # Different task
-        outputs = image_segmenter(file, threshold=0.99, subtask="instance")
+        outputs = image_segmenter(image, threshold=0.99, subtask="instance")

         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])
@@ -701,7 +701,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         )

         # Different task
-        outputs = image_segmenter(file, subtask="semantic")
+        outputs = image_segmenter(image, subtask="semantic")

         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])