Mirror of https://github.com/huggingface/transformers.git
Remove script datasets in tests (#38940)
* remove trust_remote_code
* again
* Revert "Skip some tests for now (#38931)"
This reverts commit 31d30b7224
.
* again
* style
* again
* again
* style
* fix integration test
* fix tests
* style
* fix
* fix
* fix the last ones
* style
* last one
* fix last
* fix
---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Commit 858f9b71a8 (parent: 3c322c9cdf)
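The change repeated throughout the hunks below is dropping `trust_remote_code=True` from `load_dataset(...)` calls, since the datasets used in tests and docstrings are now consumed as plain data files (or swapped for already-converted repositories such as `halabi2016/arabic_speech_corpus`). A minimal sketch of the before/after pattern, using the `nielsr/funsd` repository that appears in several hunks; that this repo is already served without a loading script is an assumption here:

```python
from datasets import load_dataset

# Before: script-backed dataset, requires opting in to remote code execution
# dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)

# After: the same data served as plain files, no remote code needed
dataset = load_dataset("nielsr/funsd", split="train")
example = dataset[0]
print(example["words"][:5], example["bboxes"][:5])
```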
@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:
 ```python
 >>> # let's load an audio sample from an Arabic speech corpus
 >>> from datasets import load_dataset
->>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
+>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
 >>> audio_sample = next(iter(dataset))["audio"]

 >>> # now, process it
@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:
 ```python
 >>> # let's load an audio sample from an Arabic speech corpus
 >>> from datasets import load_dataset
->>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
+>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
 >>> audio_sample = next(iter(dataset))["audio"]

 >>> # now, process it
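Both copies of the documentation hunk above (the same example appears in two model docs) stream the corpus, so only the first record is materialized. A small sketch of that access pattern; the `"audio"` column comes from the hunk, while decoding it to an array plus sampling rate is the usual behavior of the `datasets` audio feature and is assumed here:

```python
from datasets import load_dataset

# Streaming: samples are fetched lazily instead of downloading the whole split
dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)

# Take only the first example; the audio feature decodes on access
audio_sample = next(iter(dataset))["audio"]
waveform = audio_sample["array"]
sampling_rate = audio_sample["sampling_rate"]
```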
@@ -264,7 +264,6 @@ class ExamplesTests(TestCasePlus):
 --dataset_config clean
 --train_split_name validation
 --eval_split_name validation
---trust_remote_code
 --output_dir {tmp_dir}
 --overwrite_output_dir
 --num_train_epochs=2
@@ -312,7 +312,6 @@ class ExamplesTestsNoTrainer(TestCasePlus):
 {self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
 --model_name_or_path google/vit-base-patch16-224-in21k
 --dataset_name hf-internal-testing/cats_vs_dogs_sample
---trust_remote_code
 --learning_rate 1e-4
 --per_device_train_batch_size 2
 --per_device_eval_batch_size 1
@@ -17,7 +17,6 @@ import json
 import logging
 import os
 import sys
-import unittest
 from unittest.mock import patch

 from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
@@ -391,7 +390,6 @@ class ExamplesTests(TestCasePlus):
 --output_dir {tmp_dir}
 --model_name_or_path google/vit-base-patch16-224-in21k
 --dataset_name hf-internal-testing/cats_vs_dogs_sample
---trust_remote_code
 --do_train
 --do_eval
 --learning_rate 1e-4
@@ -415,7 +413,6 @@ class ExamplesTests(TestCasePlus):
 result = get_results(tmp_dir)
 self.assertGreaterEqual(result["eval_accuracy"], 0.8)

-@unittest.skip("temporary to avoid failing on circleci")
 def test_run_speech_recognition_ctc(self):
 tmp_dir = self.get_auto_remove_tmp_dir()
 testargs = f"""
@@ -426,7 +423,6 @@ class ExamplesTests(TestCasePlus):
 --dataset_config_name clean
 --train_split_name validation
 --eval_split_name validation
---trust_remote_code
 --do_train
 --do_eval
 --learning_rate 1e-4
@@ -447,7 +443,6 @@ class ExamplesTests(TestCasePlus):
 result = get_results(tmp_dir)
 self.assertLess(result["eval_loss"], result["train_loss"])

-@unittest.skip("temporary to avoid failing on circleci")
 def test_run_speech_recognition_ctc_adapter(self):
 tmp_dir = self.get_auto_remove_tmp_dir()
 testargs = f"""
@@ -458,7 +453,6 @@ class ExamplesTests(TestCasePlus):
 --dataset_config_name clean
 --train_split_name validation
 --eval_split_name validation
---trust_remote_code
 --do_train
 --do_eval
 --learning_rate 1e-4
@@ -481,7 +475,6 @@ class ExamplesTests(TestCasePlus):
 self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "./adapter.tur.safetensors")))
 self.assertLess(result["eval_loss"], result["train_loss"])

-@unittest.skip("temporary to avoid failing on circleci")
 def test_run_speech_recognition_seq2seq(self):
 tmp_dir = self.get_auto_remove_tmp_dir()
 testargs = f"""
@@ -492,7 +485,6 @@ class ExamplesTests(TestCasePlus):
 --dataset_config_name clean
 --train_split_name validation
 --eval_split_name validation
---trust_remote_code
 --do_train
 --do_eval
 --learning_rate 1e-4
@@ -520,7 +512,6 @@ class ExamplesTests(TestCasePlus):
 --output_dir {tmp_dir}
 --model_name_or_path hf-internal-testing/tiny-random-wav2vec2
 --dataset_name anton-l/superb_demo
---trust_remote_code
 --dataset_config_name ks
 --train_split_name test
 --eval_split_name test
@@ -555,7 +546,6 @@ class ExamplesTests(TestCasePlus):
 --dataset_name hf-internal-testing/librispeech_asr_dummy
 --dataset_config_names clean
 --dataset_split_names validation
---trust_remote_code
 --learning_rate 1e-4
 --per_device_train_batch_size 4
 --per_device_eval_batch_size 4
@@ -576,7 +566,6 @@ class ExamplesTests(TestCasePlus):
 run_mae.py
 --output_dir {tmp_dir}
 --dataset_name hf-internal-testing/cats_vs_dogs_sample
---trust_remote_code
 --do_train
 --do_eval
 --learning_rate 1e-4
@@ -315,7 +315,6 @@ class ExamplesTests(TestCasePlus):
 testargs = f"""
 run_image_classification.py
 --dataset_name hf-internal-testing/cats_vs_dogs_sample
---trust_remote_code
 --model_name_or_path microsoft/resnet-18
 --do_train
 --do_eval
@@ -206,7 +206,7 @@ def convert_audio_spectrogram_transformer_checkpoint(model_name, pytorch_dump_fo

 if "speech-commands" in model_name:
 # TODO: Convert dataset to Parquet
-dataset = load_dataset("google/speech_commands", "v0.02", split="validation", trust_remote_code=True)
+dataset = load_dataset("google/speech_commands", "v0.02", split="validation")
 waveform = dataset[0]["audio"]["array"]
 else:
 filepath = hf_hub_download(
@@ -266,7 +266,7 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path):
 # Check outputs on an image
 if is_semantic:
 image_processor = BeitImageProcessor(size=config.image_size, do_center_crop=False)
-ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
 image = Image.open(ds[0]["file"])
 else:
 image_processor = BeitImageProcessor(
@@ -226,7 +226,7 @@ def convert_wav2vec2_checkpoint(

 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-lv60")

-ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
+ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
 input_audio = [x["array"] for x in ds[:4]["audio"]]

 inputs = processor(input_audio, return_tensors="pt", padding=True)
@@ -1212,7 +1212,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
 >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
 >>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

->>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd", split="train")
 >>> example = dataset[0]
 >>> question = "what's his name?"
 >>> words = example["words"]
@@ -1601,7 +1601,7 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer
 >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
 >>> model = TFLayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

->>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd", split="train")
 >>> example = dataset[0]
 >>> question = "what's his name?"
 >>> words = example["words"]
@@ -753,9 +753,8 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
 >>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")


->>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
->>> image_path = dataset["test"][0]["file"]
->>> image = Image.open(image_path).convert("RGB")
+>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
+>>> image = dataset["test"][0]["image"]

 >>> encoding = processor(image, return_tensors="pt")

@@ -943,7 +942,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):

 >>> set_seed(0)

->>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True, trust_remote_code=True)
+>>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True)
 >>> data = next(iter(dataset))
 >>> image = data["image"].convert("RGB")
@@ -1145,7 +1144,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):

 >>> set_seed(0)

->>> datasets = load_dataset("nielsr/funsd", split="test", trust_remote_code=True)
+>>> datasets = load_dataset("nielsr/funsd", split="test")
 >>> labels = datasets.features["ner_tags"].feature.names
 >>> id2label = {v: k for v, k in enumerate(labels)}
@@ -1302,9 +1301,8 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
 >>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")

->>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
->>> image_path = dataset["test"][0]["file"]
->>> image = Image.open(image_path).convert("RGB")
+>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
+>>> image = dataset["test"][0]["image"]
 >>> question = "When is coffee break?"
 >>> encoding = processor(image, question, return_tensors="pt")
@@ -736,7 +736,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -951,7 +951,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -1052,7 +1052,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = AutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> question = "what's his name?"
@@ -1172,7 +1172,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -1296,7 +1296,7 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel):
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -1439,7 +1439,7 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -1566,7 +1566,7 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -1703,7 +1703,7 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn
 >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
 >>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> question = "what's his name?"
@@ -644,7 +644,7 @@ class LiltModel(LiltPreTrainedModel):
 >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 >>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> words = example["tokens"]
 >>> boxes = example["bboxes"]
@@ -784,7 +784,7 @@ class LiltForSequenceClassification(LiltPreTrainedModel):
 >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 >>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> words = example["tokens"]
 >>> boxes = example["bboxes"]
@@ -899,7 +899,7 @@ class LiltForTokenClassification(LiltPreTrainedModel):
 >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 >>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> words = example["tokens"]
 >>> boxes = example["bboxes"]
@@ -1016,7 +1016,7 @@ class LiltForQuestionAnswering(LiltPreTrainedModel):
 >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 >>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> words = example["tokens"]
 >>> boxes = example["bboxes"]
@@ -2197,7 +2197,7 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel, GenerationMixin):
 >>> from datasets import load_dataset

 >>> dataset = load_dataset(
-... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
+... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
 ... ) # doctest: +IGNORE_RESULT
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -2878,7 +2878,7 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel):
 >>> import torch

 >>> dataset = load_dataset(
-... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
+... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
 ... ) # doctest: +IGNORE_RESULT
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -1608,7 +1608,7 @@ class UdopModel(UdopPreTrainedModel):

 >>> # load an example image, along with the words and coordinates
 >>> # which were extracted using an OCR engine
->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -1817,7 +1817,7 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin):

 >>> # load an example image, along with the words and coordinates
 >>> # which were extracted using an OCR engine
->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -2029,7 +2029,7 @@ class UdopEncoderModel(UdopPreTrainedModel):

 >>> # load an example image, along with the words and coordinates
 >>> # which were extracted using an OCR engine
->>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
+>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
 >>> example = dataset[0]
 >>> image = example["image"]
 >>> words = example["tokens"]
@@ -590,7 +590,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):
 >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")

 >>> # load first sample of English common_voice
->>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
+>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
 >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
 >>> dataset_iter = iter(dataset)
 >>> sample = next(dataset_iter)
@@ -546,7 +546,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
 >>> processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")

 >>> # load first sample of English common_voice
->>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
+>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
 >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
 >>> dataset_iter = iter(dataset)
 >>> sample = next(dataset_iter)
@@ -1670,7 +1670,7 @@ FLAX_WHISPER_AUDIO_CLASSIFICATION_DOCSTRING = r"""
 >>> model = FlaxWhisperForAudioClassification.from_pretrained(
 ... "sanchit-gandhi/whisper-medium-fleurs-lang-id", from_pt=True
 ... )
->>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True, trust_remote_code=True)
+>>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True)

 >>> sample = next(iter(ds))
@@ -423,7 +423,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r"""
 >>> import torch
 >>> from datasets import load_dataset

->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -449,7 +449,7 @@ PT_SPEECH_CTC_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch

->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -484,7 +484,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch

->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -520,7 +520,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch

->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -549,7 +549,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch

->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -584,7 +584,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r"""
 >>> import torch
 >>> from datasets import load_dataset

->>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+>>> dataset = load_dataset("huggingface/cats-image")
 >>> image = dataset["test"]["image"][0]

 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -609,7 +609,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r"""
 >>> import torch
 >>> from datasets import load_dataset

->>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+>>> dataset = load_dataset("huggingface/cats-image")
 >>> image = dataset["test"]["image"][0]

 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -1194,7 +1194,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r"""
 >>> from transformers import AutoProcessor, {model_class}
 >>> from datasets import load_dataset

->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -1219,7 +1219,7 @@ TF_SPEECH_CTC_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import tensorflow as tf

->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -1254,7 +1254,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
 >>> from transformers import AutoImageProcessor, {model_class}
 >>> from datasets import load_dataset

->>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+>>> dataset = load_dataset("huggingface/cats-image")
 >>> image = dataset["test"]["image"][0]

 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -1277,7 +1277,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> from datasets import load_dataset

->>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
+>>> dataset = load_dataset("huggingface/cats-image"))
 >>> image = dataset["test"]["image"][0]

 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -270,7 +270,6 @@ def make_task_cmds():
 "img_clas": f"""
 {scripts_dir}/image-classification/run_image_classification.py
 --dataset_name hf-internal-testing/cats_vs_dogs_sample
---trust_remote_code
 --remove_unused_columns False
 --max_steps 10
 --image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
@@ -27,8 +27,6 @@ if is_torch_available():
 import torch

 if is_vision_available():
-from PIL import Image
-
 from transformers import BeitImageProcessor

 if is_torchvision_available():
@@ -98,23 +96,14 @@ class BeitImageProcessingTester:


 def prepare_semantic_single_inputs():
-dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-image = Image.open(dataset[0]["file"])
-map = Image.open(dataset[1]["file"])
-
-return image, map
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+
+example = ds[0]
+return example["image"], example["map"]


 def prepare_semantic_batch_inputs():
-ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-image1 = Image.open(ds[0]["file"])
-map1 = Image.open(ds[1]["file"])
-image2 = Image.open(ds[2]["file"])
-map2 = Image.open(ds[3]["file"])
-
-return [image1, image2], [map1, map2]
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+
+return list(ds["image"][:2]), list(ds["map"][:2])


 @require_torch
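The rewritten helpers rely on the `hf-internal-testing/fixtures_ade20k` fixture now exposing decoded `image` and `map` columns, so the tests no longer open file paths with PIL. A sketch of how the single-input helper's output would feed the BEiT image processor; the positional segmentation-map argument and the `labels` key follow the surrounding test code, and treating the decoded values as PIL images is an assumption:

```python
from datasets import load_dataset
from transformers import BeitImageProcessor

ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image, seg_map = ds[0]["image"], ds[0]["map"]  # already decoded, no Image.open needed

image_processing = BeitImageProcessor(do_reduce_labels=False)
# The segmentation map is passed alongside the image and comes back as integer labels
encoding = image_processing(image, seg_map, return_tensors="pt")
print(encoding["pixel_values"].shape, encoding["labels"].shape)
```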
@@ -157,7 +146,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
 self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84})
 self.assertEqual(image_processor.do_reduce_labels, True)

-@unittest.skip("temporary to avoid failing on circleci")
 def test_call_segmentation_maps(self):
 for image_processing_class in self.image_processor_list:
 # Initialize image_processing
@@ -265,7 +253,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
 self.assertTrue(encoding["labels"].min().item() >= 0)
 self.assertTrue(encoding["labels"].max().item() <= 255)

-@unittest.skip("temporary to avoid failing on circleci")
 def test_reduce_labels(self):
 for image_processing_class in self.image_processor_list:
 # Initialize image_processing
@@ -282,7 +269,6 @@ class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
 self.assertTrue(encoding["labels"].min().item() >= 0)
 self.assertTrue(encoding["labels"].max().item() <= 255)

-@unittest.skip("temporary to avoid failing on circleci")
 def test_slow_fast_equivalence(self):
 if not self.test_slow_image_processor or not self.test_fast_image_processor:
 self.skipTest(reason="Skipping slow/fast equivalence test")
@@ -16,7 +16,6 @@
 import unittest

 from datasets import load_dataset
-from packaging import version

 from transformers import BeitConfig
 from transformers.testing_utils import (
@@ -53,7 +52,6 @@ if is_torch_available():


 if is_vision_available():
-import PIL
 from PIL import Image

 from transformers import BeitImageProcessor
@@ -504,8 +502,8 @@ class BeitModelIntegrationTest(unittest.TestCase):

 image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)

-ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-image = Image.open(ds[0]["file"])
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+image = ds[0]["image"].convert("RGB")
 inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

 # forward pass
@@ -517,27 +515,14 @@ class BeitModelIntegrationTest(unittest.TestCase):
 expected_shape = torch.Size((1, 150, 160, 160))
 self.assertEqual(logits.shape, expected_shape)

-is_pillow_less_than_9 = version.parse(PIL.__version__) < version.parse("9.0.0")
-
-if is_pillow_less_than_9:
-expected_slice = torch.tensor(
-[
-[[-4.9225, -2.3954, -3.0522], [-2.8822, -1.0046, -1.7561], [-2.9549, -1.3228, -2.1347]],
-[[-5.8168, -3.4129, -4.0778], [-3.8651, -2.2214, -3.0277], [-3.8356, -2.4643, -3.3535]],
-[[-0.0078, 3.9952, 4.0754], [2.9856, 4.6944, 5.0035], [3.2413, 4.7813, 4.9969]],
-],
-device=torch_device,
-)
-else:
-expected_slice = torch.tensor(
-[
-[[-4.8960, -2.3688, -3.0355], [-2.8478, -0.9836, -1.7418], [-2.9449, -1.3332, -2.1456]],
-[[-5.8081, -3.4124, -4.1006], [-3.8561, -2.2081, -3.0323], [-3.8365, -2.4601, -3.3669]],
-[[-0.0309, 3.9868, 4.0540], [2.9640, 4.6877, 4.9976], [3.2081, 4.7690, 4.9942]],
-],
-device=torch_device,
-)
+expected_slice = torch.tensor(
+[
+[[-4.8963, -2.3696, -3.0359], [-2.8485, -0.9842, -1.7426], [-2.9453, -1.3338, -2.1463]],
+[[-5.8099, -3.4140, -4.1025], [-3.8578, -2.2100, -3.0337], [-3.8383, -2.4615, -3.3681]],
+[[-0.0314, 3.9864, 4.0536], [2.9637, 4.6879, 4.9976], [3.2074, 4.7690, 4.9946]],
+],
+device=torch_device,
+)

 torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)

 @slow
@@ -547,8 +532,8 @@ class BeitModelIntegrationTest(unittest.TestCase):

 image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)

-ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-image = Image.open(ds[0]["file"])
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+image = ds[0]["image"].convert("RGB")
 inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

 # forward pass
@@ -669,7 +669,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
 return [x["array"] for x in speech_samples]

 def _load_superb(self, task, num_samples):
-ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+ds = load_dataset("anton-l/superb_dummy", task, split="test")

 return ds[:num_samples]
@@ -29,8 +29,6 @@ if is_torch_available():
 import torch

 if is_vision_available():
-from PIL import Image
-
 from transformers import DPTImageProcessor

 if is_torchvision_available():
@@ -94,24 +92,15 @@ class DPTImageProcessingTester:

 # Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_single_inputs
 def prepare_semantic_single_inputs():
-dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-image = Image.open(dataset[0]["file"])
-map = Image.open(dataset[1]["file"])
-
-return image, map
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+
+example = ds[0]
+return example["image"], example["map"]


 # Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_batch_inputs
 def prepare_semantic_batch_inputs():
-ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-image1 = Image.open(ds[0]["file"])
-map1 = Image.open(ds[1]["file"])
-image2 = Image.open(ds[2]["file"])
-map2 = Image.open(ds[3]["file"])
-
-return [image1, image2], [map1, map2]
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+
+return list(ds["image"][:2]), list(ds["map"][:2])


 @require_torch
@@ -187,7 +176,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):

 self.assertEqual(list(pixel_values.shape), [1, 3, 512, 672])

-@unittest.skip("temporary to avoid failing on circleci")
 # Copied from transformers.tests.models.beit.test_image_processing_beit.BeitImageProcessingTest.test_call_segmentation_maps
 def test_call_segmentation_maps(self):
 for image_processing_class in self.image_processor_list:
@@ -296,7 +284,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
 self.assertTrue(encoding["labels"].min().item() >= 0)
 self.assertTrue(encoding["labels"].max().item() <= 255)

-@unittest.skip("temporary to avoid failing on circleci")
 def test_reduce_labels(self):
 for image_processing_class in self.image_processor_list:
 image_processor = image_processing_class(**self.image_processor_dict)
@@ -319,7 +306,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
 # Compare with non-reduced label to see if it's reduced by 1
 self.assertEqual(encoding["labels"][first_non_zero_coords].item(), first_non_zero_value - 1)

-@unittest.skip("temporary to avoid failing on circleci")
 def test_slow_fast_equivalence(self):
 if not self.test_slow_image_processor or not self.test_fast_image_processor:
 self.skipTest(reason="Skipping slow/fast equivalence test")
@@ -341,7 +327,6 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
 )
 self.assertTrue(torch.allclose(image_encoding_slow.labels, image_encoding_fast.labels, atol=1e-1))

-@unittest.skip("temporary to avoid failing on circleci")
 def test_slow_fast_equivalence_batched(self):
 if not self.test_slow_image_processor or not self.test_fast_image_processor:
 self.skipTest(reason="Skipping slow/fast equivalence test")
@@ -391,7 +391,7 @@ class GraniteSpeechForConditionalGenerationIntegrationTest(unittest.TestCase):

 EXPECTED_DECODED_TEXT = [
 "systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantmister quilter is the apostle of the middle classes and we are glad to welcome his gospel",
-"systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantnor is mister quilp's manner less interesting than his matter"
+"systemKnowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant\nusercan you transcribe the speech into a written format?\nassistantnor is mister quilter's manner less interesting than his matter"
 ] # fmt: skip

 self.assertEqual(
@@ -767,7 +767,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
 def _load_superb(self, task, num_samples):
 from datasets import load_dataset

-ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+ds = load_dataset("anton-l/superb_dummy", task, split="test")

 return ds[:num_samples]
@ -123,13 +123,13 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
|
|||||||
def test_layoutlmv2_integration_test(self):
|
def test_layoutlmv2_integration_test(self):
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
|
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
||||||
|
|
||||||
for image_processing_class in self.image_processor_list:
|
for image_processing_class in self.image_processor_list:
|
||||||
# with apply_OCR = True
|
# with apply_OCR = True
|
||||||
image_processing = image_processing_class()
|
image_processing = image_processing_class()
|
||||||
|
|
||||||
image = Image.open(ds[0]["file"]).convert("RGB")
|
image = ds[0]["image"]
|
||||||
|
|
||||||
encoding = image_processing(image, return_tensors="pt")
|
encoding = image_processing(image, return_tensors="pt")
|
||||||
|
|
||||||
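Throughout this diff, the `hf-internal-testing/fixtures_docvqa` fixture now exposes decoded PIL images under an `image` column instead of local file paths, so the tests no longer open files with `PIL.Image.open`. A minimal sketch of that access pattern (assuming `datasets` with its `Image` feature and `Pillow` installed):

```python
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
image = ds[0]["image"]        # already a PIL.Image.Image, decoded by the Image feature on access
image = image.convert("RGB")  # convert only when a specific mode is required
print(image.size)
```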
@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_pytesseract_available():
-    from PIL import Image
-
     from transformers import LayoutLMv2ImageProcessor


@@ -156,11 +154,11 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
         from datasets import load_dataset

         # set up
-        datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+        datasets = load_dataset("nielsr/funsd")
         processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")

         def preprocess_data(examples):
-            images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+            images = [image.convert("RGB") for image in examples["image"]]
             words = examples["words"]
             boxes = examples["bboxes"]
             word_labels = examples["ner_tags"]
@@ -192,12 +190,8 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
         # we verify our implementation on 2 document images from the DocVQA dataset
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

     @cached_property
     def get_tokenizers(self):
@@ -22,8 +22,6 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im


 if is_pytesseract_available():
-    from PIL import Image
-
     from transformers import LayoutLMv3ImageProcessor

 if is_torchvision_available():
@@ -103,17 +101,16 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
         image_processor = image_processing_class.from_dict(self.image_processor_dict, size=42)
         self.assertEqual(image_processor.size, {"height": 42, "width": 42})

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_LayoutLMv3_integration_test(self):
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")

         # with apply_OCR = True
         for image_processing_class in self.image_processor_list:
             image_processor = image_processing_class()

-            image = Image.open(ds[0]["file"]).convert("RGB")
+            image = ds[0]["image"].convert("RGB")

             encoding = image_processor(image, return_tensors="pt")

@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_pytesseract_available():
-    from PIL import Image
-
     from transformers import LayoutLMv3ImageProcessor


@@ -172,12 +170,8 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
         # we verify our implementation on 2 document images from the DocVQA dataset
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

     @cached_property
     def get_tokenizers(self):
@@ -33,8 +33,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_pytesseract_available():
-    from PIL import Image
-
     from transformers import LayoutLMv2ImageProcessor


@@ -162,11 +160,11 @@ class LayoutXLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
         from datasets import load_dataset

         # set up
-        datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+        datasets = load_dataset("nielsr/funsd")
         processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base", apply_ocr=False)

         def preprocess_data(examples):
-            images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+            images = [image.convert("RGB") for image in examples["image"]]
             words = examples["words"]
             boxes = examples["bboxes"]
             word_labels = examples["ner_tags"]
@@ -200,12 +198,8 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
         # we verify our implementation on 2 document images from the DocVQA dataset
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

     @cached_property
     def get_tokenizers(self):
@@ -27,8 +27,6 @@ if is_torch_available():
     import torch

 if is_vision_available():
-    from PIL import Image
-
     from transformers import MobileViTImageProcessor


@@ -86,23 +84,14 @@ class MobileViTImageProcessingTester:


 def prepare_semantic_single_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-    image = Image.open(dataset[0]["file"])
-    map = Image.open(dataset[1]["file"])
-
-    return image, map
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    example = ds[0]
+    return example["image"], example["map"]


 def prepare_semantic_batch_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-    image1 = Image.open(dataset[0]["file"])
-    map1 = Image.open(dataset[1]["file"])
-    image2 = Image.open(dataset[2]["file"])
-    map2 = Image.open(dataset[3]["file"])
-
-    return [image1, image2], [map1, map2]
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    return list(ds["image"][:2]), list(ds["map"][:2])


 @require_torch
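Both helpers above now read the decoded `image` and `map` columns of the ADE20k fixture directly, instead of opening numbered files. A minimal sketch of that access pattern (assuming `datasets` is installed and the fixture dataset is reachable on the Hub):

```python
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")

# Single example: both columns hold PIL images, decoded on access.
image, seg_map = ds[0]["image"], ds[0]["map"]

# Batched example: column access returns lists, truncated here to two items.
images, seg_maps = list(ds["image"][:2]), list(ds["map"][:2])
```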
@@ -135,7 +124,6 @@ class MobileViTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertEqual(image_processor.size, {"shortest_edge": 42})
         self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84})

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_call_segmentation_maps(self):
         # Initialize image_processing
         image_processing = self.image_processing_class(**self.image_processor_dict)
@@ -86,8 +86,12 @@ class NougatImageProcessingTester:
         return self.num_channels, self.size["height"], self.size["width"]

     def prepare_dummy_image(self):
+        revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
         filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
+            repo_id="hf-internal-testing/fixtures_docvqa",
+            filename="nougat_pdf.png",
+            repo_type="dataset",
+            revision=revision,
         )
         image = Image.open(filepath).convert("RGB")
         return image
@@ -136,7 +140,6 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
         self.assertEqual(image_processor.size, {"height": 42, "width": 42})

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_expected_output(self):
         dummy_image = self.image_processor_tester.prepare_dummy_image()
         image_processor = self.image_processor
@@ -180,13 +183,16 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertEqual((3, 100, 200), aligned_image.shape)

     def prepare_dummy_np_image(self):
+        revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
         filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
+            repo_id="hf-internal-testing/fixtures_docvqa",
+            filename="nougat_pdf.png",
+            repo_type="dataset",
+            revision=revision,
         )
         image = Image.open(filepath).convert("RGB")
         return np.array(image)

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_crop_margin_equality_cv2_python(self):
         image = self.prepare_dummy_np_image()
         image_processor = self.image_processor
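The Nougat fixture is now fetched at a pinned dataset revision, so the PNG cannot change underneath the test. A minimal sketch of the download pattern used above (assuming `huggingface_hub` and `Pillow` are installed):

```python
from huggingface_hub import hf_hub_download
from PIL import Image

# Pinning revision keeps the download reproducible even if the dataset repo is updated later.
filepath = hf_hub_download(
    repo_id="hf-internal-testing/fixtures_docvqa",
    filename="nougat_pdf.png",
    repo_type="dataset",
    revision="ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db",
)
image = Image.open(filepath).convert("RGB")
```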
@@ -842,11 +842,8 @@ def prepare_img():

 # Helper functions for optical flow integration test
 def prepare_optical_flow_images():
-    dataset = load_dataset("hf-internal-testing/fixtures_sintel", split="test", trust_remote_code=True)
-    image1 = Image.open(dataset[0]["file"]).convert("RGB")
-    image2 = Image.open(dataset[0]["file"]).convert("RGB")
-
-    return image1, image2
+    ds = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
+    return list(ds["image"][:2])


 def normalize(img):
@@ -27,8 +27,6 @@ if is_torch_available():
     import torch

 if is_vision_available():
-    from PIL import Image
-
     from transformers import SegformerImageProcessor


@@ -86,23 +84,14 @@ class SegformerImageProcessingTester:


 def prepare_semantic_single_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-    image = Image.open(dataset[0]["file"])
-    map = Image.open(dataset[1]["file"])
-
-    return image, map
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    example = ds[0]
+    return example["image"], example["map"]


 def prepare_semantic_batch_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-
-    image1 = Image.open(dataset[0]["file"])
-    map1 = Image.open(dataset[1]["file"])
-    image2 = Image.open(dataset[2]["file"])
-    map2 = Image.open(dataset[3]["file"])
-
-    return [image1, image2], [map1, map2]
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    return list(ds["image"][:2]), list(ds["map"][:2])


 @require_torch
@@ -138,7 +127,6 @@ class SegformerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertEqual(image_processor.size, {"height": 42, "width": 42})
         self.assertEqual(image_processor.do_reduce_labels, True)

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_call_segmentation_maps(self):
         # Initialize image_processing
         image_processing = self.image_processing_class(**self.image_processor_dict)
@@ -245,7 +233,6 @@ class SegformerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         self.assertTrue(encoding["labels"].min().item() >= 0)
         self.assertTrue(encoding["labels"].max().item() <= 255)

-    @unittest.skip("temporary to avoid failing on circleci")
     def test_reduce_labels(self):
         # Initialize image_processing
         image_processing = self.image_processing_class(**self.image_processor_dict)
@@ -16,9 +16,9 @@ import copy
 import inspect
 import unittest

-from huggingface_hub import hf_hub_download
+from datasets import load_dataset

-from transformers import UdopConfig, is_torch_available, is_vision_available
+from transformers import UdopConfig, is_torch_available
 from transformers.testing_utils import (
     require_sentencepiece,
     require_tokenizers,
@@ -42,10 +42,6 @@ if is_torch_available():
     from transformers import UdopEncoderModel, UdopForConditionalGeneration, UdopModel, UdopProcessor


-if is_vision_available():
-    from PIL import Image
-
-
 class UdopModelTester:
     def __init__(
         self,
@@ -618,12 +614,8 @@ class UdopEncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase):
 class UdopModelIntegrationTests(unittest.TestCase):
     @cached_property
     def image(self):
-        filepath = hf_hub_download(
-            repo_id="hf-internal-testing/fixtures_docvqa", filename="document_2.png", repo_type="dataset"
-        )
-        image = Image.open(filepath).convert("RGB")
-
-        return image
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[1]["image"]

     @cached_property
     def processor(self):
@@ -41,8 +41,6 @@ if is_torch_available():


 if is_pytesseract_available():
-    from PIL import Image
-
     from transformers import LayoutLMv3ImageProcessor


@@ -184,11 +182,11 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
         from datasets import load_dataset

         # set up
-        datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+        datasets = load_dataset("nielsr/funsd")
         processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)

         def preprocess_data(examples):
-            images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+            images = [image.convert("RGB") for image in examples["image"]]
             words = examples["words"]
             boxes = examples["bboxes"]
             word_labels = examples["ner_tags"]
@@ -222,12 +220,8 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
         # we verify our implementation on 2 document images from the DocVQA dataset
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

     @cached_property
     def get_tokenizers(self):
|
@ -566,7 +566,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
|
|||||||
return [x["array"] for x in speech_samples]
|
return [x["array"] for x in speech_samples]
|
||||||
|
|
||||||
def _load_superb(self, task, num_samples):
|
def _load_superb(self, task, num_samples):
|
||||||
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
|
ds = load_dataset("anton-l/superb_dummy", task, split="test")
|
||||||
|
|
||||||
return ds[:num_samples]
|
return ds[:num_samples]
|
||||||
|
|
||||||
|
@ -820,7 +820,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
|
|||||||
return [x["array"] for x in speech_samples]
|
return [x["array"] for x in speech_samples]
|
||||||
|
|
||||||
def _load_superb(self, task, num_samples):
|
def _load_superb(self, task, num_samples):
|
||||||
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
|
ds = load_dataset("anton-l/superb_dummy", task, split="test")
|
||||||
|
|
||||||
return ds[:num_samples]
|
return ds[:num_samples]
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from huggingface_hub import hf_hub_download
|
from datasets import load_dataset
|
||||||
|
|
||||||
from transformers import ConvNextConfig, UperNetConfig
|
from transformers import ConvNextConfig, UperNetConfig
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
@ -41,8 +41,6 @@ if is_torch_available():
|
|||||||
|
|
||||||
|
|
||||||
if is_vision_available():
|
if is_vision_available():
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
from transformers import AutoImageProcessor
|
from transformers import AutoImageProcessor
|
||||||
|
|
||||||
|
|
||||||
@ -277,11 +275,8 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
|||||||
|
|
||||||
# We will verify our results on an image of ADE20k
|
# We will verify our results on an image of ADE20k
|
||||||
def prepare_img():
|
def prepare_img():
|
||||||
filepath = hf_hub_download(
|
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
||||||
repo_id="hf-internal-testing/fixtures_ade20k", repo_type="dataset", filename="ADE_val_00000001.jpg"
|
return ds[0]["image"].convert("RGB")
|
||||||
)
|
|
||||||
image = Image.open(filepath).convert("RGB")
|
|
||||||
return image
|
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
@@ -302,7 +297,7 @@ class UperNetModelIntegrationTest(unittest.TestCase):
         self.assertEqual(outputs.logits.shape, expected_shape)

         expected_slice = torch.tensor(
-            [[-7.5958, -7.5958, -7.4302], [-7.5958, -7.5958, -7.4302], [-7.4797, -7.4797, -7.3068]]
+            [[-7.5969, -7.5969, -7.4313], [-7.5969, -7.5969, -7.4313], [-7.4808, -7.4808, -7.3080]]
         ).to(torch_device)
         torch.testing.assert_close(outputs.logits[0, 0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)

@@ -637,9 +637,9 @@ class ViltModelIntegrationTest(unittest.TestCase):

         processor = self.default_processor

-        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)
-        image1 = Image.open(dataset[0]["file"]).convert("RGB")
-        image2 = Image.open(dataset[1]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="train")
+        image1 = dataset[0]["image"]
+        image2 = dataset[1]["image"]

         text = (
             "The left image contains twice the number of dogs as the right image, and at least two dogs in total are"
@@ -1149,8 +1149,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_handwritten(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
-        image = Image.open(dataset[0]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
+        image = dataset[1]["image"].convert("RGB")

         processor = self.default_processor
         pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
@@ -1174,8 +1174,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_printed(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
-        image = Image.open(dataset[1]["file"]).convert("RGB")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
+        image = dataset[0]["image"].convert("RGB")

         processor = self.default_processor
         pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
@@ -97,9 +97,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)

-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1470,7 +1468,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")

         return ds[:num_samples]

@@ -1836,9 +1834,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
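All of the Common Voice loads in these hunks collapse to a single streaming call once `trust_remote_code` is dropped. A minimal sketch of the pattern (assuming `datasets` is installed; the Common Voice repo may additionally require accepting its terms on the Hub and passing an auth token):

```python
import datasets
from datasets import load_dataset

ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
# Decode audio at 16 kHz, the rate the wav2vec2-style checkpoints in these tests expect.
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
sample = next(iter(ds))
speech = sample["audio"]["array"]
```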
@@ -1862,9 +1858,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1963,9 +1957,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}

         def run_model(lang):
-            ds = load_dataset(
-                "mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
-            )
+            ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
             sample = next(iter(ds))

             wav2vec2_lang = LANG_MAP[lang]
@@ -463,9 +463,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
     def test_word_time_stamp_integration(self):
         import torch

-        ds = load_dataset(
-            "mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         ds_iter = iter(ds)
         sample = next(ds_iter)
@@ -473,7 +473,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
+        ds = load_dataset("anton-l/superb_dummy", task, split="test")

         return ds[:num_samples]

@@ -1645,9 +1645,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")
         model.to(torch_device)

-        ds = load_dataset(
-            "facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
-        )
+        ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))

         input_speech = next(iter(ds))["audio"]["array"]
@@ -1714,11 +1712,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):

         token = os.getenv("HF_HUB_READ_TOKEN", True)
         ds = load_dataset(
-            "mozilla-foundation/common_voice_6_1",
+            "hf-internal-testing/fixtures_common_voice",
             "ja",
             split="test",
             streaming=True,
-            trust_remote_code=True,
             token=token,
         )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
@@ -1728,7 +1725,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):
             torch_device
         )

-        EXPECTED_TRANSCRIPTS = ["木村さんに電話を貸してもらいました", " Kimura-san called me."]
+        EXPECTED_TRANSCRIPTS = [
+            "夏の時期の時期でした",
+            " It was the time of day and all of the pens left during the summer.",
+        ]

         generated_ids = model.generate(
             input_features.repeat(2, 1, 1),
@@ -179,7 +179,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         model = "superb/wav2vec2-base-superb-ks"

         audio_classifier = pipeline("audio-classification", model=model)
-        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)
+        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")

         audio = np.array(dataset[3]["speech"], dtype=np.float32)
         output = audio_classifier(audio, top_k=4)
@@ -265,9 +265,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @require_torch
     @require_pyctcdecode
     def test_large_model_pt_with_lm(self):
-        dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
-        third_item = next(iter(dataset["test"].skip(3)))
-        filename = third_item["file"]
+        filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")

         speech_recognizer = pipeline(
             task="automatic-speech-recognition",
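Instead of streaming the whole `Narsil/asr_dummy` dataset to pick out one sample, the test above now downloads the single FLAC fixture directly and hands its path to the pipeline. A minimal sketch of that pattern (assumptions: `huggingface_hub` and `transformers` with a torch backend installed, plus ffmpeg available so the pipeline can decode the audio file; the checkpoint name is only an illustrative small model):

```python
from huggingface_hub import hf_hub_download
from transformers import pipeline

# Fetch one audio file from the dataset repo; no dataset script is executed at all.
filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")

asr = pipeline(task="automatic-speech-recognition", model="openai/whisper-tiny.en")
print(asr(filename)["text"])
```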
@@ -388,7 +386,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=8,
             stride_length_s=1,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         sample = next(iter(data))

         res = pipe(sample["audio"]["array"])
@@ -434,7 +432,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             stride_length_s=1,
             return_language=True,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         sample = next(iter(data))

         res = pipe(sample["audio"]["array"])
@@ -489,7 +487,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             task="automatic-speech-recognition",
             model="openai/whisper-tiny.en",
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
         samples = [next(iter(data)) for _ in range(8)]
         audio = np.concatenate([sample["audio"]["array"] for sample in samples])

@@ -1125,9 +1123,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_non_distil(self):
         # Load data:
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
        sample = dataset[0]["audio"]

         # Load model:
@@ -1169,9 +1165,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_distil(self):
         # Load data:
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
         sample = dataset[0]["audio"]

         # Load model:
@@ -601,9 +601,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):

         image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)

-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-        file = image[0]["file"]
-        outputs = image_segmenter(file, threshold=threshold)
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = ds[0]["image"].convert("RGB")
+        outputs = image_segmenter(image, threshold=threshold)

         # Shortening by hashing
         for o in outputs:
@@ -655,9 +655,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
     def test_oneformer(self):
         image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")

-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-        file = image[0]["file"]
-        outputs = image_segmenter(file, threshold=0.99)
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = ds[0]["image"].convert("RGB")
+        outputs = image_segmenter(image, threshold=0.99)
         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])
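These pipeline tests now pass the PIL image from the fixture straight to the segmenter instead of a file path. A minimal sketch of the new flow (assuming `transformers`, `datasets`, and a torch backend are installed; the checkpoint is the one already used in the test above, and the score field may be None for purely semantic results):

```python
from datasets import load_dataset
from transformers import pipeline

segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")

ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = ds[0]["image"].convert("RGB")

# The pipeline accepts PIL images directly; each result carries a label, a score and a mask.
for result in segmenter(image, threshold=0.99):
    print(result["label"], result["score"])
```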
@@ -679,7 +679,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         )

         # Different task
-        outputs = image_segmenter(file, threshold=0.99, subtask="instance")
+        outputs = image_segmenter(image, threshold=0.99, subtask="instance")
         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])
@@ -701,7 +701,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         )

         # Different task
-        outputs = image_segmenter(file, subtask="semantic")
+        outputs = image_segmenter(image, subtask="semantic")
         # Shortening by hashing
         for o in outputs:
             o["mask"] = mask_to_test_readable(o["mask"])