mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 05:10:06 +06:00
[CI] lazy loading external datasets (#37218)
This commit is contained in:
parent
a0803a9555
commit
2099287a59
@ -25,7 +25,6 @@ from transformers import (
|
|||||||
)
|
)
|
||||||
from transformers.pipelines import AudioClassificationPipeline, pipeline
|
from transformers.pipelines import AudioClassificationPipeline, pipeline
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
_run_pipeline_tests,
|
|
||||||
compare_pipeline_output_to_hub_spec,
|
compare_pipeline_output_to_hub_spec,
|
||||||
is_pipeline_test,
|
is_pipeline_test,
|
||||||
nested_simplify,
|
nested_simplify,
|
||||||
@ -46,9 +45,15 @@ if is_torch_available():
|
|||||||
class AudioClassificationPipelineTests(unittest.TestCase):
|
class AudioClassificationPipelineTests(unittest.TestCase):
|
||||||
model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
|
model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
|
||||||
tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
|
tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
|
||||||
|
_dataset = None
|
||||||
|
|
||||||
if _run_pipeline_tests:
|
@classmethod
|
||||||
_dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
def _load_dataset(cls):
|
||||||
|
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
|
||||||
|
if cls._dataset is None:
|
||||||
|
cls._dataset = datasets.load_dataset(
|
||||||
|
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation"
|
||||||
|
)
|
||||||
|
|
||||||
def get_test_pipeline(
|
def get_test_pipeline(
|
||||||
self,
|
self,
|
||||||
@ -99,6 +104,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
|
|||||||
|
|
||||||
@require_torchaudio
|
@require_torchaudio
|
||||||
def run_torchaudio(self, audio_classifier):
|
def run_torchaudio(self, audio_classifier):
|
||||||
|
self._load_dataset()
|
||||||
# test with a local file
|
# test with a local file
|
||||||
audio = self._dataset[0]["audio"]["array"]
|
audio = self._dataset[0]["audio"]["array"]
|
||||||
output = audio_classifier(audio)
|
output = audio_classifier(audio)
|
||||||
|
@ -21,7 +21,6 @@ from huggingface_hub.utils import insecure_hashlib
|
|||||||
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
|
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
|
||||||
from transformers.pipelines import DepthEstimationPipeline, pipeline
|
from transformers.pipelines import DepthEstimationPipeline, pipeline
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
_run_pipeline_tests,
|
|
||||||
compare_pipeline_output_to_hub_spec,
|
compare_pipeline_output_to_hub_spec,
|
||||||
is_pipeline_test,
|
is_pipeline_test,
|
||||||
nested_simplify,
|
nested_simplify,
|
||||||
@ -59,11 +58,15 @@ def hashimage(image: Image) -> str:
|
|||||||
@require_torch
|
@require_torch
|
||||||
class DepthEstimationPipelineTests(unittest.TestCase):
|
class DepthEstimationPipelineTests(unittest.TestCase):
|
||||||
model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
|
model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
|
||||||
|
_dataset = None
|
||||||
|
|
||||||
if _run_pipeline_tests:
|
@classmethod
|
||||||
|
def _load_dataset(cls):
|
||||||
|
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
|
||||||
|
if cls._dataset is None:
|
||||||
# we use revision="refs/pr/1" until the PR is merged
|
# we use revision="refs/pr/1" until the PR is merged
|
||||||
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
||||||
_dataset = datasets.load_dataset(
|
cls._dataset = datasets.load_dataset(
|
||||||
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -90,6 +93,7 @@ class DepthEstimationPipelineTests(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def run_pipeline_test(self, depth_estimator, examples):
|
def run_pipeline_test(self, depth_estimator, examples):
|
||||||
|
self._load_dataset()
|
||||||
outputs = depth_estimator("./tests/fixtures/tests_samples/COCO/000000039769.png")
|
outputs = depth_estimator("./tests/fixtures/tests_samples/COCO/000000039769.png")
|
||||||
self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, outputs)
|
self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, outputs)
|
||||||
|
|
||||||
|
@ -26,7 +26,6 @@ from transformers import (
|
|||||||
)
|
)
|
||||||
from transformers.pipelines import ImageClassificationPipeline, pipeline
|
from transformers.pipelines import ImageClassificationPipeline, pipeline
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
_run_pipeline_tests,
|
|
||||||
compare_pipeline_output_to_hub_spec,
|
compare_pipeline_output_to_hub_spec,
|
||||||
is_pipeline_test,
|
is_pipeline_test,
|
||||||
nested_simplify,
|
nested_simplify,
|
||||||
@ -59,11 +58,15 @@ else:
|
|||||||
class ImageClassificationPipelineTests(unittest.TestCase):
|
class ImageClassificationPipelineTests(unittest.TestCase):
|
||||||
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
|
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
|
||||||
tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
|
tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
|
||||||
|
_dataset = None
|
||||||
|
|
||||||
if _run_pipeline_tests:
|
@classmethod
|
||||||
|
def _load_dataset(cls):
|
||||||
|
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
|
||||||
|
if cls._dataset is None:
|
||||||
# we use revision="refs/pr/1" until the PR is merged
|
# we use revision="refs/pr/1" until the PR is merged
|
||||||
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
||||||
_dataset = datasets.load_dataset(
|
cls._dataset = datasets.load_dataset(
|
||||||
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -92,6 +95,7 @@ class ImageClassificationPipelineTests(unittest.TestCase):
|
|||||||
return image_classifier, examples
|
return image_classifier, examples
|
||||||
|
|
||||||
def run_pipeline_test(self, image_classifier, examples):
|
def run_pipeline_test(self, image_classifier, examples):
|
||||||
|
self._load_dataset()
|
||||||
outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
|
outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
@ -37,7 +37,6 @@ from transformers import (
|
|||||||
pipeline,
|
pipeline,
|
||||||
)
|
)
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
_run_pipeline_tests,
|
|
||||||
compare_pipeline_output_to_hub_spec,
|
compare_pipeline_output_to_hub_spec,
|
||||||
is_pipeline_test,
|
is_pipeline_test,
|
||||||
nested_simplify,
|
nested_simplify,
|
||||||
@ -89,11 +88,15 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
|
|||||||
+ (MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() if MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING else [])
|
+ (MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() if MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING else [])
|
||||||
+ (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
|
+ (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
|
||||||
)
|
)
|
||||||
|
_dataset = None
|
||||||
|
|
||||||
if _run_pipeline_tests:
|
@classmethod
|
||||||
|
def _load_dataset(cls):
|
||||||
|
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
|
||||||
|
if cls._dataset is None:
|
||||||
# we use revision="refs/pr/1" until the PR is merged
|
# we use revision="refs/pr/1" until the PR is merged
|
||||||
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
||||||
_dataset = datasets.load_dataset(
|
cls._dataset = datasets.load_dataset(
|
||||||
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -120,6 +123,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def run_pipeline_test(self, image_segmenter, examples):
|
def run_pipeline_test(self, image_segmenter, examples):
|
||||||
|
self._load_dataset()
|
||||||
outputs = image_segmenter(
|
outputs = image_segmenter(
|
||||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||||
threshold=0.0,
|
threshold=0.0,
|
||||||
|
@ -25,8 +25,7 @@ from transformers import (
|
|||||||
is_vision_available,
|
is_vision_available,
|
||||||
pipeline,
|
pipeline,
|
||||||
)
|
)
|
||||||
from transformers.testing_utils import ( #
|
from transformers.testing_utils import (
|
||||||
_run_pipeline_tests,
|
|
||||||
compare_pipeline_output_to_hub_spec,
|
compare_pipeline_output_to_hub_spec,
|
||||||
is_pipeline_test,
|
is_pipeline_test,
|
||||||
nested_simplify,
|
nested_simplify,
|
||||||
@ -57,11 +56,15 @@ else:
|
|||||||
@require_torch
|
@require_torch
|
||||||
class ObjectDetectionPipelineTests(unittest.TestCase):
|
class ObjectDetectionPipelineTests(unittest.TestCase):
|
||||||
model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
|
model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
|
||||||
|
_dataset = None
|
||||||
|
|
||||||
if _run_pipeline_tests:
|
@classmethod
|
||||||
|
def _load_dataset(cls):
|
||||||
|
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
|
||||||
|
if cls._dataset is None:
|
||||||
# we use revision="refs/pr/1" until the PR is merged
|
# we use revision="refs/pr/1" until the PR is merged
|
||||||
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
|
||||||
_dataset = datasets.load_dataset(
|
cls._dataset = datasets.load_dataset(
|
||||||
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -85,6 +88,7 @@ class ObjectDetectionPipelineTests(unittest.TestCase):
|
|||||||
return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
|
return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
|
||||||
|
|
||||||
def run_pipeline_test(self, object_detector, examples):
|
def run_pipeline_test(self, object_detector, examples):
|
||||||
|
self._load_dataset()
|
||||||
outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
|
outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
|
||||||
|
|
||||||
self.assertGreater(len(outputs), 0)
|
self.assertGreater(len(outputs), 0)
|
||||||
|
@ -19,7 +19,6 @@ from huggingface_hub import VideoClassificationOutputElement, hf_hub_download
|
|||||||
from transformers import MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, VideoMAEFeatureExtractor
|
from transformers import MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, VideoMAEFeatureExtractor
|
||||||
from transformers.pipelines import VideoClassificationPipeline, pipeline
|
from transformers.pipelines import VideoClassificationPipeline, pipeline
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
_run_pipeline_tests,
|
|
||||||
compare_pipeline_output_to_hub_spec,
|
compare_pipeline_output_to_hub_spec,
|
||||||
is_pipeline_test,
|
is_pipeline_test,
|
||||||
nested_simplify,
|
nested_simplify,
|
||||||
@ -39,9 +38,13 @@ from .test_pipelines_common import ANY
|
|||||||
@require_av
|
@require_av
|
||||||
class VideoClassificationPipelineTests(unittest.TestCase):
|
class VideoClassificationPipelineTests(unittest.TestCase):
|
||||||
model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
|
model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
|
||||||
|
example_video_filepath = None
|
||||||
|
|
||||||
if _run_pipeline_tests:
|
@classmethod
|
||||||
example_video_filepath = hf_hub_download(
|
def _load_dataset(cls):
|
||||||
|
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
|
||||||
|
if cls.example_video_filepath is None:
|
||||||
|
cls.example_video_filepath = hf_hub_download(
|
||||||
repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
|
repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -54,6 +57,7 @@ class VideoClassificationPipelineTests(unittest.TestCase):
|
|||||||
processor=None,
|
processor=None,
|
||||||
torch_dtype="float32",
|
torch_dtype="float32",
|
||||||
):
|
):
|
||||||
|
self._load_dataset()
|
||||||
video_classifier = VideoClassificationPipeline(
|
video_classifier = VideoClassificationPipeline(
|
||||||
model=model,
|
model=model,
|
||||||
tokenizer=tokenizer,
|
tokenizer=tokenizer,
|
||||||
|
Loading…
Reference in New Issue
Block a user