Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-17 19:48:23 +06:00)
Add Blip and Blip2 for pipeline tests (#21904)
* fix
* add to tests
* style and quality
* add missing

---------

Co-authored-by: NielsRogge <NielsRogge@users.noreply.github.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

parent 1325459105
commit e6de918676
@@ -496,6 +496,8 @@ MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
 MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict(
     [
+        ("blip", "BlipForConditionalGeneration"),
+        ("blip-2", "Blip2ForConditionalGeneration"),
         ("vision-encoder-decoder", "VisionEncoderDecoderModel"),
     ]
 )
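The two new entries register BLIP and BLIP-2 in the vision-to-sequence auto mapping, which is what the image-to-text pipeline consults to resolve a conditional-generation class from a model's config. A minimal usage sketch; the checkpoint name and image path below are assumptions for illustration, not part of this commit:

# Minimal sketch, assuming a BLIP captioning checkpoint and a local image;
# neither name is taken from this diff.
from transformers import pipeline

captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
print(captioner("path/to/image.png"))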
@@ -394,7 +394,11 @@ class BlipModelTester:
 @require_torch
 class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (BlipModel,) if is_torch_available() else ()
-    pipeline_model_mapping = {"feature-extraction": BlipModel} if is_torch_available() else {}
+    pipeline_model_mapping = (
+        {"feature-extraction": BlipModel, "image-to-text": BlipForConditionalGeneration}
+        if is_torch_available()
+        else {}
+    )
     fx_compatible = False
     test_head_masking = False
     test_pruning = False
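`pipeline_model_mapping` is the class-level hook that `PipelineTesterMixin` reads to decide which pipeline tasks to smoke-test for a model; this change adds the image-to-text task for BLIP alongside feature-extraction. An illustrative sketch of the pattern follows; it is a toy stand-in, not the actual `PipelineTesterMixin` implementation:

import unittest


class ToyPipelineTesterMixin:
    """Toy stand-in for PipelineTesterMixin: the real mixin builds tiny models,
    tokenizers and processors and runs `pipeline(task, ...)` end to end for
    every task declared in `pipeline_model_mapping`."""

    pipeline_model_mapping = {}

    def test_pipeline_tasks_are_declared(self):
        for task, model_class in self.pipeline_model_mapping.items():
            # This sketch only checks the declared wiring.
            self.assertIsInstance(task, str)
            self.assertTrue(callable(model_class))


class ToyModelTest(ToyPipelineTesterMixin, unittest.TestCase):
    # The values would normally be model classes such as BlipModel.
    pipeline_model_mapping = {"feature-extraction": object, "image-to-text": object}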
@@ -34,6 +34,7 @@ from ...test_modeling_common import (
     ids_tensor,
     random_attention_mask,
 )
+from ...test_pipeline_mixin import PipelineTesterMixin


 if is_torch_available():
@@ -584,7 +585,7 @@ class Blip2TextModelTester:


 # this model tester uses an encoder-decoder language model (T5)
-class Blip2ForConditionalGenerationModelTester:
+class Blip2ModelTester:
     def __init__(
         self, parent, vision_kwargs=None, qformer_kwargs=None, text_kwargs=None, is_training=True, num_query_tokens=10
     ):
@@ -664,8 +665,13 @@ class Blip2ForConditionalGenerationModelTester:


 @require_torch
-class Blip2ModelTest(ModelTesterMixin, unittest.TestCase):
+class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (Blip2ForConditionalGeneration, Blip2Model) if is_torch_available() else ()
+    pipeline_model_mapping = (
+        {"feature-extraction": Blip2Model, "image-to-text": Blip2ForConditionalGeneration}
+        if is_torch_available()
+        else {}
+    )
     fx_compatible = False
     test_head_masking = False
     test_pruning = False
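For BLIP-2 the image-to-text entry maps to `Blip2ForConditionalGeneration`, whose generation path is what the pipeline test ultimately exercises. A hedged sketch of that path outside the test suite; the checkpoint name and image path are placeholders, not taken from this commit:

# Sketch only: the checkpoint and image below are assumptions for illustration.
from PIL import Image
from transformers import Blip2ForConditionalGeneration, Blip2Processor

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")

inputs = processor(images=Image.open("path/to/image.png"), return_tensors="pt")
generated_ids = model.generate(**inputs, max_new_tokens=20)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())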
@@ -674,7 +680,7 @@ class Blip2ModelTest(ModelTesterMixin, unittest.TestCase):
     test_torchscript = False

     def setUp(self):
-        self.model_tester = Blip2ForConditionalGenerationModelTester(self)
+        self.model_tester = Blip2ModelTester(self)

     def test_for_conditional_generation(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
@@ -55,6 +55,42 @@
         ],
         "processor_classes": []
     },
+    "BlipModel": {
+        "tokenizer_classes": [
+            "BertTokenizerFast",
+            "BertTokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
+    "BlipForConditionalGeneration": {
+        "tokenizer_classes": [
+            "BertTokenizerFast",
+            "BertTokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
+    "Blip2Model": {
+        "tokenizer_classes": [
+            "GPT2TokenizerFast",
+            "GPT2Tokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
+    "Blip2ForConditionalGeneration": {
+        "tokenizer_classes": [
+            "GPT2TokenizerFast",
+            "GPT2Tokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
     "BloomModel": {
         "tokenizer_classes": [
             "BloomTokenizerFast"
         ],
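The new JSON entries record which tokenizer and image-processor classes the tiny-model tooling should pair with each architecture: BERT tokenizers for BLIP, GPT-2 tokenizers for BLIP-2, and `BlipImageProcessor` for both. A hypothetical reader sketch; the file path is an assumption for illustration and may differ in the repository:

# Assumes the summary lives at tests/utils/tiny_model_summary.json.
import json

with open("tests/utils/tiny_model_summary.json") as f:
    summary = json.load(f)

entry = summary["Blip2ForConditionalGeneration"]
print(entry["tokenizer_classes"])  # ["GPT2TokenizerFast", "GPT2Tokenizer"]
print(entry["processor_classes"])  # ["BlipImageProcessor"]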
@@ -410,6 +410,9 @@ def convert_processors(processors, tiny_config, output_folder, result):
        elif isinstance(processor, ProcessorMixin):
            # Currently, we only have these 2 possibilities
            tokenizers.append(processor.tokenizer)
-            feature_extractors.append(processor.feature_extractor)
+            if hasattr(processor, "image_processor"):
+                feature_extractors.append(processor.image_processor)
+            elif hasattr(processor, "feature_extractor"):
+                feature_extractors.append(processor.feature_extractor)

     # check the built processors have the unique type
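Newer composite processors such as BLIP's expose an `image_processor` attribute rather than the older `feature_extractor`, so the conversion helper now probes for both. The same fallback pattern in isolation, as a standalone sketch rather than code from the repository:

# Standalone sketch of the attribute-probing fallback used in convert_processors.
def image_component(processor):
    """Return the image-side component of a composite processor, if any."""
    if hasattr(processor, "image_processor"):
        return processor.image_processor
    if hasattr(processor, "feature_extractor"):
        return processor.feature_extractor
    return None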
@@ -557,7 +560,7 @@ def upload_model(model_dir, organization):
     repo_exist = False
     error = None
     try:
-        create_repo(repo_id=repo_name, organization=organization, exist_ok=False, repo_type="model")
+        create_repo(repo_id=f"{organization}/{repo_name}", exist_ok=False, repo_type="model")
     except Exception as e:
         error = e
         if "You already created" in str(e):
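The updated call appears to track huggingface_hub's move away from a separate `organization=...` keyword toward namespacing the organization directly in `repo_id`. A hedged usage sketch with placeholder names (and assuming the caller is already authenticated):

# Placeholder names; the organization and repo used by the real script may differ.
from huggingface_hub import create_repo

organization = "my-test-org"         # assumption for illustration
repo_name = "tiny-random-BlipModel"  # assumption for illustration
create_repo(repo_id=f"{organization}/{repo_name}", exist_ok=False, repo_type="model")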
@@ -778,7 +781,15 @@ def get_config_overrides(config_class, processors):
     model_tester_kwargs = {"vocab_size": vocab_size}
     # CLIP-like models have `text_model_tester` and `vision_model_tester`, and we need to pass `vocab_size` to
     # `text_model_tester` via `text_kwargs`. The same trick is also necessary for `Flava`.
-    if config_class.__name__ in ["CLIPConfig", "GroupViTConfig", "OwlViTConfig", "XCLIPConfig", "FlavaConfig"]:
+    if config_class.__name__ in [
+        "CLIPConfig",
+        "GroupViTConfig",
+        "OwlViTConfig",
+        "XCLIPConfig",
+        "FlavaConfig",
+        "BlipConfig",
+        "Blip2Config",
+    ]:
         del model_tester_kwargs["vocab_size"]
         model_tester_kwargs["text_kwargs"] = {"vocab_size": vocab_size}
     # `FSMTModelTester` accepts `src_vocab_size` and `tgt_vocab_size` but not `vocab_size`.
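`BlipConfig` and `Blip2Config` are composite configs with a nested text config, so a top-level `vocab_size` override has to be routed to the text model tester via `text_kwargs`, just like the CLIP-style configs already in the list. A toy illustration of that routing; the class names are invented for this sketch and are not the actual transformers testers:

# Toy composite tester: `text_kwargs` is forwarded to the nested text tester,
# mirroring how the vocab_size override reaches the text side of BLIP/BLIP-2.
class ToyTextModelTester:
    def __init__(self, vocab_size=99):
        self.vocab_size = vocab_size


class ToyCompositeModelTester:
    def __init__(self, text_kwargs=None, vision_kwargs=None):
        self.text_model_tester = ToyTextModelTester(**(text_kwargs or {}))


tester = ToyCompositeModelTester(text_kwargs={"vocab_size": 1024})
assert tester.text_model_tester.vocab_size == 1024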