fix can_generate (#36570)

* fix can_generate

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix can generate for speecht5 and blip

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix speecht5 tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
This commit is contained in:
jiqing-feng 2025-03-17 21:56:18 +08:00 committed by GitHub
parent 9e94801146
commit 2256875a77
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 19 additions and 1 deletions

View File

@ -1233,7 +1233,7 @@ class BlipForConditionalGeneration(BlipPreTrainedModel, GenerationMixin):
""",
BLIP_START_DOCSTRING,
)
class BlipForQuestionAnswering(BlipPreTrainedModel):
class BlipForQuestionAnswering(BlipPreTrainedModel, GenerationMixin):
config_class = BlipConfig
_tied_weights_keys = ["text_decoder.cls.predictions.decoder.bias"]

View File

@ -2631,6 +2631,13 @@ class SpeechT5ForTextToSpeech(SpeechT5PreTrainedModel):
# Initialize weights and apply final processing
self.post_init()
@classmethod
def can_generate(cls) -> bool:
# Speecht5 has a unique model structure, where the external class (`SpeechT5ForTextToSpeech`) doesn't need to inherit from
# `GenerationMixin` (it has a non-standard generation method). This means that the base `can_generate()` will return `False`,
# but we need to override it so as to do `GenerationConfig` handling in multiple parts of the codebase.
return True
def get_encoder(self):
return self.speecht5.get_encoder()

View File

@ -1076,6 +1076,10 @@ class BarkModelIntegrationTests(unittest.TestCase):
fine_generation_config = BarkFineGenerationConfig(**self.model.generation_config.fine_acoustics_config)
return fine_generation_config
def test_model_can_generate(self):
# Bark has custom generate without inheriting GenerationMixin. This test could prevent regression.
self.assertTrue(self.model.can_generate())
@slow
def test_generate_semantic(self):
input_ids = self.inputs

View File

@ -881,6 +881,7 @@ class SpeechT5ForTextToSpeechTester:
@require_torch
class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (SpeechT5ForTextToSpeech,) if is_torch_available() else ()
all_generative_model_classes = ()
is_encoder_decoder = True
test_pruning = False
test_headmasking = False
@ -892,6 +893,12 @@ class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase):
def test_config(self):
self.config_tester.run_common_tests()
def test_model_can_generate(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
for model_class in self.all_model_classes:
model = model_class(config)
self.assertTrue(model.can_generate())
def test_save_load_strict(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
for model_class in self.all_model_classes: