diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index b5feba6a300..54ea0e23b35 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -94,44 +94,42 @@ if is_torch_available(): class GenerationTesterMixin: model_tester = None all_generative_model_classes = () - input_name = "input_ids" max_new_tokens = 3 - def _get_input_ids_and_config(self, batch_size=2): + def prepare_config_and_inputs_for_generate(self, batch_size=2): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - # TODO: @raushan or @gante, use `model.main_input_name` as the main input instead of relyinn on `input_ids` - input_ids = inputs_dict.pop(self.input_name)[:batch_size, :] - inputs_dict.pop("attention_mask", None) - # we don't want encoder-decoder models to start from filled decoder ids - inputs_dict.pop("decoder_input_ids", None) - inputs_dict.pop("decoder_attention_mask", None) - - # we'll set cache use in each test differently - inputs_dict.pop("use_cache", None) - - inputs_dict = { - k: v[:batch_size, ...] + # We don't want a few model inputs in our model input dictionary for generation tests + input_keys_to_ignore = [ + # we don't want to mask attention heads + "head_mask", + "decoder_head_mask", + "cross_attn_head_mask", + # we don't want encoder-decoder models to start from filled decoder ids + "decoder_input_ids", + "decoder_attention_mask", + # we'll set cache use in each test differently + "use_cache", + # model-specific exceptions should overload/overwrite this function + ] + filtered_inputs_dict = { + k: v[:batch_size, ...] if isinstance(v, torch.Tensor) else v for k, v in inputs_dict.items() - if "head_mask" not in k and isinstance(v, torch.Tensor) + if k not in input_keys_to_ignore } - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - if isinstance(config.eos_token_id, int): - config.eos_token_id = [config.eos_token_id] - config.pad_token_id = config.eos_token_id[0] - if self.has_attentions: - attention_mask = torch.ones_like(input_ids, dtype=torch.long) - else: - attention_mask = None + # It is important set `eos_token_id` to `None` to avoid early stopping (would break for length-based checks) + text_gen_config = config.get_text_config(decoder=True) + if text_gen_config.eos_token_id is not None and text_gen_config.pad_token_id is None: + text_gen_config.pad_token_id = ( + text_gen_config.eos_token_id + if isinstance(text_gen_config.eos_token_id, int) + else text_gen_config.eos_token_id[0] + ) + text_gen_config.eos_token_id = None + text_gen_config.forced_eos_token_id = None - # It is important set set the eos_token_id to None to ensure that no sequences - # shorter than `max_length` can be generated - config.eos_token_id = None - config.forced_eos_token_id = None - - return config, input_ids, attention_mask, inputs_dict + return config, filtered_inputs_dict def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = { @@ -193,8 +191,6 @@ class GenerationTesterMixin: def _greedy_generate( self, model, - input_ids, - attention_mask, inputs_dict, output_scores=False, output_logits=False, @@ -204,9 +200,7 @@ class GenerationTesterMixin: use_cache=True, ): logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, 
do_sample=False, num_beams=1, max_new_tokens=self.max_new_tokens, @@ -217,7 +211,6 @@ class GenerationTesterMixin: return_dict_in_generate=return_dict_in_generate, use_cache=use_cache, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -226,8 +219,6 @@ class GenerationTesterMixin: def _sample_generate( self, model, - input_ids, - attention_mask, inputs_dict, num_return_sequences, output_scores=False, @@ -239,9 +230,7 @@ class GenerationTesterMixin: ): torch.manual_seed(0) logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=True, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=True, num_beams=1, max_new_tokens=self.max_new_tokens, @@ -253,7 +242,6 @@ class GenerationTesterMixin: return_dict_in_generate=return_dict_in_generate, use_cache=use_cache, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -262,8 +250,6 @@ class GenerationTesterMixin: def _beam_search_generate( self, model, - input_ids, - attention_mask, inputs_dict, beam_kwargs, output_scores=False, @@ -274,9 +260,7 @@ class GenerationTesterMixin: use_cache=True, ): logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=False, max_new_tokens=self.max_new_tokens, output_scores=output_scores, @@ -287,7 +271,6 @@ class GenerationTesterMixin: use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -296,8 +279,6 @@ class GenerationTesterMixin: def _beam_sample_generate( self, model, - input_ids, - attention_mask, inputs_dict, beam_kwargs, output_scores=False, @@ -309,9 +290,7 @@ class GenerationTesterMixin: ): torch.manual_seed(0) logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=True, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=True, max_new_tokens=self.max_new_tokens, output_scores=output_scores, @@ -322,7 +301,6 @@ class GenerationTesterMixin: use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -331,8 +309,6 @@ class GenerationTesterMixin: def _group_beam_search_generate( self, model, - input_ids, - attention_mask, inputs_dict, beam_kwargs, output_scores=False, @@ -343,9 +319,7 @@ class GenerationTesterMixin: use_cache=True, ): logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=False, max_new_tokens=self.max_new_tokens, output_scores=output_scores, @@ -356,7 +330,6 @@ class GenerationTesterMixin: use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -365,8 +338,6 @@ class GenerationTesterMixin: def _constrained_beam_search_generate( self, model, - input_ids, - attention_mask, inputs_dict, constraints, beam_kwargs, @@ -378,9 +349,7 @@ class GenerationTesterMixin: use_cache=True, ): logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, 
do_sample=False, max_new_tokens=self.max_new_tokens, output_scores=output_scores, @@ -392,7 +361,6 @@ class GenerationTesterMixin: use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -401,8 +369,6 @@ class GenerationTesterMixin: def _contrastive_generate( self, model, - input_ids, - attention_mask, inputs_dict, output_scores=False, output_logits=False, @@ -417,9 +383,7 @@ class GenerationTesterMixin: } logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=False, num_beams=1, max_new_tokens=self.max_new_tokens, @@ -430,7 +394,6 @@ class GenerationTesterMixin: return_dict_in_generate=return_dict_in_generate, use_cache=use_cache, **logits_processor_kwargs, - **model_kwargs, **contrastive_search_kwargs, **inputs_dict, ) @@ -440,28 +403,26 @@ class GenerationTesterMixin: @pytest.mark.generate def test_greedy_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, input_ids=input_ids, attention_mask=attention_mask, inputs_dict=inputs_dict - ) + output_generate = self._greedy_generate(model=model, inputs_dict=inputs_dict) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_greedy_generate_dict_outputs(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() output_generate = self._greedy_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, output_scores=True, output_logits=True, @@ -477,17 +438,18 @@ class GenerationTesterMixin: # Retrocompatibility check self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput) - self._check_outputs(output_generate, input_ids, model.config) + self._check_outputs(output_generate, main_input, model.config) @pytest.mark.generate def test_greedy_generate_dict_outputs_use_cache(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] if not hasattr(config, "use_cache"): self.skipTest(reason=f"{model_class.__name__} doesn't support 
caching") @@ -498,53 +460,45 @@ class GenerationTesterMixin: model = model_class(config).to(torch_device).eval() output_generate = self._greedy_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, output_scores=True, output_logits=True, output_hidden_states=True, output_attentions=self.has_attentions, return_dict_in_generate=True, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) - self._check_outputs(output_generate, input_ids, model.config, use_cache=True) + self._check_outputs(output_generate, main_input, model.config, use_cache=True) @pytest.mark.generate def test_sample_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() - output_generate = self._sample_generate( - model=model, - input_ids=input_ids, - attention_mask=attention_mask, - inputs_dict=inputs_dict, - num_return_sequences=1, - ) + output_generate = self._sample_generate(model=model, inputs_dict=inputs_dict, num_return_sequences=1) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_sample_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() output_generate = self._sample_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, num_return_sequences=2, output_scores=True, @@ -561,45 +515,39 @@ class GenerationTesterMixin: # Retrocompatibility check self.assertIsInstance(output_generate, SampleEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, SampleDecoderOnlyOutput) - self._check_outputs(output_generate, input_ids, model.config, num_return_sequences=2) + self._check_outputs(output_generate, main_input, model.config, num_return_sequences=2) @pytest.mark.generate def test_beam_search_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() - output_generate = 
self._beam_search_generate( - model=model, - input_ids=input_ids, - attention_mask=attention_mask, - inputs_dict=inputs_dict, - beam_kwargs=beam_kwargs, - ) + output_generate = self._beam_search_generate(model=model, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() output_generate = self._beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -615,20 +563,20 @@ class GenerationTesterMixin: # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) @pytest.mark.generate def test_beam_search_generate_dict_outputs_use_cache(self): for model_class in self.all_generative_model_classes: - # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] if not hasattr(config, "use_cache"): self.skipTest(reason=f"{model_class.__name__} doesn't support caching") @@ -642,8 +590,6 @@ class GenerationTesterMixin: model = model_class(config).to(torch_device).eval() output_generate = self._beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -651,16 +597,20 @@ class GenerationTesterMixin: output_hidden_states=True, output_attentions=self.has_attentions, return_dict_in_generate=True, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self._check_outputs( - output_generate, input_ids, model.config, use_cache=True, num_return_sequences=beam_kwargs["num_beams"] + output_generate, + main_input, + model.config, + use_cache=True, + num_return_sequences=beam_kwargs["num_beams"], ) @require_accelerate @@ -674,7 +624,7 @@ class GenerationTesterMixin: if model_class._no_split_modules is None: continue - config, 
input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() model = model_class(config).eval() with tempfile.TemporaryDirectory() as tmp_dir: @@ -682,8 +632,6 @@ class GenerationTesterMixin: new_model = model_class.from_pretrained(tmp_dir, device_map="auto") new_model.generate( - input_ids, - attention_mask=attention_mask, max_new_tokens=self.max_new_tokens, num_beams=2, **inputs_dict, @@ -692,14 +640,13 @@ class GenerationTesterMixin: @pytest.mark.generate def test_beam_sample_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() output_generate = self._beam_sample_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, ) @@ -707,7 +654,7 @@ class GenerationTesterMixin: if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) # for VLMs inputs embeds won't match input ids unless images are encoded and merged with ids properly # no quick fix available, since obtaining image embeddings step is very model-specific @@ -721,12 +668,11 @@ class GenerationTesterMixin: "inputs_embeds" in prepare_inputs_for_generation_args and "cache_positions" in prepare_inputs_for_generation_args ): - input_embeds = model.get_input_embeddings()(input_ids) + input_embeds = model.get_input_embeddings()(inputs_dict["input_ids"]) beam_kwargs.update({"inputs_embeds": input_embeds}) output_generate2 = self._beam_sample_generate( model=model, input_ids=None, - attention_mask=attention_mask, inputs_dict={}, beam_kwargs=beam_kwargs, ) @@ -736,15 +682,14 @@ class GenerationTesterMixin: @pytest.mark.generate def test_beam_sample_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() output_generate = self._beam_sample_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -761,18 +706,18 @@ class GenerationTesterMixin: # Retrocompatibility check self.assertIsInstance(output_generate, BeamSampleEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSampleDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) @pytest.mark.generate def 
test_generate_without_input_ids(self): - config, _, _, _ = self._get_input_ids_and_config() + config, _ = self.prepare_config_and_inputs_for_generate() # if no bos token id => cannot generate from None if config.bos_token_id is None: @@ -794,49 +739,45 @@ class GenerationTesterMixin: @pytest.mark.generate def test_group_beam_search_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() # check `generate()` and `group_beam_search()` are equal beam_kwargs = self._get_diverse_beam_kwargs() output_generate = self._group_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) # check `group_beam_search` for higher than 1 `num_return_sequences` num_return_sequences = 2 beam_kwargs = self._get_diverse_beam_kwargs(num_return_sequences=num_return_sequences) output_generate = self._group_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_group_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_diverse_beam_kwargs() output_generate = self._group_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -852,21 +793,22 @@ class GenerationTesterMixin: # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) - # TODO: @gante + # TODO: @gante check why it is flaky @is_flaky() @pytest.mark.generate def test_constrained_beam_search_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = 
self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() @@ -882,8 +824,6 @@ class GenerationTesterMixin: beam_kwargs = self._get_constrained_beam_kwargs() output_generate = self._constrained_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, constraints=constraints, beam_kwargs=beam_kwargs, @@ -892,7 +832,7 @@ class GenerationTesterMixin: if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) for generation_output in output_generate: self._check_sequence_inside_sequence(force_tokens, generation_output) @@ -908,8 +848,6 @@ class GenerationTesterMixin: output_generate = self._constrained_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, constraints=constraints, beam_kwargs=beam_kwargs, @@ -918,7 +856,7 @@ class GenerationTesterMixin: if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) for generation_output in output_generate: self._check_sequence_inside_sequence(force_tokens, generation_output) @@ -926,7 +864,8 @@ class GenerationTesterMixin: @pytest.mark.generate def test_constrained_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() @@ -941,8 +880,6 @@ class GenerationTesterMixin: beam_kwargs = self._get_constrained_beam_kwargs() output_generate = self._constrained_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, constraints=constraints, beam_kwargs=beam_kwargs, @@ -960,13 +897,13 @@ class GenerationTesterMixin: # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) @pytest.mark.generate @@ -979,7 +916,8 @@ class GenerationTesterMixin: if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]): self.skipTest(reason="Won't fix: old model with different cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] # NOTE: contrastive 
search only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -990,15 +928,13 @@ class GenerationTesterMixin: model = model_class(config).to(torch_device).eval() output_generate = self._contrastive_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_contrastive_generate_dict_outputs_use_cache(self): @@ -1010,7 +946,8 @@ class GenerationTesterMixin: if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]): self.skipTest(reason="Won't fix: old model with different cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] # NOTE: contrastive search only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -1020,23 +957,21 @@ class GenerationTesterMixin: model = model_class(config).to(torch_device).eval() output_generate = self._contrastive_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, output_scores=True, output_logits=True, output_hidden_states=True, output_attentions=self.has_attentions, return_dict_in_generate=True, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) - self._check_outputs(output_generate, input_ids, model.config, use_cache=True) + self._check_outputs(output_generate, main_input, model.config, use_cache=True) @pytest.mark.generate def test_contrastive_generate_low_memory(self): @@ -1050,7 +985,7 @@ class GenerationTesterMixin: if any(model_name in model_class.__name__.lower() for model_name in ["gptbigcode"]): self.skipTest(reason="TODO: fix me") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) # NOTE: contrastive search only works with cache on at the moment. 
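A minimal illustrative sketch, not part of the patch: `prepare_config_and_inputs_for_generate` now returns a `(config, filtered_inputs_dict)` pair, so a model-specific test class that needs to drop an extra model input can call `super()` and pop the offending key, the same pattern the LED test uses further down in this diff. `FooModelTest` and `extra_model_input` are placeholder names, not from the PR.

import unittest

# relative import style used by the model test files in this suite
from ...generation.test_utils import GenerationTesterMixin


class FooModelTest(GenerationTesterMixin, unittest.TestCase):
    def prepare_config_and_inputs_for_generate(self, *args, **kwargs):
        config, inputs_dict = super().prepare_config_and_inputs_for_generate(*args, **kwargs)
        # remove a model-specific tensor that `generate()` should not receive in these tests
        inputs_dict.pop("extra_model_input", None)
        return config, inputs_dict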
if not hasattr(config, "use_cache"): @@ -1062,23 +997,19 @@ class GenerationTesterMixin: model = model_class(config).to(torch_device).eval() low_output = model.generate( - input_ids, top_k=4, penalty_alpha=0.6, low_memory=True, max_new_tokens=self.max_new_tokens, - attention_mask=attention_mask, **inputs_dict, use_cache=True, ) high_output = model.generate( - input_ids, top_k=4, penalty_alpha=0.6, low_memory=False, max_new_tokens=self.max_new_tokens, - attention_mask=attention_mask, **inputs_dict, use_cache=True, ) @@ -1105,7 +1036,8 @@ class GenerationTesterMixin: ] ): self.skipTest(reason="May fix in the future: need model-specific fixes") - config, input_ids, _, _ = self._get_input_ids_and_config(batch_size=2) + + config, inputs_dict = self.prepare_config_and_inputs_for_generate() # batch_size=1 is ok, but batch_size>1 will cause non-identical output config.use_cache = True @@ -1115,7 +1047,7 @@ class GenerationTesterMixin: model = model_class(config).to(torch_device).eval() low_output = model.generate( - input_ids, + **inputs_dict, max_new_tokens=8, num_beams=5, early_stopping=True, @@ -1124,7 +1056,7 @@ class GenerationTesterMixin: ) high_output = model.generate( - input_ids, + **inputs_dict, max_new_tokens=8, num_beams=5, early_stopping=True, @@ -1169,7 +1101,8 @@ class GenerationTesterMixin: self.skipTest(reason="May fix in the future: need model-specific fixes") # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) + main_input = inputs_dict[model_class.main_input_name] # NOTE: assisted generation only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -1195,9 +1128,7 @@ class GenerationTesterMixin: "return_dict_in_generate": True, "use_cache": True, } - output_greedy = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_greedy = model.generate(**generation_kwargs, **inputs_dict) # test with the same assistant model or randomly init one # in the first case all candidate tokens are accepted, in the second none is accepted @@ -1209,15 +1140,13 @@ class GenerationTesterMixin: assistant_model.generation_config.num_assistant_tokens = 2 # see b) assistant_model.generation_config.num_assistant_tokens_schedule = "constant" # see b) generation_kwargs.update({"assistant_model": assistant_model}) - output_assisted = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_assisted = model.generate(**generation_kwargs, **inputs_dict) # The two outputs must match and their shape must be as expected self.assertListEqual(output_greedy.sequences.tolist(), output_assisted.sequences.tolist()) for output in (output_greedy, output_assisted): - self._check_outputs(output, input_ids, model.config, use_cache=True) + self._check_outputs(output, main_input, model.config, use_cache=True) @is_flaky() @pytest.mark.generate @@ -1246,7 +1175,8 @@ class GenerationTesterMixin: self.skipTest(reason="May fix in the future: need model-specific fixes") # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) + main_input = inputs_dict[model_class.main_input_name] # NOTE: assisted generation only works with cache on at the moment. 
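Throughout the patch, explicit `input_ids` and `attention_mask` arguments to `generate()` are replaced by splatting the filtered inputs dict, and length checks read the prompt size from `model_class.main_input_name`. A small sketch, not part of the patch, of why the two call styles agree for a decoder-only model; the tiny checkpoint name is an assumption, any small causal LM behaves the same way.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# assumed tiny checkpoint, chosen only to keep the example cheap to run
checkpoint = "hf-internal-testing/tiny-random-gpt2"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).eval()

inputs_dict = tokenizer("hello world", return_tensors="pt")  # {"input_ids": ..., "attention_mask": ...}
main_input = inputs_dict[model.main_input_name]  # "input_ids" for decoder-only models

with torch.no_grad():
    out_positional = model.generate(
        inputs_dict["input_ids"],
        attention_mask=inputs_dict["attention_mask"],
        max_new_tokens=3,
        min_new_tokens=3,  # forbid early EOS, as the new test helper does by clearing `eos_token_id`
        do_sample=False,
    )
    out_splatted = model.generate(**inputs_dict, max_new_tokens=3, min_new_tokens=3, do_sample=False)

# greedy decoding is deterministic, so both spellings produce the same sequences
assert out_positional.tolist() == out_splatted.tolist()
# decoder-only outputs include the prompt, hence the `max_new_tokens + main_input.shape[-1]` checks
assert out_splatted.shape[-1] == main_input.shape[-1] + 3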
if not hasattr(config, "use_cache"): @@ -1273,20 +1203,16 @@ class GenerationTesterMixin: "use_cache": True, } - output_greedy = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_greedy = model.generate(**generation_kwargs, **inputs_dict) generation_kwargs.update({"prompt_lookup_num_tokens": 2}) # see b) - output_prompt_lookup = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_prompt_lookup = model.generate(**generation_kwargs, **inputs_dict) # The two outputs must match and their shape must be as expected self.assertListEqual(output_greedy.sequences.tolist(), output_prompt_lookup.sequences.tolist()) for output in (output_greedy, output_prompt_lookup): - self._check_outputs(output, input_ids, model.config, use_cache=True) + self._check_outputs(output, main_input, model.config, use_cache=True) @pytest.mark.generate def test_dola_decoding_sample(self): @@ -1302,7 +1228,8 @@ class GenerationTesterMixin: self.skipTest("DoLa is not supported for models that don't return layerwise hidden states") # enable cache if the model is not openai-gpt, xlnet, cpm, or xlm - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] # Encoder-decoder models are not supported if config.is_encoder_decoder: @@ -1326,12 +1253,11 @@ class GenerationTesterMixin: "output_hidden_states": True, "output_attentions": self.has_attentions, "return_dict_in_generate": True, - "use_cache": hasattr(config, "use_cache"), # Some models don't support the cache + "use_cache": getattr(config, "use_cache", False), # Some models don't support the cache + "dola_layers": "low", } - generation_kwargs.update({"dola_layers": "low"}) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_dola = model.generate(input_ids, **model_kwargs, **generation_kwargs, **inputs_dict) - self._check_outputs(output_dola, input_ids, model.config, use_cache=hasattr(config, "use_cache")) + output_dola = model.generate(**generation_kwargs, **inputs_dict) + self._check_outputs(output_dola, main_input, model.config, use_cache=getattr(config, "use_cache", False)) @pytest.mark.generate def test_assisted_decoding_sample(self): @@ -1359,7 +1285,8 @@ class GenerationTesterMixin: self.skipTest(reason="May fix in the future: need model-specific fixes") # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) + main_input = inputs_dict[model_class.main_input_name] # NOTE: assisted generation only works with cache on at the moment. 
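One behavioural fix worth noting in the DoLa hunk above: `use_cache` is now taken from `getattr(config, "use_cache", False)` instead of `hasattr(config, "use_cache")`. The old expression only tested that the attribute exists, so it stayed truthy even when caching was explicitly disabled. A two-line illustration, not part of the patch:

from transformers import PretrainedConfig

config = PretrainedConfig(use_cache=False)           # attribute present, but caching disabled
assert hasattr(config, "use_cache") is True          # old check only sees that the attribute exists
assert getattr(config, "use_cache", False) is False  # new check honours the configured value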
if not hasattr(config, "use_cache"): @@ -1389,11 +1316,9 @@ class GenerationTesterMixin: "return_dict_in_generate": True, "use_cache": True, } - output_assisted = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_assisted = model.generate(**generation_kwargs, **inputs_dict) - self._check_outputs(output_assisted, input_ids, config, use_cache=True) + self._check_outputs(output_assisted, main_input, config, use_cache=True) @pytest.mark.generate def test_prompt_lookup_decoding_stops_at_eos(self): @@ -1429,7 +1354,8 @@ class GenerationTesterMixin: """Test designed for encoder-decoder models to ensure the attention head masking is used.""" attention_names = ["encoder_attentions", "decoder_attentions", "cross_attentions"] for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + # We want to test only encoder-decoder models if not config.is_encoder_decoder: continue @@ -1452,8 +1378,6 @@ class GenerationTesterMixin: for attn_name, (name, mask) in zip(attention_names, head_masking.items()): out = model.generate( - input_ids, - attention_mask=attention_mask, num_beams=1, output_attentions=self.has_attentions, return_dict_in_generate=True, @@ -1482,7 +1406,7 @@ class GenerationTesterMixin: # - The model must be a decoder-only architecture (encoder-based architectures use right-padding) decoder_only_classes = [] for model_class in self.all_generative_model_classes: - config, _, _, _ = self._get_input_ids_and_config() + config, _ = self.prepare_config_and_inputs_for_generate() if config.is_encoder_decoder: continue else: @@ -1515,7 +1439,12 @@ class GenerationTesterMixin: return model_kwargs for model_class in decoder_only_classes: - config, input_ids, attention_mask, _ = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + input_ids = inputs_dict["input_ids"] + attention_mask = inputs_dict.get("attention_mask") + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + model = model_class(config).to(torch_device).eval() signature = inspect.signature(model.forward).parameters.keys() @@ -1618,7 +1547,7 @@ class GenerationTesterMixin: # When supported, tests that the decoder model can generate from `inputs_embeds` instead of `input_ids` # if fails, you should probably update the `prepare_inputs_for_generation` function for model_class in self.all_generative_model_classes: - config, input_ids, _, _ = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() # Ignore: # a) eos (to always output 20 tokens) and pad (so we don't try to infer the attn mask from the input_ids, @@ -1639,6 +1568,8 @@ class GenerationTesterMixin: if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): continue + input_ids = inputs_dict.pop("input_ids") + # Traditional way of generating text outputs_from_ids = model.generate( input_ids, max_new_tokens=5, return_dict_in_generate=True, output_scores=True @@ -1689,7 +1620,8 @@ class GenerationTesterMixin: if not model_class._supports_static_cache: self.skipTest(reason="This model does not support the static cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + if config.is_encoder_decoder: 
self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") @@ -1697,9 +1629,11 @@ class GenerationTesterMixin: if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): self.skipTest(reason="This model does not support `inputs_embeds` in generation") + input_ids = inputs_dict.pop("input_ids") + model.config.use_cache = True model.config.is_decoder = True - batch_size, seq_length = input_ids.shape + batch_size = input_ids.shape[0] max_cache_len = 30 # here we force to not stop at eos and go until max-length @@ -1724,9 +1658,7 @@ class GenerationTesterMixin: num_hidden_layers = text_config.num_hidden_layers inputs_embeds = model.get_input_embeddings()(input_ids) - outputs = model.generate( - inputs_embeds=inputs_embeds, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + outputs = model.generate(inputs_embeds=inputs_embeds, **generation_kwargs, **inputs_dict) # we should get `max_length` in shape, not `max_length - embeds_length` cache_shape = (batch_size, num_key_value_heads, max_cache_len, head_dim) @@ -1827,7 +1759,7 @@ class GenerationTesterMixin: if not model_class._supports_cache_class: self.skipTest(reason="This model does not support the new cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() model = model_class(config).to(torch_device).eval() generation_kwargs = { @@ -1842,9 +1774,7 @@ class GenerationTesterMixin: # Sets seed before calling `generate` for the case with do_sample=True seed = torch.randint(0, 1000000, (1,)).item() set_seed(seed) - legacy_results = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + legacy_results = model.generate(**generation_kwargs, **inputs_dict) set_seed(seed) num_hidden_layers = config.get_text_config().num_hidden_layers if config.is_encoder_decoder: @@ -1852,14 +1782,8 @@ class GenerationTesterMixin: past_key_values = cache_cls(DynamicCache(num_hidden_layers), DynamicCache(num_hidden_layers)) else: cache_cls = DynamicCache - past_key_values = cache_cls(num_hidden_layers) - new_results = model.generate( - input_ids, - attention_mask=attention_mask, - past_key_values=past_key_values, - **generation_kwargs, - **inputs_dict, - ) + past_key_values = cache_cls() + new_results = model.generate(past_key_values=past_key_values, **generation_kwargs, **inputs_dict) # The two sets of generated sequences must match, despite the cache format between forward passes being # different @@ -1906,12 +1830,15 @@ class GenerationTesterMixin: if not model_class._supports_static_cache: self.skipTest(reason="This model does not support the static cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[model_class.main_input_name] + if config.is_encoder_decoder: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") config.is_decoder = True - batch_size, seq_length = input_ids.shape + batch_size = main_input.shape[0] + seq_length = main_input.shape[-1] max_new_tokens = 20 model = model_class(config).to(torch_device).eval() @@ -1934,7 +1861,7 @@ class GenerationTesterMixin: else config.num_key_value_heads ) num_hidden_layers = config.num_hidden_layers - results = model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict) + 
results = model.generate(**generation_kwargs, **inputs_dict) cache_shape = (batch_size, num_key_value_heads, max_cache_len, head_dim) self.assertTrue(isinstance(results.past_key_values, StaticCache)) @@ -1948,7 +1875,7 @@ class GenerationTesterMixin: if not model_class._supports_quantized_cache: self.skipTest(reason="This model does not support the quantized cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() config.is_decoder = True model = model_class(config).to(torch_device).eval() @@ -1961,23 +1888,17 @@ class GenerationTesterMixin: "use_cache": True, } - results = model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict) + results = model.generate(**generation_kwargs, **inputs_dict) self.assertTrue(isinstance(results.past_key_values, QuantoQuantizedCache)) # passing past key values of different type should raise Error with self.assertRaises(ValueError): - num_hidden_layers = config.get_text_config().num_hidden_layers - model.generate( - input_ids, - attention_mask=attention_mask, - past_key_valyes=DynamicCache(num_hidden_layers), - **generation_kwargs, - ) + model.generate(past_key_valyes=DynamicCache(), **generation_kwargs, **inputs_dict) # setting incorrect cache_config args should raise an Error, i.e. nbits=60 does not make sense generation_kwargs["cache_config"] = {"nbits": 60, "q_group_size": 8, "residual_length": 128} with self.assertRaises(ValueError): - model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs) + model.generate(**generation_kwargs, **inputs_dict) @pytest.mark.generate @require_torch_gpu @@ -2040,7 +1961,7 @@ class GenerationTesterMixin: if "num_logits_to_keep" not in set(inspect.signature(model_class.forward).parameters.keys()): self.skipTest(reason="This model does not support `num_logits_to_keep` argument.") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() config.use_cache = True config.is_decoder = True @@ -2054,13 +1975,9 @@ class GenerationTesterMixin: } # Setting num_logits_to_keep at 0 keeps all logits (old behavior) - with_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict, num_logits_to_keep=0 - ) + with_all_logits = model.generate(**generation_kwargs, **inputs_dict, num_logits_to_keep=0) # By default, num_logits_to_keep is automatically set to 1 if not provided (new behavior) - without_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **inputs_dict, **generation_kwargs - ) + without_all_logits = model.generate(**inputs_dict, **generation_kwargs) self.assertEqual(with_all_logits.tolist(), without_all_logits.tolist()) @pytest.mark.generate @@ -2072,7 +1989,7 @@ class GenerationTesterMixin: if model_class._is_stateful: self.skipTest(reason="Stateful models don't support assisted generation") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) config.use_cache = True config.is_decoder = True @@ -2089,13 +2006,9 @@ class GenerationTesterMixin: assistant_model.generation_config.assistant_confidence_threshold = None # Setting num_logits_to_keep at 0 keeps all logits (old behavior) - with_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, 
**inputs_dict, num_logits_to_keep=0 - ) + with_all_logits = model.generate(**generation_kwargs, **inputs_dict, num_logits_to_keep=0) # By default, num_logits_to_keep is automatically set to 1 if not provided (new behavior) - without_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **inputs_dict, **generation_kwargs - ) + without_all_logits = model.generate(**inputs_dict, **generation_kwargs) self.assertEqual(with_all_logits.tolist(), without_all_logits.tolist()) @pytest.mark.generate @@ -2107,8 +2020,9 @@ class GenerationTesterMixin: for model_class in self.all_generative_model_classes: self.assertTrue("GenerationMixin" in str(model_class.__bases__)) - def _check_outputs(self, output, input_ids, config, use_cache=False, num_return_sequences=1): - batch_size, seq_length = input_ids.shape + def _check_outputs(self, output, main_input, config, use_cache=False, num_return_sequences=1): + batch_size = main_input.shape[0] + seq_length = main_input.shape[-1] config = config.text_config if hasattr(config, "text_config") else config num_sequences_in_output = batch_size * num_return_sequences @@ -2116,6 +2030,10 @@ class GenerationTesterMixin: output.sequences.shape[-1] - 1 if config.is_encoder_decoder else output.sequences.shape[-1] - seq_length ) + # in some models we subsample the sequence length in inner layers + if hasattr(self.model_tester, "get_subsampled_output_lengths"): + seq_length = self.model_tester.get_subsampled_output_lengths(seq_length) + # scores self._check_scores(num_sequences_in_output, output.scores, length=gen_len, config=config) diff --git a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py index 0f28fc2d67b..eae9ee9fbf5 100644 --- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py +++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py @@ -283,28 +283,6 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT return False - # overwrite from GenerationTesterMixin to solve problem - # with conflicting random seeds - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.attention_type = "original_full" - - input_ids = inputs_dict.pop(self.input_name) - _ = inputs_dict.pop("attention_mask", None) - _ = inputs_dict.pop("decoder_input_ids", None) - _ = inputs_dict.pop("decoder_attention_mask", None) - attention_mask = torch.ones_like(input_ids, dtype=torch.long) - - # cut to half length & take max batch_size 3 - sequence_length = input_ids.shape[-1] // 2 - input_ids = input_ids[:batch_size, :sequence_length] - attention_mask = attention_mask[:batch_size, :sequence_length] - - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - config.pad_token_id = config.eos_token_id - return config, input_ids, attention_mask, inputs_dict - def setUp(self): self.model_tester = BigBirdPegasusModelTester(self) self.config_tester = ConfigTester(self, config_class=BigBirdPegasusConfig) @@ -485,6 +463,13 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT def test_load_save_without_tied_weights(self): pass + def test_generate_with_head_masking(self): + # overwritten to temporarily switch the attention type to `original_full` + original_self_attention_type = self.model_tester.attention_type + self.model_tester.attention_type = "original_full" 
+ super().test_generate_with_head_masking() + self.model_tester.attention_type = original_self_attention_type + @require_torch @require_sentencepiece diff --git a/tests/models/chameleon/test_modeling_chameleon.py b/tests/models/chameleon/test_modeling_chameleon.py index 00e3ad40a57..aad26ef147e 100644 --- a/tests/models/chameleon/test_modeling_chameleon.py +++ b/tests/models/chameleon/test_modeling_chameleon.py @@ -116,7 +116,7 @@ class ChameleonModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) sequence_labels = None token_labels = None diff --git a/tests/models/cohere/test_modeling_cohere.py b/tests/models/cohere/test_modeling_cohere.py index d80bc5c24cf..7d12dd3d873 100644 --- a/tests/models/cohere/test_modeling_cohere.py +++ b/tests/models/cohere/test_modeling_cohere.py @@ -95,7 +95,7 @@ class CohereModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/dac/test_modeling_dac.py b/tests/models/dac/test_modeling_dac.py index ffe7f31b79a..e3b729d2f10 100644 --- a/tests/models/dac/test_modeling_dac.py +++ b/tests/models/dac/test_modeling_dac.py @@ -123,7 +123,6 @@ class DacModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): test_headmasking = False test_resize_embeddings = False pipeline_model_mapping = {"feature-extraction": DacModel} if is_torch_available() else {} - input_name = "input_values" def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): # model does not have attention and does not support returning hidden states diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py index cff297be8e0..2aac4dba82e 100644 --- a/tests/models/encodec/test_modeling_encodec.py +++ b/tests/models/encodec/test_modeling_encodec.py @@ -141,7 +141,6 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) test_headmasking = False test_resize_embeddings = False pipeline_model_mapping = {"feature-extraction": EncodecModel} if is_torch_available() else {} - input_name = "input_values" def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): # model does not have attention and does not support returning hidden states diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index a02541d5854..6422133d75e 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -119,7 +119,7 @@ class GemmaModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/granite/test_modeling_granite.py b/tests/models/granite/test_modeling_granite.py index 0f4d7640a1b..9b25698f640 100644 --- a/tests/models/granite/test_modeling_granite.py +++ b/tests/models/granite/test_modeling_granite.py @@ -106,7 +106,7 @@ class GraniteModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = 
torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/granitemoe/test_modeling_granitemoe.py b/tests/models/granitemoe/test_modeling_granitemoe.py index 158259ed5fb..d5d0cee6daa 100644 --- a/tests/models/granitemoe/test_modeling_granitemoe.py +++ b/tests/models/granitemoe/test_modeling_granitemoe.py @@ -105,7 +105,7 @@ class GraniteMoeModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py index a4d81ab2e1c..f1eb2b3929b 100644 --- a/tests/models/led/test_modeling_led.py +++ b/tests/models/led/test_modeling_led.py @@ -338,13 +338,11 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_global_attention(*config_and_inputs) - def _get_input_ids_and_config(self, batch_size=2): - config, input_ids, attention_mask, inputs_dict = GenerationTesterMixin._get_input_ids_and_config( - self, batch_size=batch_size - ) + def prepare_config_and_inputs_for_generate(self, *args, **kwargs): + config, inputs_dict = super().prepare_config_and_inputs_for_generate(*args, **kwargs) # LED computes attention scores based on mask indices if `is_global` inputs_dict.pop("global_attention_mask") - return config, input_ids, attention_mask, inputs_dict + return config, inputs_dict # LEDForSequenceClassification does not support inputs_embeds def test_inputs_embeds(self): diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index a21665c822f..6b273bce7a1 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -112,7 +112,7 @@ class LlamaModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/mimi/test_modeling_mimi.py b/tests/models/mimi/test_modeling_mimi.py index dd0f77421be..ab6184ce2bb 100644 --- a/tests/models/mimi/test_modeling_mimi.py +++ b/tests/models/mimi/test_modeling_mimi.py @@ -170,7 +170,6 @@ class MimiModelTest(ModelTesterMixin, unittest.TestCase): test_headmasking = False test_resize_embeddings = False test_torchscript = False - input_name = "input_values" def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): # model does support returning hidden states diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 0730f8ba444..88140b1a20f 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -112,7 +112,7 @@ class MistralModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index db9641e3dcb..836d38e904c 100644 --- 
a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -108,7 +108,7 @@ class MixtralModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index a385a18b91c..cc30238c8df 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -60,10 +60,6 @@ if is_torch_available(): MusicgenModel, set_seed, ) - from transformers.generation import ( - GenerateDecoderOnlyOutput, - GenerateEncoderDecoderOutput, - ) def _config_zero_init(config): @@ -124,6 +120,7 @@ class MusicgenDecoderTester: pad_token_id=99, bos_token_id=99, num_codebooks=4, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -141,6 +138,7 @@ class MusicgenDecoderTester: self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id self.num_codebooks = num_codebooks + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size * self.num_codebooks, self.seq_length], self.vocab_size) @@ -166,6 +164,7 @@ class MusicgenDecoderTester: bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) return config @@ -282,47 +281,15 @@ class MusicgenDecoderTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste def test_tied_weights_keys(self): pass - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - _ = inputs_dict.pop("attention_mask", None) - inputs_dict = { - k: v[:batch_size, ...] 
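The same attention-mask change recurs across the decoder-only testers above (Chameleon, Cohere, Gemma, Granite, GraniteMoe, Llama, Mistral, Mixtral, and several more below). A minimal sketch of the two constructions, using illustrative shapes and a stand-in device (not values from the test suite), shows why `ones_like` is the safer pattern: the mask inherits shape, dtype, and device from `input_ids` instead of restating them by hand.

import torch

device = "cpu"  # stand-in for `torch_device` in the test suite
batch_size, seq_length = 2, 5  # illustrative sizes
input_ids = torch.randint(0, 99, (batch_size, seq_length), device=device)

# Old pattern: the shape is spelled out separately and can drift out of sync
# with `input_ids`; the mask starts as float32 on the default device.
old_mask = torch.tril(torch.ones(batch_size, seq_length)).to(device)

# New pattern: shape, integer dtype, and device all come from `input_ids`.
new_mask = torch.tril(torch.ones_like(input_ids).to(device))

assert old_mask.shape == new_mask.shape == input_ids.shape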
- for k, v in inputs_dict.items() - if "head_mask" not in k and isinstance(v, torch.Tensor) - } - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[: batch_size * config.num_codebooks, :] - - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - return config, input_ids, attention_mask, inputs_dict - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() - config.audio_channels = 2 - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - inputs_dict={}, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + self.model_tester.audio_channels = original_audio_channels @require_flash_attn @require_torch_gpu @@ -998,6 +965,7 @@ class MusicgenTester: num_codebooks=4, num_filters=4, codebook_size=128, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -1017,6 +985,7 @@ class MusicgenTester: self.num_codebooks = num_codebooks self.num_filters = num_filters self.codebook_size = codebook_size + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) @@ -1052,6 +1021,7 @@ class MusicgenTester: bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) config = MusicgenConfig.from_sub_models_config(text_encoder_config, audio_encoder_config, decoder_config) return config @@ -1415,170 +1385,10 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, lm_heads = model.get_output_embeddings() self.assertTrue(lm_heads is None or isinstance(lm_heads[0], torch.nn.Linear)) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[:batch_size, :] - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - - return config, input_ids, attention_mask - - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen (input / outputs are - # different modalities -> different shapes) - def _greedy_generate( - self, - model, - input_ids, - attention_mask, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=False, - num_beams=1, - max_new_tokens=self.max_new_tokens, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - output_scores=output_scores, - return_dict_in_generate=return_dict_in_generate, - 
remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen (input / outputs are - # different modalities -> different shapes) - def _sample_generate( - self, - model, - input_ids, - attention_mask, - num_return_sequences, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - torch.manual_seed(0) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=True, - num_beams=1, - max_new_tokens=self.max_new_tokens, - num_return_sequences=num_return_sequences, - output_scores=output_scores, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict_in_generate=return_dict_in_generate, - remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_greedy_generate_dict_outputs(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - - self.assertNotIn(config.pad_token_id, output_generate) - - def test_greedy_generate_dict_outputs_use_cache(self): - for model_class in self.greedy_sample_model_classes: - # enable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - - config.use_cache = True - config.is_decoder = True - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - - def test_sample_generate(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - model = model_class(config).to(torch_device).eval() - - # check `generate()` and `sample()` are equal - output_generate = self._sample_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=1, - ) - self.assertIsInstance(output_generate, torch.Tensor) - - def test_sample_generate_dict_output(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - - output_generate = self._sample_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=3, - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - - 
def test_generate_without_input_ids(self): - config, _, _ = self._get_input_ids_and_config() - - # if no bos token id => cannot generate from None - if config.bos_token_id is None: - self.skipTest(reason="bos_token_id is None") - - for model_class in self.greedy_sample_model_classes: - model = model_class(config).to(torch_device) - model.eval() - - output_ids_generate = model.generate( - do_sample=False, max_new_tokens=self.max_new_tokens, remove_invalid_values=True - ) - self.assertIsNotNone(output_ids_generate) - @require_torch_fp16 @require_torch_accelerator # not all operations are supported in fp16 on CPU def test_generate_fp16(self): @@ -1595,24 +1405,10 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, ) def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.audio_channels = 2 - - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + self.model_tester.audio_channels = original_audio_channels @unittest.skip( reason="MusicgenModel is actually not the base of MusicgenForCausalLM as the latter is a composit model" diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index e8584e238d3..35af9fe0768 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -61,9 +61,6 @@ if is_torch_available(): MusicgenMelodyModel, set_seed, ) - from transformers.generation import ( - GenerateDecoderOnlyOutput, - ) if is_torchaudio_available(): from transformers import MusicgenMelodyProcessor @@ -124,6 +121,7 @@ class MusicgenMelodyDecoderTester: bos_token_id=99, num_codebooks=4, conditional_seq_length=4, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -143,6 +141,7 @@ class MusicgenMelodyDecoderTester: self.num_codebooks = num_codebooks self.conditional_seq_length = conditional_seq_length self.encoder_seq_length = conditional_seq_length + seq_length + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size * self.num_codebooks, self.seq_length], self.vocab_size) @@ -168,6 +167,7 @@ class MusicgenMelodyDecoderTester: bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) return config @@ -285,46 +285,15 @@ class MusicgenMelodyDecoderTest(ModelTesterMixin, GenerationTesterMixin, unittes def test_tied_weights_keys(self): pass - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - _ = inputs_dict.pop("attention_mask", None) - inputs_dict = { - k: v[:batch_size, ...] 
- for k, v in inputs_dict.items() - if "head_mask" not in k and isinstance(v, torch.Tensor) - } - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[: batch_size * config.num_codebooks, :] - - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - return config, input_ids, attention_mask, inputs_dict - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask, _ = self._get_input_ids_and_config() - config.audio_channels = 2 - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - inputs_dict={}, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + self.model_tester.audio_channels = original_audio_channels @require_flash_attn @require_torch_gpu @@ -996,6 +965,7 @@ class MusicgenMelodyTester: codebook_size=128, conditional_seq_length=3, chroma_length=24, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -1018,6 +988,7 @@ class MusicgenMelodyTester: self.conditional_seq_length = conditional_seq_length self.chroma_length = chroma_length self.encoder_seq_length = conditional_seq_length + seq_length + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.conditional_seq_length], self.vocab_size) @@ -1053,6 +1024,7 @@ class MusicgenMelodyTester: bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) config = MusicgenMelodyConfig.from_sub_models_config( text_encoder_config, audio_encoder_config, decoder_config, chroma_length=self.chroma_length @@ -1399,170 +1371,10 @@ class MusicgenMelodyTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester lm_heads = model.get_output_embeddings() self.assertTrue(lm_heads is None or isinstance(lm_heads[0], torch.nn.Linear)) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[:batch_size, :] - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - - return config, input_ids, attention_mask - - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen_melody (input / outputs are - # different modalities -> different shapes) - def _greedy_generate( - self, - model, - input_ids, - attention_mask, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=False, - num_beams=1, - max_new_tokens=self.max_new_tokens, - output_attentions=output_attentions, - 
output_hidden_states=output_hidden_states, - output_scores=output_scores, - return_dict_in_generate=return_dict_in_generate, - remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen_melody (input / outputs are - # different modalities -> different shapes) - def _sample_generate( - self, - model, - input_ids, - attention_mask, - num_return_sequences, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - torch.manual_seed(0) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=True, - num_beams=1, - max_new_tokens=self.max_new_tokens, - num_return_sequences=num_return_sequences, - output_scores=output_scores, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict_in_generate=return_dict_in_generate, - remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_greedy_generate_dict_outputs(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - self.assertNotIn(config.pad_token_id, output_generate) - - def test_greedy_generate_dict_outputs_use_cache(self): - for model_class in self.greedy_sample_model_classes: - # enable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - - config.use_cache = True - config.is_decoder = True - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - def test_sample_generate(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - model = model_class(config).to(torch_device).eval() - - # check `generate()` and `sample()` are equal - output_generate = self._sample_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=1, - ) - self.assertIsInstance(output_generate, torch.Tensor) - - def test_sample_generate_dict_output(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - - output_generate = self._sample_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=3, - output_scores=True, - output_hidden_states=True, - 
output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - def test_generate_without_input_ids(self): - config, _, _ = self._get_input_ids_and_config() - - # if no bos token id => cannot generate from None - if config.bos_token_id is None: - self.skipTest(reason="bos_token_id is None") - - for model_class in self.greedy_sample_model_classes: - model = model_class(config).to(torch_device) - model.eval() - - output_ids_generate = model.generate( - do_sample=False, max_new_tokens=self.max_new_tokens, remove_invalid_values=True - ) - self.assertIsNotNone(output_ids_generate) - @require_torch_fp16 @require_torch_accelerator # not all operations are supported in fp16 on CPU def test_generate_fp16(self): @@ -1579,24 +1391,10 @@ class MusicgenMelodyTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester ) def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.audio_channels = 2 - - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + self.model_tester.audio_channels = original_audio_channels @unittest.skip( reason="MusicgenMelodyModel is actually not the base of MusicgenMelodyForCausalLM as the latter is a composit model" diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py index b74d0fdf03b..43e0b7afb49 100644 --- a/tests/models/olmo/test_modeling_olmo.py +++ b/tests/models/olmo/test_modeling_olmo.py @@ -101,7 +101,7 @@ class OlmoModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/olmoe/test_modeling_olmoe.py b/tests/models/olmoe/test_modeling_olmoe.py index 1ce231e0373..9c3af5723ee 100644 --- a/tests/models/olmoe/test_modeling_olmoe.py +++ b/tests/models/olmoe/test_modeling_olmoe.py @@ -111,7 +111,7 @@ class OlmoeModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 0d267fb8691..600c5b8a2f7 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -110,7 +110,7 @@ class PersimmonModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/phi3/test_modeling_phi3.py 
b/tests/models/phi3/test_modeling_phi3.py index ce0a7187887..1186717a78c 100644 --- a/tests/models/phi3/test_modeling_phi3.py +++ b/tests/models/phi3/test_modeling_phi3.py @@ -151,7 +151,7 @@ class Phi3ModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index 4d6c432f204..95bf2cce6d3 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -116,7 +116,7 @@ class Qwen2ModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py index 0425172a6fb..e8eb915a328 100644 --- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py +++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py @@ -134,7 +134,7 @@ class Qwen2MoeModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py index 23dace68cf2..d2f658f56bd 100644 --- a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py +++ b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py @@ -103,7 +103,7 @@ class RecurrentGemmaModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py index 11c2e821975..d837742e9cc 100644 --- a/tests/models/reformer/test_modeling_reformer.py +++ b/tests/models/reformer/test_modeling_reformer.py @@ -684,20 +684,15 @@ class ReformerLocalAttnModelTest(ReformerTesterMixin, GenerationTesterMixin, Mod def test_left_padding_compatibility(self): pass - def _get_input_ids_and_config(self, batch_size=2): + def prepare_config_and_inputs_for_generate(self, *args, **kwargs): # override because overwise we hit max possible seq length for model (4*8=32) # decreasing the seq_length in tester causes errors for "training_tests", those need exactly max seq length # NOTE: seq_length has to be multiple of 4, otherwise it fails for other tests - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict.pop(self.input_name) - _ = inputs_dict.pop("attention_mask", None) - _ = inputs_dict.pop("decoder_input_ids", None) - _ = inputs_dict.pop("decoder_attention_mask", None) - input_ids = input_ids[:batch_size, :16] - attention_mask = torch.ones_like(input_ids, dtype=torch.long)[:batch_size, :16] - config.eos_token_id = None - config.forced_eos_token_id = None - return config, input_ids, attention_mask, inputs_dict + original_sequence_length = self.model_tester.seq_length + 
self.model_tester.seq_length = 16 + test_inputs = super().prepare_config_and_inputs_for_generate(*args, **kwargs) + self.model_tester.seq_length = original_sequence_length + return test_inputs @require_torch diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py index 79f70578554..cb09d44421f 100644 --- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py @@ -360,8 +360,6 @@ class SeamlessM4TModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase): ) all_generative_model_classes = (SeamlessM4TForSpeechToText,) if is_torch_available() else () - input_name = "input_features" - def setUp(self): self.model_tester = SeamlessM4TModelTester(self, input_modality="speech") self.config_tester = ConfigTester(self, config_class=SeamlessM4TConfig) @@ -379,26 +377,6 @@ class SeamlessM4TModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase): model = SeamlessM4TModel.from_pretrained(model_name) self.assertIsNotNone(model) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict[self.input_name] - - # cut to half length & take max batch_size 3 - sequence_length = input_ids.shape[-1] // 2 - input_ids = input_ids[:batch_size, :sequence_length] - - # generate max 3 tokens - max_length = input_ids.shape[-1] + 3 - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - if isinstance(config.eos_token_id, int): - config.eos_token_id = [config.eos_token_id] - config.pad_token_id = config.eos_token_id[0] - - attention_mask = torch.ones(input_ids.shape[:2], dtype=torch.long)[:batch_size, :sequence_length] - - return config, input_ids.float(), attention_mask, max_length - def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py index 1d11cbb247c..451fff0b35f 100644 --- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py +++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py @@ -376,8 +376,6 @@ class SeamlessM4Tv2ModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase) ) all_generative_model_classes = (SeamlessM4Tv2ForSpeechToText,) if is_torch_available() else () - input_name = "input_features" - def setUp(self): self.model_tester = SeamlessM4Tv2ModelTester(self, input_modality="speech") self.config_tester = ConfigTester(self, config_class=SeamlessM4Tv2Config) @@ -395,26 +393,6 @@ class SeamlessM4Tv2ModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase) model = SeamlessM4Tv2Model.from_pretrained(model_name) self.assertIsNotNone(model) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict[self.input_name] - - # cut to half length & take max batch_size 3 - sequence_length = input_ids.shape[-1] // 2 - input_ids = input_ids[:batch_size, :sequence_length] - - # generate max 3 tokens - max_length = input_ids.shape[-1] + 3 - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - if isinstance(config.eos_token_id, int): - config.eos_token_id = [config.eos_token_id] 
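The Reformer override just above and the stereo Musicgen tests earlier in the diff share one specialization pattern: temporarily mutate an attribute on `self.model_tester`, delegate to the shared implementation, then restore the original value. A generic, hypothetical sketch of that pattern (class names, attribute names, and the `try/finally` guard are illustrative additions, not code from the diff):

class ParentTests:
    def test_generate(self):
        # Stand-in for a shared test body that reads tester attributes.
        assert self.model_tester.seq_length in (16, 32)


class ModelTester:
    seq_length = 32


class ModelSpecificTests(ParentTests):
    model_tester = ModelTester()

    def test_generate_short_sequences(self):
        # Temporarily shrink the tester's sequence length, reuse the shared test
        # body, then restore the original value so other tests are unaffected.
        original_seq_length = self.model_tester.seq_length
        self.model_tester.seq_length = 16
        try:
            super().test_generate()
        finally:
            self.model_tester.seq_length = original_seq_length


ModelSpecificTests().test_generate_short_sequences()

The `try/finally` guard is not used in the diff itself; it simply keeps the restore robust if the parent test raises.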
- config.pad_token_id = config.eos_token_id[0] - - attention_mask = torch.ones(input_ids.shape[:2], dtype=torch.long)[:batch_size, :sequence_length] - - return config, input_ids.float(), attention_mask, max_length - def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index cef2a678177..50446d4628a 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -282,20 +282,6 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest test_pruning = False test_missing_keys = False - input_name = "input_features" - - def _get_input_ids_and_config(self, batch_size=2): - config, input_ids, attention_mask, inputs_dict = GenerationTesterMixin._get_input_ids_and_config(self) - - # `input_ids` is actually `input_features` which is a 3D tensor. - # We must overwrite the mask to make it 2D since the original `_get_input_ids_and_config` creates an - # attention mask of the same shape as `input_ids`. - if len(attention_mask.shape) > 2: - sequence_length = input_ids.shape[1] - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long, device=attention_mask.device) - - return config, input_ids, attention_mask, inputs_dict - def setUp(self): self.model_tester = Speech2TextModelTester(self) self.config_tester = ConfigTester(self, config_class=Speech2TextConfig) @@ -632,46 +618,12 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest def test_generate_without_input_ids(self): pass - def _check_outputs(self, output, input_ids, config, use_cache=False, num_return_sequences=1): - batch_size, seq_length = input_ids.shape[:2] - subsampled_seq_length = self.model_tester.get_subsampled_output_lengths(seq_length) - num_sequences_in_output = batch_size * num_return_sequences - gen_len = ( - output.sequences.shape[-1] - 1 if config.is_encoder_decoder else output.sequences.shape[-1] - seq_length - ) - - # scores - self._check_scores(num_sequences_in_output, output.scores, length=gen_len, config=config) - - # Attentions - # encoder - self._check_encoder_attention_for_generate( - output.encoder_attentions, batch_size, config, subsampled_seq_length - ) - # decoder - self._check_attentions_for_generate( - num_sequences_in_output, - output.decoder_attentions, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, - ) - - # Hidden States - # encoder - self._check_encoder_hidden_states_for_generate( - output.encoder_hidden_states, batch_size, config, subsampled_seq_length - ) - - # decoder - self._check_hidden_states_for_generate( - num_sequences_in_output, - output.decoder_hidden_states, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, + def _check_outputs(self, output, main_input, config, use_cache=False, num_return_sequences=1): + # In this model, the index of `batch_size` and `sequence_length`` in `main_input` is different: they are the + # first two dimensions of the tensor. 
+ main_input = main_input[:, :, 0] + super()._check_outputs( + output, main_input, config, use_cache=use_cache, num_return_sequences=num_return_sequences ) def _create_and_check_torchscript(self, config, inputs_dict): diff --git a/tests/models/speecht5/test_modeling_speecht5.py b/tests/models/speecht5/test_modeling_speecht5.py index e13cf8dd56c..97abf1a2cf2 100644 --- a/tests/models/speecht5/test_modeling_speecht5.py +++ b/tests/models/speecht5/test_modeling_speecht5.py @@ -177,8 +177,6 @@ class SpeechT5ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase test_headmasking = False test_resize_embeddings = False - input_name = "input_values" - def setUp(self): self.model_tester = SpeechT5ModelTester(self) self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37) @@ -375,8 +373,6 @@ class SpeechT5ForSpeechToTextTest(ModelTesterMixin, unittest.TestCase): test_pruning = False test_headmasking = False - input_name = "input_values" - def setUp(self): self.model_tester = SpeechT5ForSpeechToTextTester(self) self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37) @@ -895,8 +891,6 @@ class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase): test_pruning = False test_headmasking = False - input_name = "input_ids" - def setUp(self): self.model_tester = SpeechT5ForTextToSpeechTester(self) self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37) @@ -1441,8 +1435,6 @@ class SpeechT5ForSpeechToSpeechTest(ModelTesterMixin, unittest.TestCase): test_headmasking = False test_resize_embeddings = False - input_name = "input_values" - def setUp(self): self.model_tester = SpeechT5ForSpeechToSpeechTester(self) self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37) @@ -1854,8 +1846,6 @@ class SpeechT5HifiGanTest(ModelTesterMixin, unittest.TestCase): is_encoder_decoder = False has_attentions = False - input_name = "spectrogram" - def setUp(self): self.model_tester = SpeechT5HifiGanTester(self) self.config_tester = ConfigTester(self, config_class=SpeechT5HifiGanConfig) diff --git a/tests/models/stablelm/test_modeling_stablelm.py b/tests/models/stablelm/test_modeling_stablelm.py index 36cad89bcfd..c88fda6fb84 100644 --- a/tests/models/stablelm/test_modeling_stablelm.py +++ b/tests/models/stablelm/test_modeling_stablelm.py @@ -113,7 +113,7 @@ class StableLmModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/starcoder2/test_modeling_starcoder2.py b/tests/models/starcoder2/test_modeling_starcoder2.py index c1c7d45d4f1..7ab7faa90ea 100644 --- a/tests/models/starcoder2/test_modeling_starcoder2.py +++ b/tests/models/starcoder2/test_modeling_starcoder2.py @@ -107,7 +107,7 @@ class Starcoder2ModelTester: input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py index d7b6fd84d5f..037f1b1e218 100644 --- a/tests/models/t5/test_modeling_tf_t5.py +++ b/tests/models/t5/test_modeling_tf_t5.py @@ -470,7 +470,7 @@ class TFT5GenerationIntegrationTests(unittest.TestCase): 
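The Speech2Text `_check_outputs` override above exists because `input_features` is a 3D tensor whose first two dimensions are batch size and sequence length, while the shared checks expect a 2D `input_ids`-like tensor; slicing off the trailing feature dimension recovers the `(batch_size, sequence_length)` view. A minimal sketch with made-up dimensions:

import torch

batch_size, sequence_length, feature_size = 2, 24, 80  # illustrative sizes
input_features = torch.randn(batch_size, sequence_length, feature_size)

# Keep only the first two dimensions so downstream shape checks that expect a
# (batch_size, sequence_length) tensor, like `input_ids`, still apply.
main_input = input_features[:, :, 0]
assert main_input.shape == (batch_size, sequence_length)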
self.assertListEqual(expected_output_string, output_strings_xla) @slow - def test_greedy_generate(self): + def test_t5_greedy_generate(self): model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small") tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small") @@ -520,7 +520,7 @@ class TFT5GenerationIntegrationTests(unittest.TestCase): self.assertListEqual(expected_output_string_xla, output_strings_xla) @slow - def test_sample_generate(self): + def test_t5_sample_generate(self): model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small") tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small") diff --git a/tests/models/univnet/test_modeling_univnet.py b/tests/models/univnet/test_modeling_univnet.py index f26a423a1a2..84d28c64587 100644 --- a/tests/models/univnet/test_modeling_univnet.py +++ b/tests/models/univnet/test_modeling_univnet.py @@ -118,8 +118,6 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase): is_encoder_decoder = False has_attentions = False - input_name = "input_features" - def setUp(self): self.model_tester = UnivNetModelTester(self) self.config_tester = ConfigTester( diff --git a/tests/models/vits/test_modeling_vits.py b/tests/models/vits/test_modeling_vits.py index 99ba51e35f6..36619409095 100644 --- a/tests/models/vits/test_modeling_vits.py +++ b/tests/models/vits/test_modeling_vits.py @@ -167,8 +167,6 @@ class VitsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): test_torchscript = False has_attentions = False - input_name = "input_ids" - def setUp(self): self.model_tester = VitsModelTester(self) self.config_tester = ConfigTester(self, config_class=VitsConfig, hidden_size=37) diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index b4e71ca72e5..c719fcf989d 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -395,8 +395,6 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi # `0.5` is for `test_disk_offload` (which also works for `test_model_parallelism`) model_split_percents = [0.5, 0.8, 0.9] - input_name = "input_features" - # TODO: Fix the failed tests def is_pipeline_test_to_skip( self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name @@ -868,48 +866,6 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi def test_generate_without_input_ids(self): pass - def _check_outputs(self, output, input_ids, config, use_cache=False, num_return_sequences=1): - batch_size, mel, seq_length = input_ids.shape - subsampled_seq_length = self.model_tester.get_subsampled_output_lengths(seq_length) - num_sequences_in_output = batch_size * num_return_sequences - gen_len = ( - output.sequences.shape[-1] - 1 if config.is_encoder_decoder else output.sequences.shape[-1] - seq_length - ) - - # scores - self._check_scores(num_sequences_in_output, output.scores, length=gen_len, config=config) - - # Attentions - # encoder - self._check_encoder_attention_for_generate( - output.encoder_attentions, batch_size, config, subsampled_seq_length - ) - # decoder - self._check_attentions_for_generate( - num_sequences_in_output, - output.decoder_attentions, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, - ) - - # Hidden States - # encoder - self._check_encoder_hidden_states_for_generate( - output.encoder_hidden_states, batch_size, config, subsampled_seq_length - ) - - # 
decoder - self._check_hidden_states_for_generate( - num_sequences_in_output, - output.decoder_hidden_states, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, - ) - @require_flash_attn @require_torch_gpu @pytest.mark.flash_attn_test @@ -3511,8 +3467,6 @@ class WhisperEncoderModelTest(ModelTesterMixin, GenerationTesterMixin, unittest. test_pruning = False test_missing_keys = False - input_name = "input_features" - def setUp(self): self.model_tester = WhisperEncoderModelTester(self) self.config_tester = ConfigTester(self, config_class=WhisperConfig)
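Besides the attribute-swap pattern, the LED and Reformer changes earlier in the diff show the other common specialization point for the reworked tests: override `prepare_config_and_inputs_for_generate`, call the shared preparation method, and drop any model-specific keys the generation tests should not forward. A hypothetical sketch of that shape (class names, keys, and the toy "config" are illustrative, not from the test suite):

class SharedGenerationTests:
    def prepare_config_and_inputs_for_generate(self, batch_size=2):
        # Stand-in for the shared preparation logic.
        config = {"model_type": "demo"}
        inputs_dict = {
            "input_ids": list(range(batch_size)),
            "attention_mask": [1] * batch_size,
            "model_specific_mask": [0] * batch_size,
        }
        return config, inputs_dict


class DemoModelTests(SharedGenerationTests):
    def prepare_config_and_inputs_for_generate(self, *args, **kwargs):
        config, inputs_dict = super().prepare_config_and_inputs_for_generate(*args, **kwargs)
        # Drop an input that should not be passed through to `generate()`.
        inputs_dict.pop("model_specific_mask")
        return config, inputs_dict


config, inputs_dict = DemoModelTests().prepare_config_and_inputs_for_generate()
assert "model_specific_mask" not in inputs_dict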