diff --git a/tests/models/mllama/test_modeling_mllama.py b/tests/models/mllama/test_modeling_mllama.py
index e67e0455e1f..589cff4c02b 100644
--- a/tests/models/mllama/test_modeling_mllama.py
+++ b/tests/models/mllama/test_modeling_mllama.py
@@ -17,7 +17,6 @@
 import unittest
 import pytest
 import requests
-from parameterized import parameterized
 
 from transformers import (
     AutoProcessor,
@@ -396,12 +395,6 @@ class MllamaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTester
     def test_model_parallelism(self):
         pass
 
-    @parameterized.expand([("offloaded",)])
-    @pytest.mark.generate
-    @unittest.skip(reason="Offloaded cache seems to not work with mllama's kv cache type")
-    def test_offloaded_cache_implementation(self, cache_implementation):
-        pass
-
     @unittest.skip(
         reason="Mllama cache type doesn't allow correct check on output `past_key_values` due to `Cache.crop()`"
     )
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index 161bb33a801..ffc4b59abb7 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -546,12 +546,6 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
     def test_generate_with_head_masking(self):
         pass
 
-    @parameterized.expand([("offloaded",)])
-    @pytest.mark.generate
-    @unittest.skip(reason="Whisper doesn't work with offloaded cache implementation yet")
-    def test_offloaded_cache_implementation(self, cache_implementation):
-        pass
-
     @require_torch_fp16
     def test_generate_fp16(self):
         config, input_dict = self.model_tester.prepare_config_and_inputs()