Fix idefics cache (#31377)

* fix idefics cache

* fix tests

Raushan Turganbay, 2024-06-12 15:24:32 +05:00 (committed by GitHub)
parent a2ede66674
commit 08ad34b19e

@@ -1593,10 +1593,11 @@ class Idefics2Model(Idefics2PreTrainedModel):
         past_seen_tokens = 0
         return_legacy_cache = False
-        if use_cache and not isinstance(past_key_values, Cache):  # kept for BC (non `Cache` `past_key_values` inputs)
-            return_legacy_cache = True
-            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
-            past_seen_tokens = past_key_values.get_usable_length(seq_length)
+        if use_cache:
+            if not isinstance(past_key_values, Cache):  # kept for BC (non `Cache` `past_key_values` inputs)
+                return_legacy_cache = True
+                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+            past_seen_tokens = past_key_values.get_seq_length()
         if inputs_embeds is not None and input_ids is None and past_seen_tokens == 0:
             raise ValueError("When first calling the model, if input_embeds are passed, input_ids should not be None.")
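
Note (not part of the diff): the restructuring matters because, with the old layout, a caller that passed an already-built `Cache` object never reached the `past_seen_tokens` update, so it stayed at 0. A minimal sketch of the difference, assuming the public `DynamicCache` API from `transformers` and purely hypothetical tensor shapes:

import torch
from transformers.cache_utils import Cache, DynamicCache

# Hypothetical cache already holding 4 tokens for layer 0
# (shapes are made up: batch=1, heads=8, seq_len=4, head_dim=64).
cache = DynamicCache()
cache.update(torch.zeros(1, 8, 4, 64), torch.zeros(1, 8, 4, 64), layer_idx=0)

# Old flow: `past_seen_tokens` was only set inside the legacy-conversion branch,
# so a ready-made `Cache` object left it at 0.
past_seen_tokens = 0
if not isinstance(cache, Cache):  # False here, so the branch is skipped
    past_seen_tokens = cache.get_usable_length(1)

# New flow: read the length off the cache whenever `use_cache` is on.
past_seen_tokens = cache.get_seq_length()
print(past_seen_tokens)  # 4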
@@ -1669,7 +1670,7 @@ class Idefics2Model(Idefics2PreTrainedModel):
             return_dict=return_dict,
         )
-        if return_legacy_cache:
+        if return_legacy_cache and use_cache:
             outputs.past_key_values = outputs.past_key_values.to_legacy_cache()
         if not return_dict:
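
The extra `use_cache` guard above makes the legacy conversion run only when a cache was actually built during the forward pass. For reference, a small round-trip sketch (again not from the commit) between the legacy tuple-of-tuples format and `DynamicCache`, with made-up shapes and two layers:

import torch
from transformers.cache_utils import DynamicCache

# Legacy format: one (key, value) pair per decoder layer; shapes are hypothetical.
legacy = tuple((torch.zeros(1, 8, 4, 64), torch.zeros(1, 8, 4, 64)) for _ in range(2))

cache = DynamicCache.from_legacy_cache(legacy)  # internal `Cache` representation
assert cache.get_seq_length() == 4

roundtrip = cache.to_legacy_cache()             # back to tuples for BC callers
assert len(roundtrip) == 2
assert roundtrip[0][0].shape == legacy[0][0].shape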
@@ -1880,8 +1881,7 @@ class Idefics2ForConditionalGeneration(Idefics2PreTrainedModel):
         # Omit tokens covered by past_key_values
         if past_key_values is not None:
             # Past key values are always initialized with a `Cache` object -> no need for if-else anymore
-            cache_length = past_key_values.get_seq_length()
-            past_length = past_key_values.seen_tokens
+            past_length = past_key_values.get_seq_length()
             max_cache_length = past_key_values.get_max_length()
             # Keep only the unprocessed tokens:
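
For context, a sketch (not from the commit) of what the two accessors used above return, assuming a plain `DynamicCache`; `get_seq_length()` replaces the deprecated `seen_tokens` attribute the removed line was reading:

import torch
from transformers.cache_utils import DynamicCache

cache = DynamicCache()
cache.update(torch.zeros(1, 8, 6, 64), torch.zeros(1, 8, 6, 64), layer_idx=0)

past_length = cache.get_seq_length()       # 6 tokens already cached
max_cache_length = cache.get_max_length()  # None: a DynamicCache grows without bound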
@@ -1900,7 +1900,7 @@ class Idefics2ForConditionalGeneration(Idefics2PreTrainedModel):
             if (
                 max_cache_length is not None
                 and attention_mask is not None
-                and cache_length + input_ids.shape[1] > max_cache_length
+                and past_length + input_ids.shape[1] > max_cache_length
             ):
                 attention_mask = attention_mask[:, -max_cache_length:]
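
A standalone sketch (not from the commit) of the trimming logic with the fixed variable, using hypothetical sizes for a bounded cache:

import torch

max_cache_length = 16                                 # bounded cache capacity (hypothetical)
past_length = 14                                      # tokens already in the cache
input_ids = torch.ones(1, 4, dtype=torch.long)        # new tokens for this step
attention_mask = torch.ones(1, 18, dtype=torch.long)

# Same condition as the fixed line: compare against the real cache length and
# keep only the positions the bounded cache can still attend to.
if (
    max_cache_length is not None
    and attention_mask is not None
    and past_length + input_ids.shape[1] > max_cache_length
):
    attention_mask = attention_mask[:, -max_cache_length:]

print(attention_mask.shape)  # torch.Size([1, 16])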