Fix idefics cache (#31377)

* fix idefics cache

* fix tests

Raushan Turganbay, 2024-06-12 15:24:32 +05:00 (committed by GitHub)
parent a2ede66674
commit 08ad34b19e

@@ -1593,10 +1593,11 @@ class Idefics2Model(Idefics2PreTrainedModel):
         past_seen_tokens = 0
         return_legacy_cache = False
-        if use_cache and not isinstance(past_key_values, Cache):  # kept for BC (non `Cache` `past_key_values` inputs)
-            return_legacy_cache = True
-            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
-            past_seen_tokens = past_key_values.get_usable_length(seq_length)
+        if use_cache:
+            if not isinstance(past_key_values, Cache):  # kept for BC (non `Cache` `past_key_values` inputs)
+                return_legacy_cache = True
+                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+            past_seen_tokens = past_key_values.get_seq_length()
         if inputs_embeds is not None and input_ids is None and past_seen_tokens == 0:
             raise ValueError("When first calling the model, if input_embeds are passed, input_ids should not be None.")
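
Note (not part of the diff): the restructuring matters because, with the old layout, a caller that passed an already-built `Cache` object never reached the `past_seen_tokens` update, so it stayed at 0. A minimal sketch of the difference, assuming the public `DynamicCache` API from `transformers` and purely hypothetical tensor shapes:

import torch
from transformers.cache_utils import Cache, DynamicCache

# Hypothetical cache already holding 4 tokens for layer 0
# (shapes are made up: batch=1, heads=8, seq_len=4, head_dim=64).
cache = DynamicCache()
cache.update(torch.zeros(1, 8, 4, 64), torch.zeros(1, 8, 4, 64), layer_idx=0)

# Old flow: `past_seen_tokens` was only set inside the legacy-conversion branch,
# so a ready-made `Cache` object left it at 0.
past_seen_tokens = 0
if not isinstance(cache, Cache):  # False here, so the branch is skipped
    past_seen_tokens = cache.get_usable_length(1)

# New flow: read the length off the cache whenever `use_cache` is on.
past_seen_tokens = cache.get_seq_length()
print(past_seen_tokens)  # 4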
@@ -1669,7 +1670,7 @@ class Idefics2Model(Idefics2PreTrainedModel):
             return_dict=return_dict,
         )
-        if return_legacy_cache:
+        if return_legacy_cache and use_cache:
             outputs.past_key_values = outputs.past_key_values.to_legacy_cache()
         if not return_dict:
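
The extra `use_cache` guard above makes the legacy conversion run only when a cache was actually built during the forward pass. For reference, a small round-trip sketch (again not from the commit) between the legacy tuple-of-tuples format and `DynamicCache`, with made-up shapes and two layers:

import torch
from transformers.cache_utils import DynamicCache

# Legacy format: one (key, value) pair per decoder layer; shapes are hypothetical.
legacy = tuple((torch.zeros(1, 8, 4, 64), torch.zeros(1, 8, 4, 64)) for _ in range(2))

cache = DynamicCache.from_legacy_cache(legacy)  # internal `Cache` representation
assert cache.get_seq_length() == 4

roundtrip = cache.to_legacy_cache()             # back to tuples for BC callers
assert len(roundtrip) == 2
assert roundtrip[0][0].shape == legacy[0][0].shape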
@@ -1880,8 +1881,7 @@ class Idefics2ForConditionalGeneration(Idefics2PreTrainedModel):
         # Omit tokens covered by past_key_values
         if past_key_values is not None:
             # Past key values are always initialized with a `Cache` object -> no need for if-else anymore
-            cache_length = past_key_values.get_seq_length()
-            past_length = past_key_values.seen_tokens
+            past_length = past_key_values.get_seq_length()
             max_cache_length = past_key_values.get_max_length()
             # Keep only the unprocessed tokens:
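
For context, a sketch (not from the commit) of what the two accessors used above return, assuming a plain `DynamicCache`; `get_seq_length()` replaces the deprecated `seen_tokens` attribute the removed line was reading:

import torch
from transformers.cache_utils import DynamicCache

cache = DynamicCache()
cache.update(torch.zeros(1, 8, 6, 64), torch.zeros(1, 8, 6, 64), layer_idx=0)

past_length = cache.get_seq_length()       # 6 tokens already cached
max_cache_length = cache.get_max_length()  # None: a DynamicCache grows without bound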
@@ -1900,7 +1900,7 @@ class Idefics2ForConditionalGeneration(Idefics2PreTrainedModel):
             if (
                 max_cache_length is not None
                 and attention_mask is not None
-                and cache_length + input_ids.shape[1] > max_cache_length
+                and past_length + input_ids.shape[1] > max_cache_length
             ):
                 attention_mask = attention_mask[:, -max_cache_length:]
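
A standalone sketch (not from the commit) of the trimming logic with the fixed variable, using hypothetical sizes for a bounded cache:

import torch

max_cache_length = 16                                 # bounded cache capacity (hypothetical)
past_length = 14                                      # tokens already in the cache
input_ids = torch.ones(1, 4, dtype=torch.long)        # new tokens for this step
attention_mask = torch.ones(1, 18, dtype=torch.long)

# Same condition as the fixed line: compare against the real cache length and
# keep only the positions the bounded cache can still attend to.
if (
    max_cache_length is not None
    and attention_mask is not None
    and past_length + input_ids.shape[1] > max_cache_length
):
    attention_mask = attention_mask[:, -max_cache_length:]

print(attention_mask.shape)  # torch.Size([1, 16])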