Avoid incorrect generations for KV caches containing more than sliding_window tokens

2025-07-04 05:10:06 +06:00 · 2025-05-15 16:45:47 +02:00 · 2025-05-15 16:45:47 +02:00 · 57b7c9ffb4
commit 57b7c9ffb4
parent 27ef46e846
1 changed files with 7 additions and 1 deletions
--- a/src/transformers/models/gemma3/modular_gemma3.py
+++ b/src/transformers/models/gemma3/modular_gemma3.py
@@ -593,7 +593,13 @@ class Gemma3TextModel(Gemma2Model):
            )
        if cache_position is None:
-            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
+            if past_key_values is not None:
                past_seen_tokens = past_key_values.get_seq_length()
                if past_seen_tokens == past_key_values.config.sliding_window - 1:
                    raise ValueError("You must provide cache_position when using KV cache with more than sliding_window tokens.")
            else:
                past_seen_tokens = 0
            cache_position = torch.arange(
                past_seen_tokens,
                past_seen_tokens + inputs_embeds.shape[1],