From 57b7c9ffb4886234207699cb9953036664c333c3 Mon Sep 17 00:00:00 2001
From: Tim Beyer <35711942+TimFelixBeyer@users.noreply.github.com>
Date: Thu, 15 May 2025 16:45:47 +0200
Subject: [PATCH] Avoid incorrect generations for KV caches containing more
 than sliding_window tokens

---
 src/transformers/models/gemma3/modular_gemma3.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py
index ab1db5eb74e..a5b401a2657 100644
--- a/src/transformers/models/gemma3/modular_gemma3.py
+++ b/src/transformers/models/gemma3/modular_gemma3.py
@@ -593,7 +593,13 @@ class Gemma3TextModel(Gemma2Model):
         )
 
         if cache_position is None:
-            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
+            if past_key_values is not None:
+                past_seen_tokens = past_key_values.get_seq_length()
+                if past_seen_tokens == past_key_values.config.sliding_window - 1:
+                    raise ValueError("You must provide cache_position when using KV cache with more than sliding_window tokens.")
+            else:
+                past_seen_tokens = 0
+
             cache_position = torch.arange(
                 past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1],
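
Note on usage (illustrative, not part of the patch): once the sliding-window
KV cache is full, past_key_values.get_seq_length() saturates (at
sliding_window - 1, per the check above), so the absolute position of new
tokens can no longer be inferred from the cache alone; the patch raises
instead of letting generation silently go wrong. Callers running a manual
decode loop can avoid the error by tracking positions themselves and passing
cache_position explicitly. A minimal sketch, assuming a Gemma 3 text
checkpoint; the checkpoint id, prompt, and step count are placeholders:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder checkpoint; any Gemma 3 text model behaves the same way.
    model_id = "google/gemma-3-1b-it"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    inputs = tokenizer("The sliding window is", return_tensors="pt")
    seq_len = inputs.input_ids.shape[1]

    # Prefill: absolute positions 0 .. seq_len - 1 are unambiguous.
    out = model(**inputs, cache_position=torch.arange(seq_len), use_cache=True)

    # Greedy decode loop: the caller tracks the absolute position, so the
    # model never has to infer it from a (possibly full) sliding cache.
    past_key_values = out.past_key_values
    next_token = out.logits[:, -1:].argmax(dim=-1)
    for pos in range(seq_len, seq_len + 20):
        out = model(
            input_ids=next_token,
            past_key_values=past_key_values,
            cache_position=torch.tensor([pos]),  # explicit absolute position
            use_cache=True,
        )
        past_key_values = out.past_key_values
        next_token = out.logits[:, -1:].argmax(dim=-1)

model.generate() fills in cache_position internally, so this only matters for
custom forward loops like the one above.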