mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 05:10:06 +06:00
Avoid incorrect generations for KV caches containing more than sliding_window tokens
This commit is contained in:
parent
27ef46e846
commit
57b7c9ffb4
@ -593,7 +593,13 @@ class Gemma3TextModel(Gemma2Model):
             )
 
         if cache_position is None:
-            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
+            if past_key_values is not None:
+                past_seen_tokens = past_key_values.get_seq_length()
+                if past_seen_tokens == past_key_values.config.sliding_window - 1:
+                    raise ValueError("You must provide cache_position when using KV cache with more than sliding_window tokens.")
+            else:
+                past_seen_tokens = 0
+
             cache_position = torch.arange(
                 past_seen_tokens,
                 past_seen_tokens + inputs_embeds.shape[1],
Loading…
Reference in New Issue
Block a user