From ed98ad35e64a73f8b04afc9a498497a0cbce66a9 Mon Sep 17 00:00:00 2001
From: Pavel Gein
Date: Thu, 6 Feb 2025 15:33:42 +0500
Subject: [PATCH] Fix usage of unpad_input function (#35925)

Fix usage of the unpad_input function.

See https://github.com/huggingface/transformers/issues/35899

In the [commit](https://github.com/Dao-AILab/flash-attention/commit/cdbbe844b1c0bcba3362e1f8c8af4d6f6d0bf300), the return type of `unpad_input` was changed. The code now supports both older and newer versions.

Co-authored-by: Pavel Gein
---
 src/transformers/modeling_flash_attention_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/modeling_flash_attention_utils.py b/src/transformers/modeling_flash_attention_utils.py
index 08b1a7481d9..d4c7bec0790 100644
--- a/src/transformers/modeling_flash_attention_utils.py
+++ b/src/transformers/modeling_flash_attention_utils.py
@@ -121,7 +121,7 @@ def _upad_input(
     else:
         # The -q_len: slice assumes left padding.
         attention_mask = attention_mask[:, -query_length:]
-        query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(query_layer, attention_mask)
+        query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q, *_ = unpad_input(query_layer, attention_mask)
 
     return (
         query_layer,
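
For illustration only (not part of the patch): a minimal sketch of the star-unpacking compatibility pattern the one-line change introduces. The stubs `unpad_input_old` and `unpad_input_new` are hypothetical stand-ins for the two flash-attention return shapes, not the real library code.

```python
# Sketch of the compatibility pattern from the diff: `*_` absorbs any extra
# trailing return values, so the same call site works whether unpad_input
# returns 4 values (older flash-attn) or more (newer flash-attn).
# Both stubs below are hypothetical stand-ins, not the real flash-attn API.

def unpad_input_old(hidden_states, attention_mask):
    # Older flash-attn: returns 4 values.
    return "unpadded_states", "indices", "cu_seqlens", "max_seqlen_in_batch"

def unpad_input_new(hidden_states, attention_mask):
    # Newer flash-attn (after commit cdbbe844): returns an extra trailing value.
    return "unpadded_states", "indices", "cu_seqlens", "max_seqlen_in_batch", "extra_value"

for unpad_input in (unpad_input_old, unpad_input_new):
    # `*_` collects zero or more leftover items, so both signatures unpack cleanly.
    query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q, *_ = unpad_input(None, None)
    print(query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q)
```

Presumably star unpacking was preferred over an explicit version check because it keeps the call site a single line that works unchanged across flash-attention releases.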