[AttentionMaskConverter] fix sdpa unmask unattended (#28369)

Fix tensor device placement: create the `tmp` index tensor directly on `attention_mask.device` instead of moving the attention mask to the CPU before the `argmax`.
Author: zspo, 2024-01-08 20:33:44 +08:00 (committed by GitHub)
Parent: 98dba52ccd
Commit: 87a6cf41d0

@@ -234,8 +234,8 @@ class AttentionMaskConverter:
         # Get the index of the first non-zero value for every sample in the batch.
         # In the above example, indices = [[2], [0], [1]]]
-        tmp = torch.arange(attention_mask.shape[1], 0, -1)
-        indices = torch.argmax(attention_mask.cpu() * tmp, 1, keepdim=True)
+        tmp = torch.arange(attention_mask.shape[1], 0, -1, device=attention_mask.device)
+        indices = torch.argmax(attention_mask * tmp, 1, keepdim=True)
         # Find the batch indexes that have unattended tokens on the leftmost side (e.g. [0, 0, 1, 1, 1]), for which the first rows of the
         # expanded mask will be completely unattended.
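
For reference, a minimal standalone sketch of what the patched lines compute, using the toy mask from the code comment above; the mask values and the device selection below are illustrative, not part of the commit:

import torch

# Toy padding mask matching the "[[2], [0], [1]]" example in the code comment.
attention_mask = torch.tensor(
    [[0, 0, 1, 1, 1],
     [1, 1, 1, 1, 1],
     [0, 1, 1, 1, 1]],
    device="cuda" if torch.cuda.is_available() else "cpu",
)

# Descending weights [seq_len, ..., 1], created directly on the mask's device
# so no CPU round-trip is needed (the point of this fix).
tmp = torch.arange(attention_mask.shape[1], 0, -1, device=attention_mask.device)

# argmax of (mask * weights) picks the first non-zero position in each row.
indices = torch.argmax(attention_mask * tmp, 1, keepdim=True)
print(indices)  # tensor([[2], [0], [1]]), on the same device as attention_mask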