fix function that defines masks in XLM

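The definition of `get_masks` would blow up with the right combination of arguments: when `padding_mask` was provided and `causal` was true, `alen` was used without ever having been assigned. It was just a matter of moving that definition outside of the control structure.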
2025-07-31 02:02:21 +06:00 · 2019-10-16 12:50:36 +02:00 · 2019-10-16 12:50:36 +02:00 · c5a94a6100
commit c5a94a6100
parent 488a664151
1 changed files with 2 additions and 2 deletions
--- a/transformers/modeling_xlm.py
+++ b/transformers/modeling_xlm.py
@@ -73,16 +73,16 @@ def get_masks(slen, lengths, causal, padding_mask=None):
    """
    Generate hidden states mask, and optionally an attention mask.
    """
-    bs = lengths.size(0)
+    alen = torch.arange(slen, dtype=torch.long, device=lengths.device)
    if padding_mask is not None:
        mask = padding_mask
    else:
        assert lengths.max().item() <= slen
-        alen = torch.arange(slen, dtype=torch.long, device=lengths.device)
        mask = alen < lengths[:, None]

    # attention mask is the same as mask, or triangular inferior attention (causal)
    if causal:
+        bs = lengths.size(0)
        attn_mask = alen[None, None, :].repeat(bs, slen, 1) <= alen[None, :, None]
    else:
        attn_mask = mask