Fix LXMERT with DataParallel (#7471)

2025-07-31 02:02:21 +06:00 · 2020-09-30 12:41:24 +02:00 · 2020-09-30 12:41:24 +02:00 · 886ef35ce6
commit 886ef35ce6
parent 35e94c68df
1 changed file with 1 addition and 1 deletion
--- a/src/transformers/modeling_lxmert.py
+++ b/src/transformers/modeling_lxmert.py
@@ -958,7 +958,7 @@ class LxmertModel(LxmertPreTrainedModel):
        # positions we want to attend and -10000.0 for masked positions.
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
-        extended_attention_mask = extended_attention_mask.to(dtype=next(self.parameters()).dtype)
+        extended_attention_mask = extended_attention_mask.to(dtype=self.dtype)
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        # Process the visual attention mask