From 6ee1a4fd3e80feef8fe7dc65aabb4c5270524f8a Mon Sep 17 00:00:00 2001
From: Vasudev Gupta <7vasudevgupta@gmail.com>
Date: Thu, 13 May 2021 16:21:30 +0530
Subject: [PATCH] add everything (#11651)

---
 src/transformers/models/big_bird/modeling_big_bird.py  | 8 ++++----
 .../models/bigbird_pegasus/modeling_bigbird_pegasus.py | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py
index 7acea14b9ee..45a4ad76b57 100755
--- a/src/transformers/models/big_bird/modeling_big_bird.py
+++ b/src/transformers/models/big_bird/modeling_big_bird.py
@@ -647,13 +647,13 @@ class BigBirdBlockSparseAttention(nn.Module):
             [
                 to_mask[:, :, :, : 3 * to_block_size],
                 to_mask[:, :, :, -to_block_size:],
-                first_context_layer.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
+                to_mask.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
             ],
             dim=3,
         )
         second_rand_pad = torch.cat(
             [
-                first_context_layer.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
+                rand_mask.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
                 rand_mask[:, :, 0],
             ],
             dim=3,
@@ -781,13 +781,13 @@ class BigBirdBlockSparseAttention(nn.Module):
             [
                 to_mask[:, :, :, :to_block_size],
                 to_mask[:, :, :, -3 * to_block_size :],
-                context_layer.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
+                to_mask.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
             ],
             dim=3,
         )
         second_last_rand_pad = torch.cat(
             [
-                context_layer.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
+                rand_mask.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
                 rand_mask[:, :, -1],
             ],
             dim=3,
diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
index 426362ad39b..ea3f5453344 100755
--- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
+++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
@@ -475,13 +475,13 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
             [
                 to_mask[:, :, :, : 3 * to_block_size],
                 to_mask[:, :, :, -to_block_size:],
-                first_context_layer.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
+                to_mask.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
             ],
             dim=3,
         )
         second_rand_pad = torch.cat(
             [
-                first_context_layer.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
+                rand_mask.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
                 rand_mask[:, :, 0],
             ],
             dim=3,
@@ -609,13 +609,13 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
             [
                 to_mask[:, :, :, :to_block_size],
                 to_mask[:, :, :, -3 * to_block_size :],
-                context_layer.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
+                to_mask.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size]),
             ],
             dim=3,
         )
         second_last_rand_pad = torch.cat(
             [
-                context_layer.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
+                rand_mask.new_ones([bsz, n_heads, from_block_size, 4 * to_block_size]),
                 rand_mask[:, :, -1],
             ],
             dim=3,
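
Note (not part of the patch): a minimal sketch of the mechanical effect of the change, under my reading of the diff. `Tensor.new_ones` copies the dtype and device of the tensor it is called on, so building the padding from `to_mask`/`rand_mask` keeps it consistent with the mask tensors it is concatenated with, instead of inheriting the context layer's dtype (e.g. float16 under mixed precision). Shapes below are made up for brevity.

import torch

bsz, n_heads, from_block_size, to_block_size, n_rand_blocks = 2, 4, 16, 16, 3

to_mask = torch.ones(bsz, 1, 1, 8 * to_block_size)                            # attention mask, float32
first_context_layer = torch.randn(bsz, n_heads, from_block_size, 64).half()   # e.g. under fp16

# Before the patch: the padding inherits the context layer's dtype (float16 here).
old_pad = first_context_layer.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size])

# After the patch: the padding inherits the mask's dtype/device, matching the
# tensors it is concatenated with below.
new_pad = to_mask.new_ones([bsz, 1, 1, n_rand_blocks * to_block_size])

second_seq_pad = torch.cat(
    [to_mask[:, :, :, : 3 * to_block_size], to_mask[:, :, :, -to_block_size:], new_pad],
    dim=3,
)
print(old_pad.dtype, new_pad.dtype, second_seq_pad.dtype)  # torch.float16 torch.float32 torch.float32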