Mirror of https://github.com/huggingface/transformers.git
update code comment
commit 34a1a01091
parent 34bdc8b54f
@@ -337,8 +337,8 @@ class BertModel(nn.Module):
             token_type_ids = torch.zeros_like(input_ids)
 
         # We create a 3D attention mask from a 2D tensor mask.
-        # Sizes are [batch_size, 1, 1, from_seq_length]
-        # So we can broadcast to [batch_size, num_heads, to_seq_length, from_seq_length]
+        # Sizes are [batch_size, 1, 1, to_seq_length]
+        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
         # this attention mask is more simple than the triangular masking of causal attention
         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
         extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
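For context, a minimal sketch of the broadcast the corrected comment describes: the 2D padding mask indexes key positions (to_seq_length), and the two singleton dimensions broadcast over heads and query positions (from_seq_length). The concrete shapes and the additive -10000.0 masking below are assumptions based on the surrounding BertModel code, not part of this diff:

import torch

# Illustrative shapes (assumed for this sketch).
batch_size, num_heads, from_seq_length, to_seq_length = 2, 12, 5, 5

# 2D padding mask: 1 for real tokens, 0 for padding.
attention_mask = torch.ones(batch_size, to_seq_length)
attention_mask[:, -1] = 0  # pretend the last position is padding

# unsqueeze twice: [batch_size, to_seq_length] -> [batch_size, 1, 1, to_seq_length]
extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
print(extended_attention_mask.shape)  # torch.Size([2, 1, 1, 5])

# The singleton dims broadcast against attention scores of shape
# [batch_size, num_heads, from_seq_length, to_seq_length], masking
# padded key positions for every head and every query position.
scores = torch.randn(batch_size, num_heads, from_seq_length, to_seq_length)
masked = scores + (1.0 - extended_attention_mask) * -10000.0  # assumed additive masking
print(masked.shape)  # torch.Size([2, 12, 5, 5])

The second dimension of the mask is 1, not num_heads, which is exactly why the comment's "[batch_size, 1, 1, to_seq_length]" wording matters: broadcasting supplies the head and query axes for free.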