Remove hard-coded pad token id in distilbert and albert (#3965)

This commit is contained in:
Jangwon Park 2020-05-12 21:32:44 +09:00 committed by GitHub
parent 30e343862f
commit 31e67dd19f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 2 deletions

View File

@@ -175,7 +175,7 @@ class AlbertEmbeddings(BertEmbeddings):
     def __init__(self, config):
         super().__init__(config)
-        self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0)
+        self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=config.pad_token_id)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size)
         self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.embedding_size)
         self.LayerNorm = torch.nn.LayerNorm(config.embedding_size, eps=config.layer_norm_eps)

View File

@@ -61,7 +61,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
 class Embeddings(nn.Module):
     def __init__(self, config):
         super().__init__()
-        self.word_embeddings = nn.Embedding(config.vocab_size, config.dim, padding_idx=0)
+        self.word_embeddings = nn.Embedding(config.vocab_size, config.dim, padding_idx=config.pad_token_id)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.dim)
         if config.sinusoidal_pos_embds:
             create_sinusoidal_embeddings(