diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index f6cd770cdef..1dd1bcf5537 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -175,7 +175,7 @@ class AlbertEmbeddings(BertEmbeddings): def __init__(self, config): super().__init__(config) - self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0) + self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=config.pad_token_id) self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size) self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.embedding_size) self.LayerNorm = torch.nn.LayerNorm(config.embedding_size, eps=config.layer_norm_eps) diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index 5ab6f3c2d0a..86470fcd6bf 100644 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -61,7 +61,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out): class Embeddings(nn.Module): def __init__(self, config): super().__init__() - self.word_embeddings = nn.Embedding(config.vocab_size, config.dim, padding_idx=0) + self.word_embeddings = nn.Embedding(config.vocab_size, config.dim, padding_idx=config.pad_token_id) self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.dim) if config.sinusoidal_pos_embds: create_sinusoidal_embeddings(