From 539f601be712619dc8c428f0a0b5e8e15f82ac4c Mon Sep 17 00:00:00 2001
From: Lysandre
Date: Mon, 10 Feb 2020 13:45:57 -0500
Subject: [PATCH] intermediate_size > hidden_dim in distilbert config
 docstrings

---
 src/transformers/configuration_distilbert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/configuration_distilbert.py b/src/transformers/configuration_distilbert.py
index d6fbdbff724..217dc7eb03e 100644
--- a/src/transformers/configuration_distilbert.py
+++ b/src/transformers/configuration_distilbert.py
@@ -60,7 +60,7 @@ class DistilBertConfig(PretrainedConfig):
             Number of attention heads for each attention layer in the Transformer encoder.
         dim (:obj:`int`, optional, defaults to 768):
             Dimensionality of the encoder layers and the pooler layer.
-        intermediate_size (:obj:`int`, optional, defaults to 3072):
+        hidden_dim (:obj:`int`, optional, defaults to 3072):
            The size of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
         dropout (:obj:`float`, optional, defaults to 0.1):
             The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
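
For context, a minimal sketch of why the docstring rename matters: DistilBertConfig's
actual constructor argument is `hidden_dim`, not `intermediate_size`, so the old
docstring described a parameter that does not exist. The snippet below (assuming
the transformers library is installed; the values are just the documented defaults)
illustrates the real parameter name in use:

    # Sketch only: demonstrates that DistilBertConfig accepts `hidden_dim`,
    # the name this patch puts into the docstring.
    from transformers import DistilBertConfig

    config = DistilBertConfig(
        dim=768,          # dimensionality of the encoder layers and the pooler layer
        hidden_dim=3072,  # size of the "intermediate" (feed-forward) layer
        dropout=0.1,      # dropout for fully connected layers
    )
    print(config.hidden_dim)  # -> 3072

Passing `intermediate_size=3072` instead would not configure the feed-forward
layer as the old docstring implied, which is the confusion this one-line fix removes.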