Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)
intermediate_size -> hidden_dim in distilbert config docstrings
commit 539f601be7
parent cfb7d108bd
@@ -60,7 +60,7 @@ class DistilBertConfig(PretrainedConfig):
             Number of attention heads for each attention layer in the Transformer encoder.
         dim (:obj:`int`, optional, defaults to 768):
             Dimensionality of the encoder layers and the pooler layer.
-        intermediate_size (:obj:`int`, optional, defaults to 3072):
+        hidden_dim (:obj:`int`, optional, defaults to 3072):
             The size of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
         dropout (:obj:`float`, optional, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
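For context, a minimal sketch of how these arguments are passed to the config class (assuming the `transformers` package is installed; per this commit, `hidden_dim` is the actual constructor keyword that the old docstring misnamed `intermediate_size`):

    from transformers import DistilBertConfig

    # Values below are the documented defaults; any other choice is illustrative.
    config = DistilBertConfig(
        dim=768,          # dimensionality of the encoder layers and the pooler layer
        hidden_dim=3072,  # size of the "intermediate" (feed-forward) layer
        dropout=0.1,      # dropout probability for all fully connected layers
    )
    print(config.hidden_dim)  # -> 3072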