diff --git a/src/transformers/models/albert/configuration_albert.py b/src/transformers/models/albert/configuration_albert.py
index fd0c6238879..cacc0499035 100644
--- a/src/transformers/models/albert/configuration_albert.py
+++ b/src/transformers/models/albert/configuration_albert.py
@@ -85,6 +85,12 @@ class AlbertConfig(PretrainedConfig):
             [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
             For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer
             Models with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
+        pad_token_id (`int`, *optional*, defaults to 0):
+            Padding token id.
+        bos_token_id (`int`, *optional*, defaults to 2):
+            Beginning of stream token id.
+        eos_token_id (`int`, *optional*, defaults to 3):
+            End of stream token id.

     Examples:

diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
index 8d69ea603f1..650b519eaa5 100644
--- a/utils/check_docstrings.py
+++ b/utils/check_docstrings.py
@@ -75,7 +75,6 @@ OBJECTS_TO_IGNORE = [
     "TFGPT2Tokenizer",
     # Missing arguments in the docstring
     "ASTFeatureExtractor",
-    "AlbertConfig",
     "AlbertModel",
     "AlbertTokenizerFast",
     "AlignTextModel",
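
The new docstring entries describe defaults that `AlbertConfig.__init__` already forwards to `PretrainedConfig`; dropping `"AlbertConfig"` from `OBJECTS_TO_IGNORE` makes `utils/check_docstrings.py` verify the docstring from now on. A minimal sanity check of the documented values, assuming `transformers` is installed (this snippet is illustrative, not part of the patch):

    # Sketch: confirm the documented token id defaults match AlbertConfig's behavior.
    from transformers import AlbertConfig

    config = AlbertConfig()
    assert config.pad_token_id == 0  # documented default
    assert config.bos_token_id == 2  # documented default
    assert config.eos_token_id == 3  # documented default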