[docstring] Fix docstring for RwkvConfig (#26782)

* update check_docstrings
* update docstring

This commit is contained in:
parent 21da3b2461
commit d085662c59
@@ -61,7 +61,7 @@ class RwkvConfig(PretrainedConfig):
             Dimensionality of the attention hidden states. Will default to `hidden_size` if unset.
         intermediate_size (`int`, *optional*):
             Dimensionality of the inner feed-forward layers. Will default to 4 times `hidden_size` if unset.
-        layer_norm_eps (`float`, *optional*, defaults to 1e-5):
+        layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
             The epsilon to use in the layer normalization layers.
         bos_token_id (`int`, *optional*, defaults to 0):
             The id of the beginning of sentence token in the vocabulary. Defaults to 0 as RWKV uses the same tokenizer
@@ -69,7 +69,7 @@ class RwkvConfig(PretrainedConfig):
         eos_token_id (`int`, *optional*, defaults to 0):
             The id of the end of sentence token in the vocabulary. Defaults to 0 as RWKV uses the same tokenizer as
             GPTNeoX.
-        rescale_every (`int`, *optional*, default to 6):
+        rescale_every (`int`, *optional*, defaults to 6):
             At inference, the hidden states (and weights of the correponding output layers) are divided by 2 every
             `rescale_every` layer. If set to 0 or a negative number, no rescale is done.
         tie_word_embeddings (`bool`, *optional*, defaults to `False`):
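For reference, here is a minimal sketch (not part of this commit) of instantiating RwkvConfig with the parameters touched by the corrected docstring. It assumes a transformers installation with RWKV support; the values passed are simply the documented defaults.

from transformers import RwkvConfig

# Use the argument names the docstring now matches: the __init__ parameter
# is `layer_norm_epsilon`, not `layer_norm_eps`.
config = RwkvConfig(
    layer_norm_epsilon=1e-5,    # epsilon used by the layer normalization layers
    rescale_every=6,            # at inference, rescale hidden states / output weights every 6 layers
    bos_token_id=0,             # RWKV reuses the GPTNeoX tokenizer, whose BOS id is 0
    eos_token_id=0,             # same tokenizer, so the EOS id is also 0
    tie_word_embeddings=False,
)

print(config.layer_norm_epsilon, config.rescale_every)  # 1e-05 6

Since these are the defaults, `RwkvConfig()` with no arguments should produce the same configuration; the explicit call only illustrates the corrected names and values.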
@@ -471,7 +471,6 @@ OBJECTS_TO_IGNORE = [
     "RobertaPreLayerNormConfig",
     "RobertaPreLayerNormModel",
     "RobertaTokenizerFast",
-    "RwkvConfig",
     "SEWConfig",
     "SEWDConfig",
     "SEWDForCTC",
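Removing "RwkvConfig" from OBJECTS_TO_IGNORE means the docstring checker updated in this commit (utils/check_docstrings.py in the transformers repository) now validates RwkvConfig instead of skipping it. The snippet below is a rough, hypothetical sketch of the kind of comparison such a check performs: matching defaults stated in the class docstring against defaults declared in the __init__ signature. The regex and helper names are illustrative assumptions, not the script's actual implementation.

import inspect
import re

from transformers import RwkvConfig


def docstring_defaults(cls):
    # Pull "name (`type`, *optional*, defaults to X):" entries out of the class docstring.
    pattern = re.compile(r"(\w+) \(`[^`]+`, \*optional\*, defaults to ([^)]+)\):")
    return dict(pattern.findall(cls.__doc__ or ""))


def signature_defaults(cls):
    # Collect the actual defaults declared in the __init__ signature.
    return {
        name: param.default
        for name, param in inspect.signature(cls.__init__).parameters.items()
        if param.default is not inspect.Parameter.empty
    }


doc = docstring_defaults(RwkvConfig)
sig = signature_defaults(RwkvConfig)
for name in sorted(doc.keys() & sig.keys()):
    print(f"{name}: docstring says {doc[name]}, signature has {sig[name]!r}")

A check along these lines is what would have flagged the `layer_norm_eps` / `layer_norm_epsilon` mismatch and the `default to` / `defaults to` wording fixed above once the class is no longer ignored.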