Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)
fixed docstring typos (#18739)
* fixed docstring typos

* Added missing colon

Co-authored-by: 김주영 <juyoung@zezedu.com>
This commit is contained in:
parent e49c71fc4c
commit dcff504e18
@@ -77,17 +77,17 @@ class BartConfig(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 scale_embedding (`bool`, *optional*, defaults to `False`):
 Scale embeddings by diving by sqrt(d_model).
 use_cache (`bool`, *optional*, defaults to `True`):
 Whether or not the model should return the last key/values attentions (not used by all models).
-num_labels: (`int`, *optional*, defaults to 3):
+num_labels (`int`, *optional*, defaults to 3):
 The number of labels to use in [`BartForSequenceClassification`].
 forced_eos_token_id (`int`, *optional*, defaults to 2):
 The id of the token to force as the last generated token when `max_length` is reached. Usually set to
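For context, the `encoder_layerdrop`, `decoder_layerdrop`, `use_cache`, and `num_labels` arguments touched in this hunk are ordinary `BartConfig` keyword arguments. A minimal sketch of setting them (the concrete values below are purely illustrative, not defaults being changed by this commit):

```python
from transformers import BartConfig, BartForSequenceClassification

# Illustrative values only; the documented defaults are 0.0 / True / 3.
config = BartConfig(
    encoder_layerdrop=0.1,   # LayerDrop probability for the encoder
    decoder_layerdrop=0.1,   # LayerDrop probability for the decoder
    use_cache=True,          # return the last key/value attentions
    num_labels=3,            # labels used by BartForSequenceClassification
)
model = BartForSequenceClassification(config)
```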
@@ -383,7 +383,7 @@ class BasicTokenizer(object):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """
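As a hedged illustration of the `strip_accents` and `tokenize_chinese_chars` options renamed in these tokenizer docstrings, both can be passed straight through the slow BERT tokenizer (the checkpoint name below is just an example):

```python
from transformers import BertTokenizer

# strip_accents=None (the default) lets the value of `lowercase` decide, as the
# docstring says; here both options are set explicitly for illustration.
tokenizer = BertTokenizer.from_pretrained(
    "bert-base-uncased",
    strip_accents=False,
    tokenize_chinese_chars=False,  # often disabled for Japanese, see issue #328
)
print(tokenizer.tokenize("Déjà vu"))
```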
@@ -85,10 +85,10 @@ class BigBirdPegasusConfig(PretrainedConfig):
 just in case (e.g., 1024 or 2048 or 4096).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 use_cache (`bool`, *optional*, defaults to `True`):

@@ -78,10 +78,10 @@ class BlenderbotConfig(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 scale_embedding (`bool`, *optional*, defaults to `False`):

@@ -78,10 +78,10 @@ class BlenderbotSmallConfig(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 scale_embedding (`bool`, *optional*, defaults to `False`):

@@ -74,10 +74,10 @@ class DetrConfig(PretrainedConfig):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
 init_xavier_std (`float`, *optional*, defaults to 1):
 The scaling factor used for the Xavier initialization gain in the HM Attention map module.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 auxiliary_loss (`bool`, *optional*, defaults to `False`):
@@ -404,7 +404,7 @@ DPR_ENCODERS_INPUTS_DOCSTRING = r"""

 DPR_READER_INPUTS_DOCSTRING = r"""
 Args:
-input_ids: (`Tuple[torch.LongTensor]` of shapes `(n_passages, sequence_length)`):
+input_ids (`Tuple[torch.LongTensor]` of shapes `(n_passages, sequence_length)`):
 Indices of input sequence tokens in the vocabulary. It has to be a sequence triplet with 1) the question
 and 2) the passages titles and 3) the passages texts To match pretraining, DPR `input_ids` sequence should
 be formatted with [CLS] and [SEP] with the format:
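The `(n_passages, sequence_length)` shape referenced above comes from encoding one question against several passages. A minimal sketch with the reader tokenizer (the checkpoint name is illustrative):

```python
from transformers import DPRReaderTokenizer

tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")

# One question paired with two passages -> input_ids of shape (n_passages, sequence_length),
# each row laid out as [CLS] question [SEP] title [SEP] text.
encodings = tokenizer(
    questions="What is LayerDrop?",
    titles=["LayerDrop", "Structured dropout"],
    texts=["LayerDrop randomly drops layers.", "A form of structured dropout."],
    padding=True,
    return_tensors="pt",
)
print(encodings["input_ids"].shape)  # (2, sequence_length)
```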
@@ -493,7 +493,7 @@ TF_DPR_ENCODERS_INPUTS_DOCSTRING = r"""

 TF_DPR_READER_INPUTS_DOCSTRING = r"""
 Args:
-input_ids: (`Numpy array` or `tf.Tensor` of shapes `(n_passages, sequence_length)`):
+input_ids (`Numpy array` or `tf.Tensor` of shapes `(n_passages, sequence_length)`):
 Indices of input sequence tokens in the vocabulary. It has to be a sequence triplet with 1) the question
 and 2) the passages titles and 3) the passages texts To match pretraining, DPR `input_ids` sequence should
 be formatted with [CLS] and [SEP] with the format:
@@ -136,7 +136,7 @@ ENCODER_DECODER_INPUTS_DOCSTRING = r"""
 more detail.
 return_dict (`bool`, *optional*):
 If set to `True`, the model will return a [`~utils.Seq2SeqLMOutput`] instead of a plain tuple.
-kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:
+kwargs (*optional*): Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:

 - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function.
 - With a *decoder_* prefix which will be input as `**decoder_kwargs` for the decoder forward function.
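The prefix convention described in this docstring (unprefixed arguments go to the encoder, `decoder_`-prefixed ones to the decoder) can be pictured with a small sketch of the splitting rule; this is not the library's exact code, only an illustration:

```python
# Hypothetical kwargs a caller might pass to an encoder-decoder forward().
kwargs = {"output_attentions": True, "decoder_output_attentions": False}

# Unprefixed keys become **encoder_kwargs ...
kwargs_encoder = {k: v for k, v in kwargs.items() if not k.startswith("decoder_")}
# ... and "decoder_"-prefixed keys are stripped of the prefix and become **decoder_kwargs.
kwargs_decoder = {k[len("decoder_"):]: v for k, v in kwargs.items() if k.startswith("decoder_")}

print(kwargs_encoder)  # {'output_attentions': True}
print(kwargs_decoder)  # {'output_attentions': False}
```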
@@ -147,7 +147,7 @@ ENCODER_DECODER_INPUTS_DOCSTRING = r"""
 training (`bool`, *optional*, defaults to `False`):
 Whether or not to use the model in training mode (some modules like dropout modules have different
 behaviors between training and evaluation).
-kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:
+kwargs (*optional*): Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:

 - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function.
 - With a *decoder_* prefix which will be input as `**decoder_kwargs`` for the decoder forward function.
@@ -95,9 +95,9 @@ class FSMTConfig(PretrainedConfig):
 End of stream token id.
 decoder_start_token_id (`int`, *optional*):
 This model starts decoding with `eos_token_id`
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 Google "layerdrop arxiv", as its not explainable in one line.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 Google "layerdrop arxiv", as its not explainable in one line.
 is_encoder_decoder (`bool`, *optional*, defaults to `True`):
 Whether this is an encoder/decoder model.

@@ -1362,7 +1362,7 @@ class BasicTokenizer(object):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """
@@ -108,7 +108,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
 tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
 Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese (see [this
 issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original LayoutLMv2).
 """

@@ -74,10 +74,10 @@ class LEDConfig(PretrainedConfig):
 The maximum sequence length that the decoder might ever be used with.
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 use_cache (`bool`, *optional*, defaults to `True`):
@@ -76,10 +76,10 @@ class M2M100Config(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 use_cache (`bool`, *optional*, defaults to `True`):

@@ -76,10 +76,10 @@ class MarianConfig(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 scale_embedding (`bool`, *optional*, defaults to `False`):

@@ -76,10 +76,10 @@ class MBartConfig(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 scale_embedding (`bool`, *optional*, defaults to `False`):
@@ -340,7 +340,7 @@ class BasicTokenizer(object):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """

@@ -71,10 +71,10 @@ class PegasusConfig(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 scale_embedding (`bool`, *optional*, defaults to `False`):

@@ -74,10 +74,10 @@ class PLBartConfig(PretrainedConfig):
 just in case (e.g., 512 or 1024 or 2048).
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 scale_embedding (`bool`, *optional*, defaults to `True`):
@@ -94,7 +94,7 @@ class ProphetNetTokenizer(PreTrainedTokenizer):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """

@@ -49,7 +49,7 @@ RAG_CONFIG_DOC = r"""
 `"compressed"`.
 index_path (`str`, *optional*)
 The path to the serialized faiss index on disk.
-passages_path: (`str`, *optional*):
+passages_path (`str`, *optional*):
 A path to text passages compatible with the faiss index. Required if using
 [`~models.rag.retrieval_rag.LegacyIndex`]
 use_dummy_dataset (`bool`, *optional*, defaults to `False`)
@@ -132,7 +132,7 @@ class RealmTokenizer(PreTrainedTokenizer):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """

@@ -103,7 +103,7 @@ class RoFormerTokenizer(PreTrainedTokenizer):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).

@@ -143,7 +143,7 @@ SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
 into a tensor of type `torch.FloatTensor`. See [`~Speech2TextFeatureExtractor.__call__`]
 return_dict (`bool`, *optional*):
 If set to `True`, the model will return a [`~utils.Seq2SeqLMOutput`] instead of a plain tuple.
-kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:
+kwargs (*optional*): Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:

 - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function.
 - With a *decoder_* prefix which will be input as `**decoder_kwargs` for the decoder forward function.

@@ -70,10 +70,10 @@ class Speech2TextConfig(PretrainedConfig):
 The dropout ratio for classifier.
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+encoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 use_cache (`bool`, *optional*, defaults to `True`):
@@ -64,14 +64,15 @@ class Speech2Text2Config(PretrainedConfig):
 The dropout ratio for classifier.
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-https://arxiv.org/abs/1909.11556>`__ for more details. decoder_layerdrop: (`float`, *optional*, defaults to
-0.0): The LayerDrop probability for the decoder. See the [LayerDrop paper](see
-https://arxiv.org/abs/1909.11556) for more details.
+https://arxiv.org/abs/1909.11556>`__ for more details.
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
+The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
+for more details.
 use_cache (`bool`, *optional*, defaults to `True`):
 Whether or not the model should return the last key/values attentions (not used by all models).
 max_source_positions (`int`, *optional*, defaults to 6000):
 The maximum sequence length of log-mel filter-bank features that this model might ever be used with.
-max_target_positions: (`int`, *optional*, defaults to 1024):
+max_target_positions (`int`, *optional*, defaults to 1024):
 The maximum sequence length that this model might ever be used with. Typically set this to something large
 just in case (e.g., 512 or 1024 or 2048).

@@ -111,7 +111,7 @@ class SplinterTokenizer(PreTrainedTokenizer):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """

@@ -340,7 +340,7 @@ class BasicTokenizer(object):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """

@@ -87,10 +87,10 @@ class SplinterTokenizerFast(PreTrainedTokenizerFast):
 tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
 Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese (see [this
 issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
-wordpieces_prefix: (`str`, *optional*, defaults to `"##"`):
+wordpieces_prefix (`str`, *optional*, defaults to `"##"`):
 The prefix for subwords.
 """

@@ -1008,14 +1008,14 @@ T5_INPUTS_DOCSTRING = r"""
 decoder_attention_mask (`tf.Tensor` of shape `(batch_size, target_sequence_length)`, *optional*):
 Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
 be used by default.
-head_mask: (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
+head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
 Mask to nullify selected heads of the self-attention modules in the encoder. Mask values selected in `[0,
 1]`:

 - 1 indicates the head is **not masked**,
 - 0 indicates the head is **masked**.

-decoder_head_mask: (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
+decoder_head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
 Mask to nullify selected heads of the self-attention modules in the decoder. Mask values selected in `[0,
 1]`:

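The `head_mask` / `decoder_head_mask` tensors documented above are simple 0/1 masks. A minimal sketch of building one per layer and head (shapes taken from the docstring, sizes and values purely illustrative):

```python
import numpy as np
import tensorflow as tf

num_layers, num_heads = 6, 8  # illustrative; read these from the model config in practice

# 1 keeps a head, 0 masks it out; here the first head of every encoder layer is masked.
head_mask = np.ones((num_layers, num_heads), dtype=np.float32)
head_mask[:, 0] = 0.0
head_mask = tf.constant(head_mask)

decoder_head_mask = tf.ones((num_layers, num_heads))  # keep all decoder heads

# These would then be passed as `head_mask=head_mask, decoder_head_mask=decoder_head_mask`
# to the TF T5 model's call(), per the docstring above.
```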
@@ -1084,7 +1084,7 @@ T5_ENCODER_INPUTS_DOCSTRING = r"""
 Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
 is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
 model's internal embedding lookup matrix.
-head_mask: (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
+head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
 Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

 - 1 indicates the head is **not masked**,
@@ -293,7 +293,7 @@ class TapasTokenizer(PreTrainedTokenizer):
 tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
 Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 cell_trim_length (`int`, *optional*, defaults to -1):

@@ -2053,7 +2053,7 @@ class BasicTokenizer(object):

 This should likely be deactivated for Japanese (see this
 [issue](https://github.com/huggingface/transformers/issues/328)).
-strip_accents: (`bool`, *optional*):
+strip_accents (`bool`, *optional*):
 Whether or not to strip all accents. If this option is not specified, then it will be determined by the
 value for `lowercase` (as in the original BERT).
 """
@@ -67,7 +67,7 @@ class TrOCRConfig(PretrainedConfig):
 The dropout ratio for classifier.
 init_std (`float`, *optional*, defaults to 0.02):
 The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
+decoder_layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 use_cache (`bool`, *optional*, defaults to `True`):

@@ -136,7 +136,7 @@ VISION_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
 training (`bool`, *optional*, defaults to `False`):
 Whether or not to use the model in training mode (some modules like dropout modules have different
 behaviors between training and evaluation).
-kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:
+kwargs (*optional*): Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:

 - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function.
 - With a *decoder_* prefix which will be input as `**decoder_kwargs` for the decoder forward function.

@@ -137,7 +137,7 @@ VISION_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
 more detail.
 return_dict (`bool`, *optional*):
 If set to `True`, the model will return a [`~utils.Seq2SeqLMOutput`] instead of a plain tuple.
-kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:
+kwargs (*optional*): Remaining dictionary of keyword arguments. Keyword arguments come in two flavors:

 - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function.
 - With a *decoder_* prefix which will be input as `**decoder_kwargs` for the decoder forward function.

@@ -61,7 +61,7 @@ class XGLMConfig(PretrainedConfig):
 The dropout ratio for the attention probabilities.
 activation_dropout (`float`, *optional*, defaults to 0.0):
 The dropout ratio for activations inside the fully connected layer.
-layerdrop: (`float`, *optional*, defaults to 0.0):
+layerdrop (`float`, *optional*, defaults to 0.0):
 The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
 for more details.
 init_std (`float`, *optional*, defaults to 0.02):