Fixes for the documentation (#13361)

Sylvain Gugger 2021-09-01 07:54:28 -04:00 committed by GitHub
parent 0b8c84e110
commit 7a26307e31
7 changed files with 32 additions and 14 deletions

View File

@@ -87,7 +87,7 @@ class PretrainedConfig(PushToHubMixin):
Whether cross-attention layers should be added to the model. Note, this option is only relevant for models
that can be used as decoder models within the `:class:~transformers.EncoderDecoderModel` class, which
consists of all models in ``AUTO_MODELS_FOR_CAUSAL_LM``.
tie_encoder_decoder (:obj:`bool`, `optional`, defaults to :obj:`False`)
tie_encoder_decoder (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether all encoder weights should be tied to their equivalent decoder weights. This requires the encoder
and decoder model to have the exact same parameter names.
prune_heads (:obj:`Dict[int, List[int]]`, `optional`, defaults to :obj:`{}`):
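
Both attributes touched in this hunk are ordinary keyword arguments accepted by any :class:`~transformers.PretrainedConfig` subclass. A minimal sketch, not part of the commit, using BertConfig purely as an illustrative subclass:

    from transformers import BertConfig

    # add_cross_attention and tie_encoder_decoder are stored directly on the config
    config = BertConfig(
        add_cross_attention=True,   # add cross-attention layers (decoder use inside EncoderDecoderModel)
        tie_encoder_decoder=False,  # do not tie encoder weights to the decoder weights
    )
    print(config.add_cross_attention, config.tie_encoder_decoder)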

View File

@@ -16,6 +16,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .data_collator import (
DataCollatorForLanguageModeling,
DataCollatorForPermutationLanguageModeling,
DataCollatorForSeq2Seq,
DataCollatorForSOP,
DataCollatorForTokenClassification,
DataCollatorForWholeWordMask,
DataCollatorWithPadding,
default_data_collator,
)
from .metrics import glue_compute_metrics, xnli_compute_metrics
from .processors import (
DataProcessor,
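
The names re-exported in this hunk are the public data collators of transformers.data. A minimal usage sketch, not part of the commit; the checkpoint name is only an example:

    from transformers import AutoTokenizer
    from transformers.data import DataCollatorForLanguageModeling

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # example checkpoint
    collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=True, mlm_probability=0.15)

    # The collator pads a list of encoded examples into one batch and builds masked-LM labels.
    features = [tokenizer("Hello world"), tokenizer("A longer example sentence")]
    batch = collator(features)
    print(batch["input_ids"].shape, batch["labels"].shape)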

View File

@@ -50,7 +50,7 @@ class HfDeepSpeedConfig:
values: ``"auto"``. Without this special logic the DeepSpeed configuration is not modified in any way.
Args:
config_file_or_dict (:obj:`Union[str, Dict]`) - path to DeepSpeed config file or dict.
config_file_or_dict (:obj:`Union[str, Dict]`): path to DeepSpeed config file or dict.
"""

View File

@@ -1535,10 +1535,14 @@ def tf_top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float("In
Args:
logits: logits distribution shape (batch size, vocabulary size)
if top_k > 0: keep only top k tokens with highest probability (top-k filtering).
if top_p < 1.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
Make sure we keep at least min_tokens_to_keep per batch example in the output
top_k (:obj:`int`, `optional`, defaults to 0):
If > 0, only keep the top k tokens with highest probability (top-k filtering)
top_p (:obj:`float`, `optional`, defaults to 1.0):
If < 1.0, only keep the top tokens with cumulative probability >= top_p (nucleus filtering). Nucleus
filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
min_tokens_to_keep (:obj:`int`, `optional`, defaults to 1):
Minimum number of tokens we keep per batch example in the output.
From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
"""
logits_shape = shape_list(logits)
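
A usage sketch for the helper documented above, not part of the commit; it assumes tf_top_k_top_p_filtering is exposed at the top level of the package, as it is in this version of the library:

    import tensorflow as tf
    from transformers import tf_top_k_top_p_filtering

    logits = tf.random.normal((2, 100))  # (batch size, vocabulary size)
    # Keep the 10 highest-probability tokens, then the smallest set of tokens whose
    # cumulative probability reaches 0.95; every filtered position is set to -inf.
    filtered = tf_top_k_top_p_filtering(logits, top_k=10, top_p=0.95, min_tokens_to_keep=1)
    next_tokens = tf.random.categorical(filtered, num_samples=1)  # sample from the filtered distribution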

View File

@@ -2557,10 +2557,14 @@ def top_k_top_p_filtering(
Args:
logits: logits distribution shape (batch size, vocabulary size)
if top_k > 0: keep only top k tokens with highest probability (top-k filtering).
if top_p < 1.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
Make sure we keep at least min_tokens_to_keep per batch example in the output
top_k (:obj:`int`, `optional`, defaults to 0):
If > 0, only keep the top k tokens with highest probability (top-k filtering)
top_p (:obj:`float`, `optional`, defaults to 1.0):
If < 1.0, only keep the top tokens with cumulative probability >= top_p (nucleus filtering). Nucleus
filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
min_tokens_to_keep (:obj:`int`, `optional`, defaults to 1):
Minimum number of tokens we keep per batch example in the output.
From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
"""
if top_k > 0:
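
The PyTorch counterpart has the same semantics; a usage sketch, not part of the commit, assuming top_k_top_p_filtering is exposed at the top level of the package:

    import torch
    from transformers import top_k_top_p_filtering

    logits = torch.randn(2, 100)  # (batch size, vocabulary size)
    # Keep the 10 highest-probability tokens, then the smallest set of tokens whose
    # cumulative probability reaches 0.95; filtered positions become -inf.
    filtered = top_k_top_p_filtering(logits.clone(), top_k=10, top_p=0.95, min_tokens_to_keep=1)
    probs = torch.softmax(filtered, dim=-1)
    next_tokens = torch.multinomial(probs, num_samples=1)  # sample one token per batch row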

View File

@@ -78,7 +78,7 @@ class Speech2TextConfig(PretrainedConfig):
Whether or not the model should return the last key/values attentions (not used by all models).
max_source_positions (:obj:`int`, `optional`, defaults to 6000):
The maximum sequence length of log-mel filter-bank features that this model might ever be used with.
max_target_positions: (:obj:`int`, `optional`, defaults to 1024):
max_target_positions (:obj:`int`, `optional`, defaults to 1024):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
num_conv_layers (:obj:`int`, `optional`, defaults to 2):
@@ -95,7 +95,7 @@ class Speech2TextConfig(PretrainedConfig):
input_channels (:obj:`int`, `optional`, defaults to 1):
An integer specifying number of input channels of the input feature vector.
Example::
Example::
>>> from transformers import Speech2TextModel, Speech2TextConfig
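
The docstring's own example is cut off by the hunk; as a separate illustration (not part of the commit), a minimal sketch that only sets the attributes documented here, all of which are standard Speech2TextConfig keyword arguments:

    from transformers import Speech2TextConfig, Speech2TextModel

    config = Speech2TextConfig(
        max_source_positions=6000,  # max length of the log-mel filter-bank feature sequence
        max_target_positions=1024,  # max target (token) sequence length
        num_conv_layers=2,
        input_channels=1,
    )
    model = Speech2TextModel(config)  # randomly initialized model built from the configuration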

View File

@@ -306,10 +306,10 @@ def pipeline(
- :obj:`"feature-extraction"`: will return a :class:`~transformers.FeatureExtractionPipeline`.
- :obj:`"text-classification"`: will return a :class:`~transformers.TextClassificationPipeline`.
- :obj:`"sentiment-analysis"`: (alias of :obj:`"text-classification") will return a
- :obj:`"sentiment-analysis"`: (alias of :obj:`"text-classification"`) will return a
:class:`~transformers.TextClassificationPipeline`.
- :obj:`"token-classification"`: will return a :class:`~transformers.TokenClassificationPipeline`.
- :obj:`"ner"` (alias of :obj:`"token-classification"): will return a
- :obj:`"ner"` (alias of :obj:`"token-classification"`): will return a
:class:`~transformers.TokenClassificationPipeline`.
- :obj:`"question-answering"`: will return a :class:`~transformers.QuestionAnsweringPipeline`.
- :obj:`"fill-mask"`: will return a :class:`~transformers.FillMaskPipeline`.
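
A usage sketch for the two aliases clarified in this hunk, not part of the commit; the default checkpoints are downloaded on first use:

    from transformers import pipeline

    sentiment = pipeline("sentiment-analysis")  # alias of "text-classification"
    print(sentiment("I love this library!"))

    ner = pipeline("ner")  # alias of "token-classification"
    print(ner("Hugging Face is based in New York City."))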