fixed lots of typos. (#7758)

Tiger 2020-10-13 09:00:20 -05:00 committed by GitHub
parent 8cb4ecca25
commit 7e73c12805
22 changed files with 36 additions and 36 deletions


@@ -12,7 +12,7 @@ subclass :class:`~transformers.Trainer` and override the methods you need (see :
 By default a :class:`~transformers.Trainer` will use the following callbacks:
-- :class:`~transformers.DefaultFlowCallback` which handles the default beahvior for logging, saving and evaluation.
+- :class:`~transformers.DefaultFlowCallback` which handles the default behavior for logging, saving and evaluation.
 - :class:`~transformers.PrinterCallback` or :class:`~transformers.ProrgressCallback` to display progress and print the
   logs (the first one is used if you deactivate tqdm through the :class:`~transformers.TrainingArguments`, otherwise
   it's the second one).
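
For context, the callback API this hunk documents is extended by subclassing :class:`~transformers.TrainerCallback`; a minimal sketch (the callback name and its print body are illustrative, not part of this patch):

```python
from transformers import Trainer, TrainerCallback

class LossLoggerCallback(TrainerCallback):
    """Toy callback: print the logs dict every time the Trainer logs."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            print(f"step {state.global_step}: {logs}")

# Registered alongside the default callbacks:
# trainer = Trainer(model=model, args=training_args, callbacks=[LossLoggerCallback()])
```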


@@ -15,7 +15,7 @@ Both :class:`~transformers.Trainer` and :class:`~transformers.TFTrainer` contain
 previous features. To inject custom behavior you can subclass them and override the following methods:
 - **get_train_dataloader**/**get_train_tfdataset** -- Creates the training DataLoader (PyTorch) or TF Dataset.
-- **get_eval_dataloader**/**get_eval_tfdataset** -- Creates the evaulation DataLoader (PyTorch) or TF Dataset.
+- **get_eval_dataloader**/**get_eval_tfdataset** -- Creates the evaluation DataLoader (PyTorch) or TF Dataset.
 - **get_test_dataloader**/**get_test_tfdataset** -- Creates the test DataLoader (PyTorch) or TF Dataset.
 - **log** -- Logs information on the various objects watching training.
 - **create_optimizer_and_scheduler** -- Setups the optimizer and learning rate scheduler if they were not passed at


@@ -66,7 +66,7 @@ The library is built around three types of classes for each model:
 All these classes can be instantiated from pretrained instances and saved locally using two methods:
 - :obj:`from_pretrained()` lets you instantiate a model/configuration/tokenizer from a pretrained version either
-  provided by the library itself (the suported models are provided in the list :doc:`here <pretrained_models>`
+  provided by the library itself (the supported models are provided in the list :doc:`here <pretrained_models>`
   or stored locally (or on a server) by the user,
 - :obj:`save_pretrained()` lets you save a model/configuration/tokenizer locally so that it can be reloaded using
   :obj:`from_pretrained()`.
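
The two methods named in this hunk form a simple round trip; a minimal sketch (the checkpoint and local directory names are arbitrary choices):

```python
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained("bert-base-uncased")        # provided by the library/hub
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

model.save_pretrained("./my-bert")       # writes config + weights locally
tokenizer.save_pretrained("./my-bert")   # writes vocab + tokenizer config

reloaded = AutoModel.from_pretrained("./my-bert")  # reload from the local copy
```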


@@ -39,7 +39,7 @@ python run_summarization.py \
     --compute_rouge true
 ```
-The scripts executes on GPU if one is available and if `no_cuda` is not set to `true`. Inference on multiple GPUs is not suported yet. The ROUGE scores will be displayed in the console at the end of evaluation and written in a `rouge_scores.txt` file. The script takes 30 hours to compute with a single Tesla V100 GPU and a batch size of 10 (300,000 texts to summarize).
+The scripts executes on GPU if one is available and if `no_cuda` is not set to `true`. Inference on multiple GPUs is not supported yet. The ROUGE scores will be displayed in the console at the end of evaluation and written in a `rouge_scores.txt` file. The script takes 30 hours to compute with a single Tesla V100 GPU and a batch size of 10 (300,000 texts to summarize).
 ## Summarize any text


@@ -31,7 +31,7 @@ class MMBTConfig(object):
 Config of the underlying Transformer models. Its values are copied over to use a single config.
 num_labels (:obj:`int`, `optional`):
 Size of final Linear layer for classification.
-modal_hidden_size (:obj:`int`, `optional`, defautls to 2048):
+modal_hidden_size (:obj:`int`, `optional`, defaults to 2048):
 Embedding dimension of the non-text modality encoder.
 """


@@ -274,7 +274,7 @@ class PretrainedConfig(object):
 Path to a directory in which a downloaded pretrained model configuration should be cached if the
 standard cache should not be used.
 force_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
-Wheter or not to force to (re-)download the configuration files and override the cached versions if they
+Whether or not to force to (re-)download the configuration files and override the cached versions if they
 exist.
 resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether or not to delete incompletely received file. Attempts to resume the download if such a file
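
These download-control arguments are keyword arguments to :obj:`from_pretrained`; for instance (the cache directory path is arbitrary):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "bert-base-uncased",
    cache_dir="./hf-cache",   # custom cache location
    force_download=False,     # reuse cached files when present
    resume_download=True,     # pick up a partially downloaded file
)
```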


@@ -211,7 +211,7 @@ def load_graph_from_args(pipeline_name: str, framework: str, model: str, tokeniz
 pipeline_name: The kind of pipeline to use (ner, question-answering, etc.)
 framework: The actual model to convert the pipeline from ("pt" or "tf")
 model: The model name which will be loaded by the pipeline
-tokenizer: The tokenizer name which will be loaded by the pipeline, defaut to the model's value
+tokenizer: The tokenizer name which will be loaded by the pipeline, default to the model's value
 Returns: Pipeline object
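
This helper is internal to the ONNX conversion script; called directly it would look roughly like the following (module path and pipeline name per the transformers source tree of this era — treat as a sketch, not the canonical entry point):

```python
from transformers.convert_graph_to_onnx import load_graph_from_args

# tokenizer defaults to the model name when not given
nlp = load_graph_from_args("feature-extraction", framework="pt", model="bert-base-uncased")
```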


@@ -560,7 +560,7 @@ class SquadProcessor(DataProcessor):
 Args:
 dataset: The tfds dataset loaded from `tensorflow_datasets.load("squad")`
-evaluate: boolean specifying if in evaluation mode or in training mode
+evaluate: Boolean specifying if in evaluation mode or in training mode
 Returns:
 List of SquadExample
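
Per the library's SQuAD example scripts, the method this docstring belongs to (:obj:`get_examples_from_dataset`) is used as sketched below, assuming `tensorflow_datasets` is installed:

```python
import tensorflow_datasets as tfds
from transformers import SquadV1Processor

tfds_examples = tfds.load("squad")  # dict of train/validation splits
processor = SquadV1Processor()
examples = processor.get_examples_from_dataset(tfds_examples, evaluate=False)  # training mode
```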


@@ -1093,7 +1093,7 @@ def is_tensor(x):
 class ModelOutput(OrderedDict):
     """
-    Base class for all model outputs as dataclass. Has a ``__getitem__`` that allows indexing by integer or slice (like
-    a tuple) or strings (like a dictionnary) that will ignore the ``None`` attributes. Otherwise behaves like a
+    Base class for all model outputs as dataclass. Has a ``__getitem__`` that allows indexing by integer or slice (like
+    a tuple) or strings (like a dictionary) that will ignore the ``None`` attributes. Otherwise behaves like a
     regular python dictionary.
     .. warning::
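
The dual tuple/dict behaviour this docstring describes means the three accesses below are equivalent for a populated field (a sketch; `model` and `inputs` are assumed to be defined):

```python
outputs = model(**inputs)              # returns a ModelOutput subclass

hidden = outputs.last_hidden_state     # attribute access
hidden = outputs["last_hidden_state"]  # dict-style access by string key
hidden = outputs[0]                    # tuple-style access; ``None`` attributes are skipped
```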


@@ -197,7 +197,7 @@ class TensorBoardCallback(TrainerCallback):
 Args:
 tb_writer (:obj:`SummaryWriter`, `optional`):
-The writer to use. Will instatiate one if not set.
+The writer to use. Will instantiate one if not set.
 """
 def __init__(self, tb_writer=None):
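
Per this docstring, :obj:`tb_writer` is optional; passing one explicitly might look like the following sketch (import paths as in the transformers/PyTorch versions of this period):

```python
from torch.utils.tensorboard import SummaryWriter
from transformers.integrations import TensorBoardCallback

writer = SummaryWriter(log_dir="runs/my-experiment")
callback = TensorBoardCallback(tb_writer=writer)  # omit tb_writer and one is created for you
```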


@@ -507,7 +507,7 @@ AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
 :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
 request.
 output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
 messages.
 local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether or not to only look at local files (e.g., not try doanloading the model).


@@ -390,7 +390,7 @@ TF_AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
 :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
 request.
 output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
 messages.
 local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether or not to only look at local files (e.g., not try doanloading the model).


@@ -569,7 +569,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
 :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
 request.
 output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
 messages.
 local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether or not to only look at local files (e.g., not try doanloading the model).


@@ -802,7 +802,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
 :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
 request.
 output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
 messages.
 local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether or not to only look at local files (e.g., not try doanloading the model).
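
The same :obj:`output_loading_info`/:obj:`local_files_only` wording recurs in the four model-loading docstrings above; in use, a minimal sketch:

```python
from transformers import AutoModel

# inspect what did and did not load from the checkpoint
model, loading_info = AutoModel.from_pretrained("bert-base-uncased", output_loading_info=True)
print(loading_info["missing_keys"], loading_info["unexpected_keys"])

# offline-friendly load: only consult the local cache, never the network
model = AutoModel.from_pretrained("bert-base-uncased", local_files_only=True)
```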


@@ -169,7 +169,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
 epsilon (:obj:`float`, `optional`, defaults to 1e-7):
 The epsilon paramenter in Adam, which is a small constant for numerical stability.
 amsgrad (:obj:`bool`, `optional`, default to `False`):
-Wheter to apply AMSGrad varient of this algorithm or not, see
+Whether to apply AMSGrad varient of this algorithm or not, see
 `On the Convergence of Adam and Beyond <https://arxiv.org/abs/1904.09237>`__.
 weight_decay_rate (:obj:`float`, `optional`, defaults to 0):
 The weight decay to apply.
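
These are constructor arguments of the TensorFlow optimizer; for example (the hyperparameter values are illustrative):

```python
from transformers import AdamWeightDecay

optimizer = AdamWeightDecay(
    learning_rate=3e-5,
    epsilon=1e-7,
    amsgrad=False,          # set True for the AMSGrad variant
    weight_decay_rate=0.01,
)
```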


@@ -1766,7 +1766,7 @@ class QuestionAnsweringPipeline(Pipeline):
 def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int) -> Tuple:
 """
-Take the output of any :obj:`ModelForQuestionAnswering` and will generate probalities for each span to be
+Take the output of any :obj:`ModelForQuestionAnswering` and will generate probabilities for each span to be
 the actual answer.
 In addition, it filters out some unwanted/impossible cases like answer len being greater than
@@ -1807,7 +1807,7 @@ class QuestionAnsweringPipeline(Pipeline):
 def span_to_answer(self, text: str, start: int, end: int) -> Dict[str, Union[str, int]]:
 """
-When decoding from token probalities, this method maps token indexes to actual word in
+When decoding from token probabilities, this method maps token indexes to actual word in
 the initial context.
 Args:
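
:obj:`decode` and :obj:`span_to_answer` are internals of the question-answering pipeline; end to end they surface through call kwargs such as :obj:`topk` and :obj:`max_answer_len` (a sketch; question and context text are made up):

```python
from transformers import pipeline

qa = pipeline("question-answering")
results = qa(
    question="Where do I live?",
    context="My name is Tiger and I live in Austin.",
    topk=2,             # return the 2 most probable spans
    max_answer_len=10,  # filter out spans longer than 10 tokens
)
# each result carries the output of span_to_answer: {'score', 'start', 'end', 'answer'}
```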


@@ -682,7 +682,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
 token_ids_1 (:obj:`List[int]`, `optional`):
 List of ids of the second sequence.
 already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
-Wheter or not the token list is already formated with special tokens for the model.
+Whether or not the token list is already formated with special tokens for the model.
 Returns:
 A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
@@ -815,7 +815,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
 you want to reload it using the :meth:`~transformers.PreTrainedTokenizer.from_pretrained` class method.
 Args:
-save_directory (:obj:`str`): The path to adirectory where the tokenizer will be saved.
+save_directory (:obj:`str`): The path to a directory where the tokenizer will be saved.
 Returns:
 A tuple of :obj:`str`: The files saved.


@@ -15,7 +15,7 @@
 """ Base classes common to both the slow and the fast tokenization classes:
 PreTrainedTokenizerBase (host all the user fronting encoding methodes)
 Special token mixing (host the special tokens logic) and
-BatchEncoding (wrap the dictionnary of output with special method for the Fast tokenizers)
+BatchEncoding (wrap the dictionary of output with special method for the Fast tokenizers)
 """
 import copy
@@ -249,7 +249,7 @@ class BatchEncoding(UserDict):
 def tokens(self, batch_index: int = 0) -> List[str]:
 """
-Return the list of tokens (sub-parts of the input strings after word/subword splitting and before converstion
+Return the list of tokens (sub-parts of the input strings after word/subword splitting and before conversion
 to integer indices) at a given batch index (only works for the output of a fast tokenizer).
 Args:
@@ -1121,7 +1121,7 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
 return_overflowing_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether or not to return overflowing token sequences.
 return_special_tokens_mask (:obj:`bool`, `optional`, defaults to :obj:`False`):
-Wheter or not to return special tokens mask information.
+Whether or not to return special tokens mask information.
 return_offsets_mapping (:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether or not to return :obj:`(char_start, char_end)` for each token.
@@ -1153,13 +1153,13 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
 - **num_truncated_tokens** -- Number of tokens truncated (when a :obj:`max_length` is specified and
 :obj:`return_overflowing_tokens=True`).
 - **special_tokens_mask** -- List of 0s and 1s, with 0 specifying added special tokens and 1 specifying
-regual sequence tokens (when :obj:`add_special_tokens=True` and :obj:`return_special_tokens_mask=True`).
+regular sequence tokens (when :obj:`add_special_tokens=True` and :obj:`return_special_tokens_mask=True`).
 - **length** -- The length of the inputs (when :obj:`return_length=True`)
 """
 INIT_TOKENIZER_DOCSTRING = r"""
 Class attributes (overridden by derived classes)
-- **vocab_files_names** (:obj:`Dict[str, str]`) -- A ditionary with, as keys, the ``__init__`` keyword name of
+- **vocab_files_names** (:obj:`Dict[str, str]`) -- A dictionary with, as keys, the ``__init__`` keyword name of
 each vocabulary file required by the model, and as associated values, the filename for saving the associated
 file (string).
 - **pretrained_vocab_files_map** (:obj:`Dict[str, Dict[str, str]]`) -- A dictionary of dictionaries, with the
@@ -1170,7 +1170,7 @@ INIT_TOKENIZER_DOCSTRING = r"""
 :obj:`short-cut-names` of the pretrained models, and as associated values, the maximum length of the sequence
 inputs of this model, or :obj:`None` if the model has no maximum input size.
 - **pretrained_init_configuration** (:obj:`Dict[str, Dict[str, Any]]`) -- A dictionary with, as keys, the
-:obj:`short-cut-names` of the pretrained models, and as associated values, a dictionnary of specific
+:obj:`short-cut-names` of the pretrained models, and as associated values, a dictionary of specific
 arguments to pass to the ``__init__`` method of the tokenizer class for this pretrained model when loading the
 tokenizer with the :meth:`~transformers.tokenization_utils_base.PreTrainedTokenizerBase.from_pretrained`
 method.
@@ -1688,7 +1688,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
 modifying :obj:`tokenizer.do_lower_case` after creation).
 Args:
-save_directory (:obj:`str`): The path to adirectory where the tokenizer will be saved.
+save_directory (:obj:`str`): The path to a directory where the tokenizer will be saved.
 Returns:
 A tuple of :obj:`str`: The files saved.
@@ -2383,7 +2383,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
 batch_size = len(encoded_inputs["input_ids"])
 assert all(
 len(v) == batch_size for v in encoded_inputs.values()
-), "Some items in the output dictionnary have a different batch size than others."
+), "Some items in the output dictionary have a different batch size than others."
 if padding_strategy == PaddingStrategy.LONGEST:
 max_length = max(len(inputs) for inputs in encoded_inputs["input_ids"])
@@ -2547,7 +2547,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
 sequence = ids + pair_ids if pair else ids
 token_type_ids = [0] * len(ids) + ([0] * len(pair_ids) if pair else [])
-# Build output dictionnary
+# Build output dictionary
 encoded_inputs["input_ids"] = sequence
 if return_token_type_ids:
 encoded_inputs["token_type_ids"] = token_type_ids
@@ -2819,7 +2819,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
 token_ids_1 (:obj:`List[int]`, `optional`):
 List of ids of the second sequence.
 already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
-Wheter or not the token list is already formated with special tokens for the model.
+Whether or not the token list is already formated with special tokens for the model.
 Returns:
 A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
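
The :obj:`get_special_tokens_mask` contract described twice above, shown on a short encoded input (the tokenizer choice is illustrative):

```python
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
ids = tokenizer.encode("Hello world")  # [CLS] hello world [SEP]
mask = tokenizer.get_special_tokens_mask(ids, already_has_special_tokens=True)
# mask == [1, 0, 0, 1]: 1 marks special tokens, 0 marks sequence tokens
```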


@@ -552,7 +552,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
 you want to reload it using the :meth:`~transformers.PreTrainedTokenizerFast.from_pretrained` class method.
 Args:
-save_directory (:obj:`str`): The path to adirectory where the tokenizer will be saved.
+save_directory (:obj:`str`): The path to a directory where the tokenizer will be saved.
 Returns:
 A tuple of :obj:`str`: The files saved.


@@ -895,7 +895,7 @@ class Trainer:
 - the documentation of `tune.run <https://docs.ray.io/en/latest/tune/api_docs/execution.html#tune-run>`__
 Returns:
-:class:`transformers.trainer_utils.BestRun`: All the informations about the best run.
+:class:`transformers.trainer_utils.BestRun`: All the information about the best run.
 """
 if backend is None:
 backend = default_hp_search_backend()
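
A minimal optuna-backed search matching this docstring (the `hp_space` body and trial count are illustrative; the Trainer needs a `model_init` so models can be re-instantiated between trials):

```python
def hp_space(trial):
    # optuna passes a Trial object; sample the hyperparameters to tune
    return {"learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True)}

best_run = trainer.hyperparameter_search(
    hp_space=hp_space,
    backend="optuna",
    n_trials=8,
    direction="minimize",  # minimize the evaluation loss
)
print(best_run.run_id, best_run.objective, best_run.hyperparameters)
```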


@@ -94,7 +94,7 @@ class TrainingArguments:
 logging_dir (:obj:`str`, `optional`):
 Tensorboard log directory. Will default to `runs/**CURRENT_DATETIME_HOSTNAME**`.
 logging_first_step (:obj:`bool`, `optional`, defaults to :obj:`False`):
-Wheter to log and evalulate the first :obj:`global_step` or not.
+Whether to log and evaluate the first :obj:`global_step` or not.
 logging_steps (:obj:`int`, `optional`, defaults to 500):
 Number of update steps between two logs.
 save_steps (:obj:`int`, `optional`, defaults to 500):
@@ -114,7 +114,7 @@ class TrainingArguments:
 local_rank (:obj:`int`, `optional`, defaults to -1):
 During distributed training, the rank of the process.
 tpu_num_cores (:obj:`int`, `optional`):
-When training on TPU, the mumber of TPU cores (automatically passed by launcher script).
+When training on TPU, the number of TPU cores (automatically passed by launcher script).
 debug (:obj:`bool`, `optional`, defaults to :obj:`False`):
 When training on TPU, whether to print debug metrics or not.
 dataloader_drop_last (:obj:`bool`, `optional`, defaults to :obj:`False`):
@@ -159,7 +159,7 @@ class TrainingArguments:
 Will default to :obj:`"loss"` if unspecified and :obj:`load_best_model_at_end=True` (to use the evaluation
 loss).
-If you set this value, :obj:`greater_is_better` will defaut to :obj:`True`. Don't forget to set it to
+If you set this value, :obj:`greater_is_better` will default to :obj:`True`. Don't forget to set it to
 :obj:`False` if your metric is better when lower.
 greater_is_better (:obj:`bool`, `optional`)
 Use in conjunction with :obj:`load_best_model_at_end` and :obj:`metric_for_best_model` to specify if better
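
The logging and best-model arguments corrected above combine like this (values are illustrative; remember to flip :obj:`greater_is_better` for loss-like metrics):

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",
    logging_first_step=True,           # also log/evaluate the first global_step
    logging_steps=500,
    evaluation_strategy="steps",       # evaluate on the same cadence
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,            # accuracy is better when higher
)
```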


@@ -66,7 +66,7 @@ class TFTrainingArguments(TrainingArguments):
 logging_dir (:obj:`str`, `optional`):
 Tensorboard log directory. Will default to `runs/**CURRENT_DATETIME_HOSTNAME**`.
 logging_first_step (:obj:`bool`, `optional`, defaults to :obj:`False`):
-Wheter to log and evalulate the first :obj:`global_step` or not.
+Whether to log and evaluate the first :obj:`global_step` or not.
 logging_steps (:obj:`int`, `optional`, defaults to 500):
 Number of update steps between two logs.
 save_steps (:obj:`int`, `optional`, defaults to 500):
@@ -86,7 +86,7 @@ class TFTrainingArguments(TrainingArguments):
 local_rank (:obj:`int`, `optional`, defaults to -1):
 During distributed training, the rank of the process.
 tpu_num_cores (:obj:`int`, `optional`):
-When training on TPU, the mumber of TPU cores (automatically passed by launcher script).
+When training on TPU, the number of TPU cores (automatically passed by launcher script).
 debug (:obj:`bool`, `optional`, defaults to :obj:`False`):
 Whether to activate the trace to record computation graphs and profiling information or not.
 dataloader_drop_last (:obj:`bool`, `optional`, defaults to :obj:`False`):