Fix some typos. (#17560)

* Fix some typos.

Signed-off-by: Yulv-git <yulvchi@qq.com>

* Fix typo.

Signed-off-by: Yulv-git <yulvchi@qq.com>

* make fixup.
This commit is contained in:
Yulv-git 2022-07-11 17:00:13 +08:00 committed by GitHub
parent ad28ca291b
commit 95113d1365
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
54 changed files with 80 additions and 68 deletions

View File

@ -289,7 +289,7 @@ from huggingface_hub import notebook_login
notebook_login()
```
You can then push to to your own namespace (or an organization you are a member of) like this:
You can then push to your own namespace (or an organization you are a member of) like this:
```py
resnet50d.push_to_hub("custom-resnet50d")

View File

@ -37,7 +37,7 @@ predicted token ids.
The feature extractor depends on `torchaudio` and the tokenizer depends on `sentencepiece` so be sure to
install those packages before running the examples. You could either install those as extra speech dependencies with
`pip install transformers"[speech, sentencepiece]"` or install the packages seperately with `pip install torchaudio sentencepiece`. Also `torchaudio` requires the development version of the [libsndfile](http://www.mega-nerd.com/libsndfile/) package which can be installed via a system package manager. On Ubuntu it can
`pip install transformers"[speech, sentencepiece]"` or install the packages separately with `pip install torchaudio sentencepiece`. Also `torchaudio` requires the development version of the [libsndfile](http://www.mega-nerd.com/libsndfile/) package which can be installed via a system package manager. On Ubuntu it can
be installed as follows: `apt install libsndfile1-dev`

View File

@ -1226,7 +1226,7 @@ This whole process would have been much easier if we only could set something li
experimental step, and let it fail without impacting the overall status of PRs. But as mentioned earlier CircleCI and
Github Actions don't support it at the moment.
You can vote for this feature and see where it is at at these CI-specific threads:
You can vote for this feature and see where it is at these CI-specific threads:
- [Github Actions:](https://github.com/actions/toolkit/issues/399)
- [CircleCI:](https://ideas.circleci.com/ideas/CCI-I-344)

View File

@ -140,7 +140,7 @@ class TokenClassificationTask:
# it easier for the model to learn the concept of sequences.
#
# For classification tasks, the first vector (corresponding to [CLS]) is
# used as as the "sentence vector". Note that this only makes sense because
# used as the "sentence vector". Note that this only makes sense because
# the entire model is fine-tuned.
tokens += [sep_token]
label_ids += [pad_token_label_id]

View File

@ -43,7 +43,7 @@ A good metric to observe during training is the gradient norm which should ideal
When training a model on large datasets it is recommended to run the data preprocessing
in a first run in a **non-distributed** mode via `--preprocessing_only` so that
when running the model in **distributed** mode in a second step the preprocessed data
when running the model in **distributed** mode in a second step the preprocessed data
can easily be loaded on each distributed device.
---

View File

@ -91,7 +91,7 @@ python scripts/initialize_model.py \
--model_name codeparrot \
--push_to_hub True
```
This will initialize a new model with the architecture and configuration of `gpt2-large` and use the tokenizer to appropriately size the input embeddings. Finally, the initilaized model is pushed the the hub.
This will initialize a new model with the architecture and configuration of `gpt2-large` and use the tokenizer to appropriately size the input embeddings. Finally, the initilaized model is pushed the hub.
We can either pass the name of a text dataset or a pretokenized dataset which speeds up training a bit.
Now that the tokenizer and model are also ready we can start training the model. The main training script is built with `accelerate` to scale across a wide range of platforms and infrastructure scales. We train two models with [110M](https://huggingface.co/lvwerra/codeparrot-small/) and [1.5B](https://huggingface.co/lvwerra/codeparrot/) parameters for 25-30B tokens on a 16xA100 (40GB) machine which takes 1 day and 1 week, respectively.

View File

@ -43,7 +43,7 @@ if __name__ == "__main__":
with open(args.data_file, "rb") as fp:
data = pickle.load(fp)
logger.info("Counting occurences for MLM.")
logger.info("Counting occurrences for MLM.")
counter = Counter()
for tk_ids in data:
counter.update(tk_ids)

View File

@ -49,7 +49,7 @@ At the end of the community week, each team should submit a demo of their projec
- **23.06.** Official announcement of the community week. Make sure to sign-up in [this google form](https://forms.gle/tVGPhjKXyEsSgUcs8).
- **23.06. - 30.06.** Participants will be added to an internal Slack channel. Project ideas can be proposed here and groups of 3-5 are formed. Read this document for more information.
- **30.06.** Release of all relevant training scripts in JAX/Flax as well as other documents on how to set up a TPU, how to use the training scripts, how to submit a demo, tips & tricks for JAX/Flax, tips & tricks for efficient use of the hub.
- **30.06.** Release of all relevant training scripts in JAX/Flax as well as other documents on how to set up a TPU, how to use the training scripts, how to submit a demo, tips & tricks for JAX/Flax, tips & tricks for efficient use of the hub.
- **30.06. - 2.07.** Talks about JAX/Flax, TPU, Transformers, Computer Vision & NLP will be held.
- **7.07.** Start of the community week! Access to TPUv3-8 will be given to each team.
- **7.07. - 14.07.** The Hugging Face & JAX/Flax & Cloud team will be available for any questions, problems the teams might run into.

View File

@ -106,7 +106,7 @@ def main():
return start_logits, end_logits, jnp.argmax(pooled_logits, axis=-1)
def evaluate(example):
# encode question and context so that they are seperated by a tokenizer.sep_token and cut at max_length
# encode question and context so that they are separated by a tokenizer.sep_token and cut at max_length
inputs = tokenizer(
example["question"],
example["context"],

View File

@ -22,7 +22,7 @@ the JAX/Flax backend and the [`pjit`](https://jax.readthedocs.io/en/latest/jax.e
> Note: The example is experimental and might have bugs. Also currently it only supports single V3-8.
The `partition.py` file defines the `PyTree` of `ParitionSpec` for the GPTNeo model which describes how the model will be sharded.
The actual sharding is auto-matically handled by `pjit`. The weights are sharded accross all local devices.
The actual sharding is auto-matically handled by `pjit`. The weights are sharded across all local devices.
To adapt the script for other models, we need to also change the `ParitionSpec` accordingly.
TODO: Add more explantion.

View File

@ -78,7 +78,7 @@ class FlaxBeamSearchOutput(ModelOutput):
sequences (`jnp.ndarray` of shape `(batch_size, max_length)`):
The generated sequences.
scores (`jnp.ndarray` of shape `(batch_size,)`):
The scores (log probabilites) of the generated sequences.
The scores (log probabilities) of the generated sequences.
"""
sequences: jnp.ndarray = None

View File

@ -277,7 +277,7 @@ class PushToHubCallback(Callback):
for instance `"user_name/model"`, which allows you to push to an organization you are a member of with
`"organization_name/model"`.
Will default to to the name of `output_dir`.
Will default to the name of `output_dir`.
hub_token (`str`, *optional*):
The token to use to push the model to the Hub. Will default to the token in the cache folder obtained with
`huggingface-cli login`.

View File

@ -1267,7 +1267,7 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel, TFCausalLanguageMode
super().__init__(config, *inputs, **kwargs)
self.model = TFBartMainLayer(config, load_weight_prefix=load_weight_prefix, name="model")
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -1253,7 +1253,7 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal
super().__init__(config, *inputs, **kwargs)
self.model = TFBlenderbotMainLayer(config, name="model")
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -1240,7 +1240,7 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel
super().__init__(config, *inputs, **kwargs)
self.model = TFBlenderbotSmallMainLayer(config, name="model")
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -184,7 +184,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -2054,7 +2054,7 @@ class DetrLoss(nn.Module):
# Retrieve the matching between the outputs of the last layer and the targets
indices = self.matcher(outputs_without_aux, targets)
# Compute the average number of target boxes accross all nodes, for normalization purposes
# Compute the average number of target boxes across all nodes, for normalization purposes
num_boxes = sum(len(t["class_labels"]) for t in targets)
num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
# (Niels): comment out function below, distributed training to be added

View File

@ -212,7 +212,7 @@ class TFElectraSelfOutput(tf.keras.layers.Layer):
return hidden_states
# Copied from from transformers.models.bert.modeling_tf_bert.TFBertAttention with Bert->Electra
# Copied from transformers.models.bert.modeling_tf_bert.TFBertAttention with Bert->Electra
class TFElectraAttention(tf.keras.layers.Layer):
def __init__(self, config: ElectraConfig, **kwargs):
super().__init__(**kwargs)

View File

@ -83,7 +83,7 @@ class FlavaImageConfig(PretrainedConfig):
>>> # Initializing a FlavaImageModel with style configuration
>>> configuration = FlavaImageConfig()
>>> # Initializing a FlavaImageModel model from the style configuration
>>> # Initializing a FlavaImageModel model from the style configuration
>>> model = FlavaImageModel(configuration)
>>> # Accessing the model configuration
@ -212,7 +212,7 @@ class FlavaTextConfig(PretrainedConfig):
>>> # Initializing a FlavaTextModel with style configuration
>>> configuration = FlavaTextConfig()
>>> # Initializing a FlavaTextConfig from the style configuration
>>> # Initializing a FlavaTextConfig from the style configuration
>>> model = FlavaTextModel(configuration)
>>> # Accessing the model configuration
@ -321,7 +321,7 @@ class FlavaMultimodalConfig(PretrainedConfig):
>>> # Initializing a FlavaMultimodalModel with style configuration
>>> configuration = FlavaMultimodalConfig()
>>> # Initializing a FlavaMultimodalModel model from the style configuration
>>> # Initializing a FlavaMultimodalModel model from the style configuration
>>> model = FlavaMultimodalModel(configuration)
>>> # Accessing the model configuration

View File

@ -82,10 +82,10 @@ class HubertConfig(PretrainedConfig):
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
conv_stride (`Tuple[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
conv_kernel (`Tuple[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
length of *conv_kernel* defines the number of convolutional layers and has to match the length of
*conv_dim*.
conv_bias (`bool`, *optional*, defaults to `False`):
Whether the 1D convolutional layers have a bias.

View File

@ -174,7 +174,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -203,7 +203,7 @@ def _compute_mask_indices(
Computes random mask spans for a given shape
Args:
shape: the the shape for which to compute masks.
shape: the shape for which to compute masks.
should be of size 2 where first element is batch size and 2nd is timesteps
attention_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
mask_prob:

View File

@ -2330,7 +2330,7 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
super().__init__(config, *inputs, **kwargs)
self.led = TFLEDMainLayer(config, name="led")
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -1110,7 +1110,7 @@ class LxmertForPreTraining(LxmertPreTrainedModel):
def get_qa_logit_layer(self) -> nn.Module:
"""
Returns the the linear layer that produces question answering logits.
Returns the linear layer that produces question answering logits.
Returns:
`nn.Module`: A torch module mapping the question answering prediction hidden states or `None` if LXMERT
@ -1341,7 +1341,7 @@ class LxmertForQuestionAnswering(LxmertPreTrainedModel):
def get_qa_logit_layer(self) -> nn.Module:
"""
Returns the the linear layer that produces question answering logits
Returns the linear layer that produces question answering logits
Returns:
`nn.Module`: A torch module mapping the question answering prediction hidden states. `None`: A NoneType

View File

@ -1283,7 +1283,7 @@ class TFMarianMTModel(TFMarianPreTrainedModel, TFCausalLanguageModelingLoss):
super().__init__(config, *inputs, **kwargs)
self.model = TFMarianMainLayer(config, name="model")
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -1912,7 +1912,7 @@ class MaskFormerLoss(nn.Module):
def get_num_masks(self, class_labels: torch.Tensor, device: torch.device) -> torch.Tensor:
"""
Computes the average number of target masks accross the batch, for normalization purposes.
Computes the average number of target masks across the batch, for normalization purposes.
"""
num_masks = sum([len(classes) for classes in class_labels])
num_masks_pt = torch.as_tensor([num_masks], dtype=torch.float, device=device)

View File

@ -1280,7 +1280,7 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo
super().__init__(config, *inputs, **kwargs)
self.model = TFMBartMainLayer(config, name="model")
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -1292,7 +1292,7 @@ class TFPegasusForConditionalGeneration(TFPegasusPreTrainedModel, TFCausalLangua
super().__init__(config, *inputs, **kwargs)
self.model = TFPegasusMainLayer(config, name="model")
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -28,7 +28,7 @@ RAG_CONFIG_DOC = r"""
title_sep (`str`, *optional*, defaults to `" / "`):
Separator inserted between the title and the text of the retrieved document when calling [`RagRetriever`].
doc_sep (`str`, *optional*, defaults to `" // "`):
Separator inserted between the the text of the retrieved document and the original input when calling
Separator inserted between the text of the retrieved document and the original input when calling
[`RagRetriever`].
n_docs (`int`, *optional*, defaults to 5):
Number of documents to retrieve.

View File

@ -81,10 +81,10 @@ class SEWConfig(PretrainedConfig):
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1)`):
A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 1, 3, 1, 3, 1, 3, 1, 2, 1, 2, 1)`):
A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
length of *conv_kernel* defines the number of convolutional layers and has to match the length of
*conv_dim*.
conv_bias (`bool`, *optional*, defaults to `False`):
Whether the 1D convolutional layers have a bias.

View File

@ -174,7 +174,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -99,10 +99,10 @@ class SEWDConfig(PretrainedConfig):
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1)`):
A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 1, 3, 1, 3, 1, 3, 1, 2, 1, 2, 1)`):
A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
length of *conv_kernel* defines the number of convolutional layers and has to match the length of
*conv_dim*.
conv_bias (`bool`, *optional*, defaults to `False`):
Whether the 1D convolutional layers have a bias.

View File

@ -175,7 +175,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -85,10 +85,10 @@ class UniSpeechConfig(PretrainedConfig):
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
length of *conv_kernel* defines the number of convolutional layers and has to match the length of
*conv_dim*.
conv_bias (`bool`, *optional*, defaults to `False`):
Whether the 1D convolutional layers have a bias.

View File

@ -210,7 +210,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -86,10 +86,10 @@ class UniSpeechSatConfig(PretrainedConfig):
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
length of *conv_kernel* defines the number of convolutional layers and has to match the length of
*conv_dim*.
conv_bias (`bool`, *optional*, defaults to `False`):
Whether the 1D convolutional layers have a bias.

View File

@ -224,7 +224,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -120,7 +120,7 @@ def _compute_mask_indices(
CPU as part of the preprocessing during training.
Args:
shape: the the shape for which to compute masks.
shape: the shape for which to compute masks.
should be of size 2 where first element is batch size and 2nd is timesteps
mask_prob:
probability for each token to be chosen as start of the span to be masked. this will be multiplied by

View File

@ -244,7 +244,7 @@ def _compute_mask_indices(
Computes random mask spans for a given shape
Args:
shape: the the shape for which to compute masks.
shape: the shape for which to compute masks.
should be of size 2 where first element is batch size and 2nd is timesteps
attention_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
mask_prob:

View File

@ -234,7 +234,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -231,7 +231,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -82,10 +82,10 @@ class WavLMConfig(PretrainedConfig):
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
length of *conv_kernel* defines the number of convolutional layers and has to match the length of
*conv_dim*.
conv_bias (`bool`, *optional*, defaults to `False`):
Whether the 1D convolutional layers have a bias.

View File

@ -183,7 +183,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
# add offset to the starting indexes so that that indexes now create a span
# add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length

View File

@ -1069,7 +1069,7 @@ class YolosLoss(nn.Module):
# Retrieve the matching between the outputs of the last layer and the targets
indices = self.matcher(outputs_without_aux, targets)
# Compute the average number of target boxes accross all nodes, for normalization purposes
# Compute the average number of target boxes across all nodes, for normalization purposes
num_boxes = sum(len(t["class_labels"]) for t in targets)
num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
# (Niels): comment out function below, distributed training to be added

View File

@ -487,7 +487,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
def fill_with_past_key_values_(self, inputs_or_outputs: Mapping[str, Mapping[int, str]], direction: str):
"""
Fill the input_or_ouputs mapping with past_key_values dynamic axes considering.
Fill the input_or_outputs mapping with past_key_values dynamic axes considering.
Args:
inputs_or_outputs: The mapping to fill.

View File

@ -412,8 +412,8 @@ class TrainingArguments:
down the training and evaluation speed.
push_to_hub (`bool`, *optional*, defaults to `False`):
Whether or not to push the model to the Hub every time the model is saved. If this is activated,
`output_dir` will begin a git directory synced with the the repo (determined by `hub_model_id`) and the
content will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
`output_dir` will begin a git directory synced with the repo (determined by `hub_model_id`) and the content
will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
[`~Trainer.save_model`] will also trigger a push.
<Tip warning={true}>
@ -434,7 +434,7 @@ class TrainingArguments:
`"organization_name/model"`. Will default to `user_name/output_dir_name` with *output_dir_name* being the
name of `output_dir`.
Will default to to the name of `output_dir`.
Will default to the name of `output_dir`.
hub_strategy (`str` or [`~trainer_utils.HubStrategy`], *optional*, defaults to `"every_save"`):
Defines the scope of what is pushed to the Hub and when. Possible values are:

View File

@ -990,7 +990,7 @@ tokenizer.
For [camelcase name of model], the tokenizer files can be found here:
- [To be filled out by mentor]
and having implemented the 🤗Transformers' version of the tokenizer can be loaded as follows:
and having implemented the 🤗 Transformers' version of the tokenizer can be loaded as follows:
[To be filled out by mentor]

View File

@ -2821,7 +2821,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
self.model = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="model")
self.model._set_save_spec(inputs=self.serving.input_signature)
self.use_cache = config.use_cache
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)

View File

@ -2183,7 +2183,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
total_length = len(sequence["input_ids"])
self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
self.assertGreater(
total_length, 4, "Issue with the testing sequence, please update it, it's too short"
)
# Test with max model input length
model_max_length = tokenizer.model_max_length
@ -2193,7 +2195,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
total_length1 = len(sequence1["input_ids"])
self.assertGreater(
total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
total_length1,
model_max_length,
"Issue with the testing sequence, please update it, it's too short",
)
# Simple

View File

@ -2097,7 +2097,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
total_length = len(sequence["input_ids"])
self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
self.assertGreater(
total_length, 4, "Issue with the testing sequence, please update it, it's too short"
)
# Test with max model input length
model_max_length = tokenizer.model_max_length
@ -2107,7 +2109,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
total_length1 = len(sequence1["input_ids"])
self.assertGreater(
total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
total_length1,
model_max_length,
"Issue with the testing sequence, please update it, it's too short",
)
# Simple

View File

@ -281,7 +281,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
super().check_pt_tf_models(tf_model, pt_model, tf_inputs_dict)
# overwrite from common since TFViTMAEForPretraining outputs loss along with
# logits and mask indices. loss and mask indicies are not suitable for integration
# logits and mask indices. loss and mask indices are not suitable for integration
# with other keras modules.
def test_compile_tf_model(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@ -278,7 +278,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"
model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, use_fast=True)
sentence = """Enzo works at the the UN"""
sentence = """Enzo works at the UN"""
token_classifier = pipeline("ner", model=model, tokenizer=tokenizer)
output = token_classifier(sentence)
self.assertEqual(

View File

@ -990,7 +990,9 @@ class TokenizerTesterMixin:
sequence = tokenizer.encode(seq_0, add_special_tokens=False)
total_length = len(sequence)
self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
self.assertGreater(
total_length, 4, "Issue with the testing sequence, please update it, it's too short"
)
# Test with max model input length
model_max_length = tokenizer.model_max_length
@ -1000,7 +1002,9 @@ class TokenizerTesterMixin:
sequence1 = tokenizer(seq_1, add_special_tokens=False)
total_length1 = len(sequence1["input_ids"])
self.assertGreater(
total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
total_length1,
model_max_length,
"Issue with the testing sequence, please update it, it's too short",
)
# Simple

View File

@ -53,7 +53,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
return "".join(lines[start_index:end_index]), start_index, end_index, lines
# Add here suffixes that are used to identify models, seperated by |
# Add here suffixes that are used to identify models, separated by |
ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration"
# Regexes that match TF/Flax/PT model names.
_re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)")