Fix typing for None valued variables (#37004)
Fix typing for None-able variables
This commit is contained in:
parent 8c5e29bad5
commit de77f5b1ec
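The pattern fixed throughout this diff is PEP 484's "implicit Optional": annotating a parameter as `str` while defaulting it to `None` makes the annotation wrong, and strict type checkers (e.g. mypy with its now-default `no_implicit_optional`) reject it. A minimal sketch of the before and after, with hypothetical names rather than code taken from this commit:

    from typing import Optional

    # Before: the annotation claims `str`, but the default is None.
    # Strict checkers flag the default as incompatible with the type.
    def load_vocab(path: str = None):
        ...

    # After: the annotation states explicitly that None is allowed.
    # (On Python 3.10+ this can also be spelled `str | None`.)
    def load_vocab_fixed(path: Optional[str] = None) -> str:
        return "built-in vocab" if path is None else f"vocab from {path}"

Runtime behavior is unchanged in every hunk below; only the declared types are corrected to match the defaults.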
@@ -19,6 +19,7 @@ import json
 import os
 import re
 from contextlib import contextmanager
+from typing import Optional

 from transformers.utils.import_utils import export

@@ -284,7 +285,7 @@ def model_addition_debugger(cls):

 @export(backends=("torch",))
 @contextmanager
-def model_addition_debugger_context(model, debug_path: str = None):
+def model_addition_debugger_context(model, debug_path: Optional[str] = None):
 """
 # Model addition debugger - context manager for model adders
 This context manager is a power user tool intended for model adders.
@@ -42,7 +42,7 @@ class BaseModelOutput(ModelOutput):
 heads.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -62,7 +62,7 @@ class BaseModelOutputWithNoAttention(ModelOutput):
 Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -92,8 +92,8 @@ class BaseModelOutputWithPooling(ModelOutput):
 heads.
 """

-last_hidden_state: torch.FloatTensor = None
-pooler_output: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
+pooler_output: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -115,8 +115,8 @@ class BaseModelOutputWithPoolingAndNoAttention(ModelOutput):
 Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
 """

-last_hidden_state: torch.FloatTensor = None
-pooler_output: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
+pooler_output: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -153,7 +153,7 @@ class BaseModelOutputWithPast(ModelOutput):
 heads.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -186,7 +186,7 @@ class BaseModelOutputWithCrossAttentions(ModelOutput):
 weighted average in the cross-attention heads.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
 cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -233,8 +233,8 @@ class BaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
 input) to speed up sequential decoding.
 """

-last_hidden_state: torch.FloatTensor = None
-pooler_output: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
+pooler_output: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -280,7 +280,7 @@ class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
 weighted average in the cross-attention heads.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -327,12 +327,12 @@ class MoECausalLMOutputWithPast(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
-z_loss: torch.FloatTensor = None
-aux_loss: torch.FloatTensor = None
+z_loss: Optional[torch.FloatTensor] = None
+aux_loss: Optional[torch.FloatTensor] = None
 router_logits: Optional[Tuple[torch.FloatTensor]] = None

@@ -362,7 +362,7 @@ class MoEModelOutput(ModelOutput):
 loss and the z_loss for Mixture of Experts models.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
 router_probs: Optional[Tuple[torch.FloatTensor]] = None
@@ -403,7 +403,7 @@ class MoeModelOutputWithPast(ModelOutput):
 loss for Mixture of Experts models.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -452,7 +452,7 @@ class MoeCausalLMOutputWithPast(ModelOutput):

 loss: Optional[torch.FloatTensor] = None
 aux_loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -504,7 +504,7 @@ class MoEModelOutputWithPastAndCrossAttentions(ModelOutput):
 loss and the z_loss for Mixture of Experts models.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -563,7 +563,7 @@ class Seq2SeqModelOutput(ModelOutput):
 self-attention heads.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -633,7 +633,7 @@ class Seq2SeqMoEModelOutput(ModelOutput):
 modules.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -669,7 +669,7 @@ class CausalLMOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -704,7 +704,7 @@ class CausalLMOutputWithPast(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -747,7 +747,7 @@ class CausalLMOutputWithCrossAttentions(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -784,7 +784,7 @@ class SequenceClassifierOutputWithPast(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -814,7 +814,7 @@ class MaskedLMOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -869,7 +869,7 @@ class Seq2SeqLMOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -938,11 +938,11 @@ class Seq2SeqMoEOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
-encoder_z_loss: torch.FloatTensor = None
-decoder_z_loss: torch.FloatTensor = None
-encoder_aux_loss: torch.FloatTensor = None
-decoder_aux_loss: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
+encoder_z_loss: Optional[torch.FloatTensor] = None
+decoder_z_loss: Optional[torch.FloatTensor] = None
+encoder_aux_loss: Optional[torch.FloatTensor] = None
+decoder_aux_loss: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -979,7 +979,7 @@ class NextSentencePredictorOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1008,7 +1008,7 @@ class SequenceClassifierOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -1063,7 +1063,7 @@ class Seq2SeqSequenceClassifierOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -1099,7 +1099,7 @@ class MultipleChoiceModelOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1128,7 +1128,7 @@ class TokenClassifierOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1159,8 +1159,8 @@ class QuestionAnsweringModelOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-start_logits: torch.FloatTensor = None
-end_logits: torch.FloatTensor = None
+start_logits: Optional[torch.FloatTensor] = None
+end_logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1217,8 +1217,8 @@ class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-start_logits: torch.FloatTensor = None
-end_logits: torch.FloatTensor = None
+start_logits: Optional[torch.FloatTensor] = None
+end_logits: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -1261,7 +1261,7 @@ class SemanticSegmenterOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1289,7 +1289,7 @@ class ImageClassifierOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1311,7 +1311,7 @@ class ImageClassifierOutputWithNoAttention(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


@@ -1340,7 +1340,7 @@ class DepthEstimatorOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-predicted_depth: torch.FloatTensor = None
+predicted_depth: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1368,7 +1368,7 @@ class ImageSuperResolutionOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-reconstruction: torch.FloatTensor = None
+reconstruction: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -1396,8 +1396,8 @@ class Wav2Vec2BaseModelOutput(ModelOutput):
 heads.
 """

-last_hidden_state: torch.FloatTensor = None
-extract_features: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
+extract_features: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1428,8 +1428,8 @@ class XVectorOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-logits: torch.FloatTensor = None
-embeddings: torch.FloatTensor = None
+logits: Optional[torch.FloatTensor] = None
+embeddings: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1456,7 +1456,7 @@ class BackboneOutput(ModelOutput):
 heads.
 """

-feature_maps: Tuple[torch.FloatTensor] = None
+feature_maps: Optional[Tuple[torch.FloatTensor]] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

@@ -1491,8 +1491,8 @@ class BaseModelOutputWithPoolingAndProjection(ModelOutput):
 Text embeddings before the projection layer, used to mimic the last hidden state of the teacher encoder.
 """

-last_hidden_state: torch.FloatTensor = None
-pooler_output: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
+pooler_output: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
 projection_state: Optional[Tuple[torch.FloatTensor]] = None
@@ -1548,7 +1548,7 @@ class Seq2SeqSpectrogramOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-spectrogram: torch.FloatTensor = None
+spectrogram: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -1617,7 +1617,7 @@ class Seq2SeqTSModelOutput(ModelOutput):
 Static features of each time series' in a batch which are copied to the covariates at inference time.
 """

-last_hidden_state: torch.FloatTensor = None
+last_hidden_state: Optional[torch.FloatTensor] = None
 past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
 decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@@ -1713,7 +1713,7 @@ class SampleTSPredictionOutput(ModelOutput):
 Sampled values from the chosen distribution.
 """

-sequences: torch.FloatTensor = None
+sequences: Optional[torch.FloatTensor] = None


 @dataclass
@@ -1739,7 +1739,7 @@ class MaskedImageModelingOutput(ModelOutput):
 """

 loss: Optional[torch.FloatTensor] = None
-reconstruction: torch.FloatTensor = None
+reconstruction: Optional[torch.FloatTensor] = None
 hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
 attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
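The modeling-output hunks above differ from the parameter fixes elsewhere in this commit only in that the annotations sit on dataclass fields: a field declared `torch.FloatTensor` with a default of `None` is the same implicit Optional. A minimal sketch with a stand-in dataclass (not the real `ModelOutput`):

    from dataclasses import dataclass
    from typing import Optional, Tuple

    import torch

    @dataclass
    class TinyOutput:
        # Before the fix: `last_hidden_state: torch.FloatTensor = None`,
        # which strict checkers reject since None is not a FloatTensor.
        last_hidden_state: Optional[torch.FloatTensor] = None
        hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None

    out = TinyOutput(last_hidden_state=torch.zeros(1, 4))
    print(out.last_hidden_state.shape)  # torch.Size([1, 4])
    print(out.hidden_states)            # None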
@@ -19,7 +19,7 @@ import json
 import os
 import re
 from os import path
-from typing import Dict, Union
+from typing import Dict, Optional, Union

 import torch
 from huggingface_hub import split_torch_state_dict_into_shards
@@ -172,7 +172,7 @@ def convert_mamba_ssm_checkpoint_file_to_huggingface_model_file(
 mamba_ssm_checkpoint_path: str,
 precision: str,
 output_dir: str,
-tokenizer_path: str = None,
+tokenizer_path: Optional[str] = None,
 save_model: Union[bool, str] = True,
 ) -> None:
 # load tokenizer if provided, this will be used to set the
@@ -175,7 +175,7 @@ class BarkProcessor(ProcessorMixin):

 super().save_pretrained(save_directory, push_to_hub, **kwargs)

-def _load_voice_preset(self, voice_preset: str = None, **kwargs):
+def _load_voice_preset(self, voice_preset: Optional[str] = None, **kwargs):
 voice_preset_paths = self.speaker_embeddings[voice_preset]

 voice_preset_dict = {}
@@ -412,7 +412,7 @@ class FlaxBigBirdSelfAttention(nn.Module):

 class FlaxBigBirdBlockSparseAttention(nn.Module):
 config: BigBirdConfig
-block_sparse_seed: int = None
+block_sparse_seed: Optional[int] = None
 dtype: jnp.dtype = jnp.float32

 def setup(self):
@@ -1262,7 +1262,7 @@ class FlaxBigBirdSelfOutput(nn.Module):

 class FlaxBigBirdAttention(nn.Module):
 config: BigBirdConfig
-layer_id: int = None
+layer_id: Optional[int] = None
 causal: bool = False
 dtype: jnp.dtype = jnp.float32

@@ -1362,7 +1362,7 @@ class FlaxBigBirdOutput(nn.Module):

 class FlaxBigBirdLayer(nn.Module):
 config: BigBirdConfig
-layer_id: int = None
+layer_id: Optional[int] = None
 dtype: jnp.dtype = jnp.float32 # the dtype of the computation

 def setup(self):
@@ -180,7 +180,7 @@ class BitImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -176,7 +176,7 @@ class ChameleonImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -169,7 +169,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -92,7 +92,7 @@ class ClapFeatureExtractor(SequenceFeatureExtractor):
 return_attention_mask=False, # pad inputs to max length with silence token (zero) and no attention mask
 frequency_min: float = 0,
 frequency_max: float = 14_000,
-top_db: int = None,
+top_db: Optional[int] = None,
 truncation: str = "fusion",
 padding: str = "repeatpad",
 **kwargs,
@@ -258,7 +258,7 @@ class ClapFeatureExtractor(SequenceFeatureExtractor):
 def __call__(
 self,
 raw_speech: Union[np.ndarray, List[float], List[np.ndarray], List[List[float]]],
-truncation: str = None,
+truncation: Optional[str] = None,
 padding: Optional[str] = None,
 max_length: Optional[int] = None,
 sampling_rate: Optional[int] = None,
@@ -204,7 +204,7 @@ class CLIPImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -1360,7 +1360,7 @@ class CLIPSegForImageSegmentation(CLIPSegPreTrainedModel):

 def get_conditional_embeddings(
 self,
-batch_size: int = None,
+batch_size: Optional[int] = None,
 input_ids: Optional[torch.Tensor] = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_ids: Optional[torch.Tensor] = None,
@@ -287,7 +287,7 @@ class DacResidualVectorQuantize(nn.Module):
 self.quantizers = nn.ModuleList([DacVectorQuantize(config) for i in range(config.n_codebooks)])
 self.quantizer_dropout = quantizer_dropout

-def forward(self, hidden_state, n_quantizers: int = None):
+def forward(self, hidden_state, n_quantizers: Optional[int] = None):
 """
 Quantizes the input tensor using a fixed set of codebooks and returns corresponding codebook vectors.
 Args:
@@ -608,7 +608,7 @@ class DacModel(DacPreTrainedModel):
 def encode(
 self,
 input_values: torch.Tensor,
-n_quantizers: int = None,
+n_quantizers: Optional[int] = None,
 return_dict: Optional[bool] = None,
 ):
 """
@@ -681,7 +681,7 @@ class DacModel(DacPreTrainedModel):
 def forward(
 self,
 input_values: torch.Tensor,
-n_quantizers: int = None,
+n_quantizers: Optional[int] = None,
 return_dict: Optional[bool] = None,
 ):
 """
@@ -462,7 +462,7 @@ class SPMTokenizer:

 return ["".join(x) for x in output]

-def save_pretrained(self, path: str, filename_prefix: str = None):
+def save_pretrained(self, path: str, filename_prefix: Optional[str] = None):
 filename = VOCAB_FILES_NAMES[list(VOCAB_FILES_NAMES.keys())[0]]
 if filename_prefix is not None:
 filename = filename_prefix + "-" + filename
@@ -182,7 +182,7 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: Optional[bool] = None,
 rescale_factor: Optional[float] = None,
 do_normalize: Optional[bool] = None,
@@ -14,6 +14,8 @@
 # limitations under the License.
 """Graphormer model configuration"""

+from typing import Optional
+
 from ....configuration_utils import PretrainedConfig
 from ....utils import logging

@@ -159,8 +161,8 @@ class GraphormerConfig(PretrainedConfig):
 traceable: bool = False,
 q_noise: float = 0.0,
 qn_block_size: int = 8,
-kdim: int = None,
-vdim: int = None,
+kdim: Optional[int] = None,
+vdim: Optional[int] = None,
 bias: bool = True,
 self_attention: bool = True,
 pad_token_id=0,
@@ -162,7 +162,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
 lower_case=False,
 delimiter=None,
 vocab_file=None,
-pretrained_vocab_file: str = None,
+pretrained_vocab_file: Optional[str] = None,
 never_split=None,
 unk_token="<unk>",
 eos_token="<eos>",
@@ -280,7 +280,7 @@ class TvltImageProcessor(BaseImageProcessor):
 do_resize: bool = None,
 size: Dict[str, int] = None,
 patch_size: List[int] = None,
-num_frames: int = None,
+num_frames: Optional[int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
 crop_size: Dict[str, int] = None,
|
||||
from dataclasses import dataclass, field
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
@ -163,7 +163,7 @@ def convert_weight_and_push(
|
||||
print(f"Pushed {checkpoint_name}")
|
||||
|
||||
|
||||
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
|
||||
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
|
||||
filename = "imagenet-1k-id2label.json"
|
||||
num_labels = 1000
|
||||
|
||||
|
@@ -196,7 +196,7 @@ class ViTHybridImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -161,7 +161,7 @@ class DPTImageProcessor(BaseImageProcessor):
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
 do_pad: bool = False,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 do_reduce_labels: bool = False,
 **kwargs,
 ) -> None:
@@ -299,14 +299,14 @@ class DPTImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 keep_aspect_ratio: bool = None,
-ensure_multiple_of: int = None,
+ensure_multiple_of: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
 do_pad: bool = None,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 input_data_format: Optional[Union[str, ChannelDimension]] = None,
 ):
 if do_reduce_labels:
@@ -340,14 +340,14 @@ class DPTImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 keep_aspect_ratio: bool = None,
-ensure_multiple_of: int = None,
+ensure_multiple_of: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
 do_pad: bool = None,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 data_format: Optional[Union[str, ChannelDimension]] = None,
 input_data_format: Optional[Union[str, ChannelDimension]] = None,
 ) -> np.ndarray:
@@ -391,7 +391,7 @@ class DPTImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 keep_aspect_ratio: bool = None,
-ensure_multiple_of: int = None,
+ensure_multiple_of: Optional[int] = None,
 do_reduce_labels: bool = None,
 input_data_format: Optional[Union[str, ChannelDimension]] = None,
 ):
@@ -437,9 +437,9 @@ class DPTImageProcessor(BaseImageProcessor):
 images: ImageInput,
 segmentation_maps: Optional[ImageInput] = None,
 do_resize: bool = None,
-size: int = None,
+size: Optional[int] = None,
 keep_aspect_ratio: bool = None,
-ensure_multiple_of: int = None,
+ensure_multiple_of: Optional[int] = None,
 resample: PILImageResampling = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
@@ -447,7 +447,7 @@ class DPTImageProcessor(BaseImageProcessor):
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
 do_pad: bool = None,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 do_reduce_labels: Optional[bool] = None,
 return_tensors: Optional[Union[str, TensorType]] = None,
 data_format: ChannelDimension = ChannelDimension.FIRST,
@@ -398,8 +398,8 @@ class EncoderDecoderModel(PreTrainedModel, GenerationMixin):
 @classmethod
 def from_encoder_decoder_pretrained(
 cls,
-encoder_pretrained_model_name_or_path: str = None,
-decoder_pretrained_model_name_or_path: str = None,
+encoder_pretrained_model_name_or_path: Optional[str] = None,
+decoder_pretrained_model_name_or_path: Optional[str] = None,
 *model_args,
 **kwargs,
 ) -> PreTrainedModel:
@@ -311,8 +311,8 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
 @classmethod
 def from_encoder_decoder_pretrained(
 cls,
-encoder_pretrained_model_name_or_path: str = None,
-decoder_pretrained_model_name_or_path: str = None,
+encoder_pretrained_model_name_or_path: Optional[str] = None,
+decoder_pretrained_model_name_or_path: Optional[str] = None,
 *model_args,
 **kwargs,
 ) -> TFPreTrainedModel:
@@ -172,7 +172,7 @@ class EsmConfig(PretrainedConfig):

 @dataclass
 class EsmFoldConfig:
-esm_type: str = None
+esm_type: Optional[str] = None
 fp16_esm: bool = True
 use_esm_attn_map: bool = False
 esm_ablate_pairwise: bool = False
@@ -249,7 +249,7 @@ class FlavaImageProcessor(BaseImageProcessor):
 codebook_size: bool = None,
 codebook_resample: int = PILImageResampling.LANCZOS,
 codebook_do_center_crop: bool = True,
-codebook_crop_size: int = None,
+codebook_crop_size: Optional[int] = None,
 codebook_do_rescale: bool = True,
 codebook_rescale_factor: Union[int, float] = 1 / 255,
 codebook_do_map_pixels: bool = True,
@@ -104,8 +104,8 @@ class Gemma3ImageProcessor(BaseImageProcessor):
 image_std: Optional[Union[float, List[float]]] = None,
 do_convert_rgb: bool = None,
 do_pan_and_scan: bool = None,
-pan_and_scan_min_crop_size: int = None,
-pan_and_scan_max_num_crops: int = None,
+pan_and_scan_min_crop_size: Optional[int] = None,
+pan_and_scan_max_num_crops: Optional[int] = None,
 pan_and_scan_min_ratio_to_activate: float = None,
 **kwargs,
 ) -> None:
@@ -253,8 +253,8 @@ class Gemma3ImageProcessor(BaseImageProcessor):
 input_data_format: Optional[Union[str, ChannelDimension]] = None,
 do_convert_rgb: bool = None,
 do_pan_and_scan: bool = None,
-pan_and_scan_min_crop_size: int = None,
-pan_and_scan_max_num_crops: int = None,
+pan_and_scan_min_crop_size: Optional[int] = None,
+pan_and_scan_max_num_crops: Optional[int] = None,
 pan_and_scan_min_ratio_to_activate: float = None,
 ) -> PIL.Image.Image:
 """
@@ -509,7 +509,7 @@ class IdeficsAttention(nn.Module):
 is_cross_attention: bool = False,
 config: PretrainedConfig = None,
 qk_layer_norms: bool = False,
-layer_idx: int = None,
+layer_idx: Optional[int] = None,
 ):
 super().__init__()
 self.hidden_size = hidden_size
@@ -675,7 +675,7 @@ class IdeficsAttention(nn.Module):

 # this was adapted from LlamaDecoderLayer
 class IdeficsDecoderLayer(nn.Module):
-def __init__(self, config: IdeficsConfig, layer_idx: int = None):
+def __init__(self, config: IdeficsConfig, layer_idx: Optional[int] = None):
 super().__init__()
 self.hidden_size = config.hidden_size
 self.self_attn = IdeficsAttention(
@@ -754,7 +754,7 @@ class IdeficsDecoderLayer(nn.Module):


 class IdeficsGatedCrossAttentionLayer(nn.Module):
-def __init__(self, config: IdeficsConfig, layer_idx: int = None):
+def __init__(self, config: IdeficsConfig, layer_idx: Optional[int] = None):
 super().__init__()
 self.hidden_size = config.hidden_size
 self.cross_attn = IdeficsAttention(
@@ -89,7 +89,9 @@ class Idefics2Processor(ProcessorMixin):
 image_processor_class = "Idefics2ImageProcessor"
 tokenizer_class = "AutoTokenizer"

-def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, chat_template: str = None, **kwargs):
+def __init__(
+    self, image_processor, tokenizer=None, image_seq_len: int = 64, chat_template: Optional[str] = None, **kwargs
+):
 if image_processor is None:
 raise ValueError("You need to specify an `image_processor`.")
 if tokenizer is None:
@@ -133,7 +133,9 @@ class Idefics3Processor(ProcessorMixin):
 image_processor_class = "Idefics3ImageProcessor"
 tokenizer_class = "AutoTokenizer"

-def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: str = None, **kwargs):
+def __init__(
+    self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: Optional[str] = None, **kwargs
+):
 if image_processor is None:
 raise ValueError("You need to specify an `image_processor`.")
 if tokenizer is None:
@@ -19,6 +19,7 @@ import json
 from collections import OrderedDict
 from functools import partial
 from pathlib import Path
+from typing import Optional

 import timm
 import torch
@@ -79,7 +80,7 @@ def convert_weight_and_push(
 print(f"Pushed {checkpoint_name}")


-def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
+def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
 filename = "imagenet-1k-id2label.json"
 num_labels = 1000
 expected_shape = (1, num_labels)
@@ -333,7 +333,7 @@ class LlavaNextImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -563,7 +563,7 @@ class LlavaNextImageProcessor(BaseImageProcessor):
 image_grid_pinpoints: List = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -183,7 +183,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -283,7 +283,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor):
 size: Dict[str, int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
-crop_size: int = None,
+crop_size: Optional[int] = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
@@ -577,7 +577,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
 image: ImageInput,
 do_resize: bool = None,
 size: Dict[str, int] = None,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 resample: PILImageResampling = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
@@ -601,7 +601,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
 image: ImageInput,
 do_resize: bool = None,
 size: Dict[str, int] = None,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 resample: PILImageResampling = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
@@ -1592,7 +1592,7 @@ class Mask2FormerMaskedAttentionDecoderLayer(nn.Module):
 def forward_post(
 self,
 hidden_states: torch.Tensor,
-level_index: int = None,
+level_index: Optional[int] = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_embeddings: Optional[torch.Tensor] = None,
 query_position_embeddings: Optional[torch.Tensor] = None,
@@ -1651,7 +1651,7 @@ class Mask2FormerMaskedAttentionDecoderLayer(nn.Module):
 def forward_pre(
 self,
 hidden_states: torch.Tensor,
-level_index: int = None,
+level_index: Optional[int] = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_embeddings: Optional[torch.Tensor] = None,
 query_position_embeddings: Optional[torch.Tensor] = None,
@@ -1712,7 +1712,7 @@ class Mask2FormerMaskedAttentionDecoderLayer(nn.Module):
 def forward(
 self,
 hidden_states: torch.Tensor,
-level_index: int = None,
+level_index: Optional[int] = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_embeddings: Optional[torch.Tensor] = None,
 query_position_embeddings: Optional[torch.Tensor] = None,
@@ -2013,7 +2013,9 @@ class Mask2FormerMaskPredictor(nn.Module):

 self.mask_embedder = Mask2FormerMLPPredictionHead(self.hidden_size, self.hidden_size, mask_feature_size)

-def forward(self, outputs: torch.Tensor, pixel_embeddings: torch.Tensor, attention_mask_target_size: int = None):
+def forward(
+    self, outputs: torch.Tensor, pixel_embeddings: torch.Tensor, attention_mask_target_size: Optional[int] = None
+):
 mask_embeddings = self.mask_embedder(outputs.transpose(0, 1))

 is_tracing = torch.jit.is_tracing() or isinstance(outputs, torch.fx.Proxy) or is_torchdynamo_compiling()
@@ -578,7 +578,7 @@ class MaskFormerImageProcessor(BaseImageProcessor):
 image: ImageInput,
 do_resize: bool = None,
 size: Dict[str, int] = None,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 resample: PILImageResampling = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
@@ -602,7 +602,7 @@ class MaskFormerImageProcessor(BaseImageProcessor):
 image: ImageInput,
 do_resize: bool = None,
 size: Dict[str, int] = None,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 resample: PILImageResampling = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
@@ -1316,7 +1316,7 @@ class MimiVectorQuantization(nn.Module):
 class MimiResidualVectorQuantizer(nn.Module):
 """Residual Vector Quantizer."""

-def __init__(self, config: MimiConfig, num_quantizers: int = None):
+def __init__(self, config: MimiConfig, num_quantizers: Optional[int] = None):
 super().__init__()
 self.codebook_size = config.codebook_size
 self.frame_rate = config.frame_rate
@@ -437,7 +437,7 @@ class MoonshineEncoderLayer(nn.Module):


 class MoonshineDecoderLayer(nn.Module):
-def __init__(self, config: MoonshineConfig, layer_idx: int = None):
+def __init__(self, config: MoonshineConfig, layer_idx: Optional[int] = None):
 super().__init__()
 self.hidden_size = config.hidden_size

@@ -427,7 +427,7 @@ class MoonshineEncoderLayer(LlamaDecoderLayer):


 class MoonshineDecoderLayer(nn.Module):
-def __init__(self, config: MoonshineConfig, layer_idx: int = None):
+def __init__(self, config: MoonshineConfig, layer_idx: Optional[int] = None):
 super().__init__()
 self.hidden_size = config.hidden_size
@@ -420,7 +420,7 @@ class MoshiGatingMLP(nn.Module):
 self.fc1 = MoshiFlexibleLinear(hidden_size, ffn_dim, num_layers)
 self.fc2 = MoshiFlexibleLinear(ffn_dim // 2, hidden_size, num_layers)

-def forward(self, hidden_states: torch.Tensor, layer_idx: int = None) -> torch.Tensor:
+def forward(self, hidden_states: torch.Tensor, layer_idx: Optional[int] = None) -> torch.Tensor:
 hidden_states = self.fc1(hidden_states) if layer_idx is None else self.fc1(hidden_states, layer_idx)

 batch_size, sequence_length, _ = hidden_states.shape
@@ -2644,7 +2644,7 @@ class MoshiForConditionalGeneration(MoshiPreTrainedModel, GenerationMixin):
 return input_ids

 def build_delay_pattern_mask(
-self, input_ids: torch.LongTensor, bos_token_id: int, pad_token_id: int, max_length: int = None
+self, input_ids: torch.LongTensor, bos_token_id: int, pad_token_id: int, max_length: Optional[int] = None
 ):
 """Build a delayed pattern mask to the input_ids. Each codebook, except the first one, is offset by
 one, giving a delayed pattern mask at the start of sequence and end of sequence. Take the example where there
@@ -1377,7 +1377,9 @@ class MusicgenForCausalLM(MusicgenPreTrainedModel, GenerationMixin):
 "use_cache": use_cache,
 }

-def build_delay_pattern_mask(self, input_ids: torch.LongTensor, pad_token_id: int, max_length: int = None):
+def build_delay_pattern_mask(
+    self, input_ids: torch.LongTensor, pad_token_id: int, max_length: Optional[int] = None
+):
 """Build a delayed pattern mask to the input_ids. Each codebook is offset by the previous codebook by
 one, giving a delayed pattern mask at the start of sequence and end of sequence. Take the example where there
 are 4 codebooks and a max sequence length of 8, we have the delayed pattern mask of shape `(codebooks,
@@ -1828,9 +1830,9 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin):
 @classmethod
 def from_sub_models_pretrained(
 cls,
-text_encoder_pretrained_model_name_or_path: str = None,
-audio_encoder_pretrained_model_name_or_path: str = None,
-decoder_pretrained_model_name_or_path: str = None,
+text_encoder_pretrained_model_name_or_path: Optional[str] = None,
+audio_encoder_pretrained_model_name_or_path: Optional[str] = None,
+decoder_pretrained_model_name_or_path: Optional[str] = None,
 *model_args,
 **kwargs,
 ) -> PreTrainedModel:
@@ -2232,8 +2234,8 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin):
 batch_size: int,
 model_input_name: str,
 model_kwargs: Dict[str, torch.Tensor],
-decoder_start_token_id: int = None,
-bos_token_id: int = None,
+decoder_start_token_id: Optional[int] = None,
+bos_token_id: Optional[int] = None,
 device: torch.device = None,
 ) -> Tuple[torch.LongTensor, Dict[str, torch.Tensor]]:
 """Prepares `decoder_input_ids` for generation with encoder-decoder models"""
@@ -2454,7 +2456,7 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin):
 return torch.ones((batch_size, 1), dtype=torch.long, device=self.device) * bos_token_id

 def _get_decoder_start_token_id(
-self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: int = None
+self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None
 ) -> int:
 decoder_start_token_id = (
 decoder_start_token_id
@@ -1297,7 +1297,9 @@ class MusicgenMelodyForCausalLM(MusicgenMelodyPreTrainedModel, GenerationMixin):
 "use_cache": use_cache,
 }

-def build_delay_pattern_mask(self, input_ids: torch.LongTensor, pad_token_id: int, max_length: int = None):
+def build_delay_pattern_mask(
+    self, input_ids: torch.LongTensor, pad_token_id: int, max_length: Optional[int] = None
+):
 """Build a delayed pattern mask to the input_ids. Each codebook is offset by the previous codebook by
 one, giving a delayed pattern mask at the start of sequence and end of sequence. Take the example where there
 are 4 codebooks and a max sequence length of 8, we have the delayed pattern mask of shape `(codebooks,
@@ -1706,9 +1708,9 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin):
 # Copied from transformers.models.musicgen.modeling_musicgen.MusicgenForConditionalGeneration.from_sub_models_pretrained with Musicgen->MusicgenMelody, musicgen-small->musicgen-melody
 def from_sub_models_pretrained(
 cls,
-text_encoder_pretrained_model_name_or_path: str = None,
-audio_encoder_pretrained_model_name_or_path: str = None,
-decoder_pretrained_model_name_or_path: str = None,
+text_encoder_pretrained_model_name_or_path: Optional[str] = None,
+audio_encoder_pretrained_model_name_or_path: Optional[str] = None,
+decoder_pretrained_model_name_or_path: Optional[str] = None,
 *model_args,
 **kwargs,
 ) -> PreTrainedModel:
@@ -2112,8 +2114,8 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin):
 batch_size: int,
 model_input_name: str,
 model_kwargs: Dict[str, torch.Tensor],
-decoder_start_token_id: int = None,
-bos_token_id: int = None,
+decoder_start_token_id: Optional[int] = None,
+bos_token_id: Optional[int] = None,
 device: torch.device = None,
 ) -> Tuple[torch.LongTensor, Dict[str, torch.Tensor]]:
 """Prepares `decoder_input_ids` for generation with encoder-decoder models"""
@@ -2304,7 +2306,7 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin):

 # Copied from transformers.models.musicgen.modeling_musicgen.MusicgenForConditionalGeneration._get_decoder_start_token_id
 def _get_decoder_start_token_id(
-self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: int = None
+self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None
 ) -> int:
 decoder_start_token_id = (
 decoder_start_token_id
@@ -19,7 +19,7 @@ Fast tokenizer class for Nougat.
 import re
 from functools import partial
 from multiprocessing import Pool
-from typing import List, Union
+from typing import List, Optional, Union

 import numpy as np

@@ -584,7 +584,7 @@ class NougatTokenizerFast(PreTrainedTokenizerFast):
 self,
 generation: Union[str, List[str]],
 fix_markdown: bool = True,
-num_workers: int = None,
+num_workers: Optional[int] = None,
 ) -> Union[str, List[str]]:
 """
 Postprocess a generated text or a list of generated texts.
@@ -440,7 +440,7 @@ class OneFormerImageProcessor(BaseImageProcessor):
 ignore_index: Optional[int] = None,
 do_reduce_labels: bool = False,
 repo_path: Optional[str] = "shi-labs/oneformer_demo",
-class_info_file: str = None,
+class_info_file: Optional[str] = None,
 num_text: Optional[int] = None,
 num_labels: Optional[int] = None,
 **kwargs,
@@ -105,7 +105,7 @@ class OPTAttention(nn.Module):
 def __init__(
 self,
 config: OPTConfig,
-layer_idx: int = None,
+layer_idx: Optional[int] = None,
 **kwargs,
 ):
 super().__init__()
@@ -369,7 +369,7 @@ OPT_ATTENTION_CLASSES = {


 class OPTDecoderLayer(nn.Module):
-def __init__(self, config: OPTConfig, layer_idx: int = None):
+def __init__(self, config: OPTConfig, layer_idx: Optional[int] = None):
 super().__init__()
 self.embed_dim = config.hidden_size
@@ -215,7 +215,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
 images: ImageInput,
 do_resize: bool = None,
 size: Dict[str, int] = None,
-crop_pct: int = None,
+crop_pct: Optional[int] = None,
 resample: PILImageResampling = None,
 do_center_crop: bool = None,
 crop_size: Dict[str, int] = None,
@@ -152,7 +152,7 @@ class PromptDepthAnythingImageProcessor(BaseImageProcessor):
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
 do_pad: bool = False,
-size_divisor: int = None,
+size_divisor: Optional[int] = None,
 prompt_scale_to_meter: float = 0.001, # default unit is mm
 **kwargs,
 ):
@@ -132,8 +132,8 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
 do_convert_rgb: bool = True,
-min_pixels: int = None,
-max_pixels: int = None,
+min_pixels: Optional[int] = None,
+max_pixels: Optional[int] = None,
 patch_size: int = 14,
 temporal_patch_size: int = 2,
 merge_size: int = 2,
@@ -177,9 +177,9 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
 do_normalize: bool = None,
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
-patch_size: int = None,
-temporal_patch_size: int = None,
-merge_size: int = None,
+patch_size: Optional[int] = None,
+temporal_patch_size: Optional[int] = None,
+merge_size: Optional[int] = None,
 do_convert_rgb: bool = None,
 data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
 input_data_format: Optional[Union[str, ChannelDimension]] = None,
@@ -304,17 +304,17 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
 videos: VideoInput = None,
 do_resize: bool = None,
 size: Dict[str, int] = None,
-min_pixels: int = None,
-max_pixels: int = None,
+min_pixels: Optional[int] = None,
+max_pixels: Optional[int] = None,
 resample: PILImageResampling = None,
 do_rescale: bool = None,
 rescale_factor: float = None,
 do_normalize: bool = None,
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
-patch_size: int = None,
-temporal_patch_size: int = None,
-merge_size: int = None,
+patch_size: Optional[int] = None,
+temporal_patch_size: Optional[int] = None,
+merge_size: Optional[int] = None,
 do_convert_rgb: bool = None,
 return_tensors: Optional[Union[str, TensorType]] = None,
 data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
@@ -263,11 +263,11 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast):
 do_normalize: bool = None,
 image_mean: Optional[Union[float, List[float]]] = None,
 image_std: Optional[Union[float, List[float]]] = None,
-min_pixels: int = None,
-max_pixels: int = None,
-patch_size: int = None,
-temporal_patch_size: int = None,
-merge_size: int = None,
+min_pixels: Optional[int] = None,
+max_pixels: Optional[int] = None,
+patch_size: Optional[int] = None,
+temporal_patch_size: Optional[int] = None,
+merge_size: Optional[int] = None,
 do_convert_rgb: bool = None,
 return_tensors: Optional[Union[str, TensorType]] = None,
 data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
@@ -245,8 +245,8 @@ class RagPreTrainedModel(PreTrainedModel):
 @classmethod
 def from_pretrained_question_encoder_generator(
 cls,
-question_encoder_pretrained_model_name_or_path: str = None,
-generator_pretrained_model_name_or_path: str = None,
+question_encoder_pretrained_model_name_or_path: Optional[str] = None,
+generator_pretrained_model_name_or_path: Optional[str] = None,
 retriever: RagRetriever = None,
 **kwargs,
 ) -> PreTrainedModel:
@@ -232,8 +232,8 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
 @classmethod
 def from_pretrained_question_encoder_generator(
 cls,
-question_encoder_pretrained_model_name_or_path: str = None,
-generator_pretrained_model_name_or_path: str = None,
+question_encoder_pretrained_model_name_or_path: Optional[str] = None,
+generator_pretrained_model_name_or_path: Optional[str] = None,
 retriever: RagRetriever = None,
 *model_args,
 **kwargs,
@@ -81,7 +81,7 @@ class RagTokenizer:
 max_length: Optional[int] = None,
 max_target_length: Optional[int] = None,
 padding: str = "longest",
-return_tensors: str = None,
+return_tensors: Optional[str] = None,
 truncation: bool = True,
 **kwargs,
 ) -> BatchEncoding:
@@ -25,7 +25,7 @@ from dataclasses import dataclass, field
 from functools import partial
 from pathlib import Path
 from pprint import pprint
-from typing import Dict, List, Tuple
+from typing import Dict, List, Optional, Tuple

 import torch
 import torch.nn as nn
@@ -159,7 +159,7 @@ def get_from_to_our_keys(model_name: str) -> Dict[str, str]:
 return from_to_ours_keys


-def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
+def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
 filename = "imagenet-1k-id2label.json"
 num_labels = 1000
@ -19,7 +19,7 @@ import json
|
||||
from dataclasses import dataclass, field
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Tuple
|
||||
from typing import Callable, Dict, List, Optional, Tuple
|
||||
|
||||
import timm
|
||||
import torch
|
||||
@ -218,7 +218,7 @@ def convert_weight_and_push(
|
||||
print(f"Pushed {name}")
|
||||
|
||||
|
||||
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
|
||||
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
|
||||
filename = "imagenet-1k-id2label.json"
|
||||
num_labels = 1000
|
||||
expected_shape = (1, num_labels)
|
||||
|
@ -19,7 +19,7 @@ import json
|
||||
from dataclasses import dataclass, field
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
import timm
|
||||
import torch
|
||||
@ -122,7 +122,7 @@ def convert_weight_and_push(name: str, config: ResNetConfig, save_directory: Pat
|
||||
print(f"Pushed {checkpoint_name}")
|
||||
|
||||
|
||||
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
|
||||
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
|
||||
filename = "imagenet-1k-id2label.json"
|
||||
num_labels = 1000
|
||||
expected_shape = (1, num_labels)
|
||||
|
@ -770,8 +770,8 @@ class RoCBertTokenizer(PreTrainedTokenizer):
        self,
        token_ids_0: List[int],
        token_ids_1: Optional[List[int]] = None,
        cls_token_id: int = None,
        sep_token_id: int = None,
        cls_token_id: Optional[int] = None,
        sep_token_id: Optional[int] = None,
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
|
@ -127,8 +127,8 @@ class SamImageProcessor(BaseImageProcessor):
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_pad: bool = True,
        pad_size: int = None,
        mask_pad_size: int = None,
        pad_size: Optional[int] = None,
        mask_pad_size: Optional[int] = None,
        do_convert_rgb: bool = True,
        **kwargs,
    ) -> None:
|
@ -325,8 +325,8 @@ class TFSegformerMixFFN(keras.layers.Layer):
        self,
        config: SegformerConfig,
        in_features: int,
        hidden_features: int = None,
        out_features: int = None,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
|
@ -52,7 +52,7 @@ class SiglipProcessor(ProcessorMixin):
        images: ImageInput = None,
        padding: Union[bool, str, PaddingStrategy] = False,
        truncation: Union[bool, str, TruncationStrategy] = None,
        max_length: int = None,
        max_length: Optional[int] = None,
        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
    ) -> BatchFeature:
        """
|
@ -141,7 +141,9 @@ class SmolVLMProcessor(ProcessorMixin):
    image_processor_class = "SmolVLMImageProcessor"
    tokenizer_class = "AutoTokenizer"

    def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: str = None, **kwargs):
    def __init__(
        self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: Optional[str] = None, **kwargs
    ):
        self.fake_image_token = getattr(tokenizer, "fake_image_token", "<fake_token_around_image>")
        self.image_token = getattr(tokenizer, "image_token", "<image>")
        self.end_of_utterance_token = getattr(tokenizer, "end_of_utterance_token", "<end_of_utterance>")
|
@ -291,8 +291,8 @@ class SpeechEncoderDecoderModel(PreTrainedModel, GenerationMixin):
    @classmethod
    def from_encoder_decoder_pretrained(
        cls,
        encoder_pretrained_model_name_or_path: str = None,
        decoder_pretrained_model_name_or_path: str = None,
        encoder_pretrained_model_name_or_path: Optional[str] = None,
        decoder_pretrained_model_name_or_path: Optional[str] = None,
        *model_args,
        **kwargs,
    ) -> PreTrainedModel:
|
@ -247,8 +247,8 @@ class TapasTokenizer(PreTrainedTokenizer):
        tokenize_chinese_chars=True,
        strip_accents=None,
        cell_trim_length: int = -1,
        max_column_id: int = None,
        max_row_id: int = None,
        max_column_id: Optional[int] = None,
        max_row_id: Optional[int] = None,
        strip_column_names: bool = False,
        update_answer_coordinates: bool = False,
        min_question_length=None,
@ -2242,8 +2242,8 @@ class NumericValue:

@dataclass
class NumericValueSpan:
    begin_index: int = None
    end_index: int = None
    begin_index: Optional[int] = None
    end_index: Optional[int] = None
    values: List[NumericValue] = None


|
@ -205,10 +205,10 @@ class TextNetImageProcessor(BaseImageProcessor):
        images: ImageInput,
        do_resize: bool = None,
        size: Dict[str, int] = None,
        size_divisor: int = None,
        size_divisor: Optional[int] = None,
        resample: PILImageResampling = None,
        do_center_crop: bool = None,
        crop_size: int = None,
        crop_size: Optional[int] = None,
        do_rescale: bool = None,
        rescale_factor: float = None,
        do_normalize: bool = None,
|
@ -144,8 +144,8 @@ class TrOCRAttention(nn.Module):
        config,
        embed_dim: int,
        num_heads: int,
        kdim: int = None,
        vdim: int = None,
        kdim: Optional[int] = None,
        vdim: Optional[int] = None,
        dropout: float = 0.0,
        is_decoder: bool = False,
        bias: bool = True,
|
@ -178,7 +178,7 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
        size: Dict[str, int] = None,
        resample: PILImageResampling = None,
        do_center_crop: bool = None,
        crop_size: int = None,
        crop_size: Optional[int] = None,
        do_rescale: bool = None,
        rescale_factor: float = None,
        do_normalize: bool = None,
@ -332,7 +332,7 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_center_crop: bool = None,
        crop_size: int = None,
        crop_size: Optional[int] = None,
        do_convert_rgb: bool = None,
        data_format: ChannelDimension = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
@ -309,8 +309,8 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
    @classmethod
    def from_encoder_decoder_pretrained(
        cls,
        encoder_pretrained_model_name_or_path: str = None,
        decoder_pretrained_model_name_or_path: str = None,
        encoder_pretrained_model_name_or_path: Optional[str] = None,
        decoder_pretrained_model_name_or_path: Optional[str] = None,
        *model_args,
        **kwargs,
    ) -> TFPreTrainedModel:
|
@ -380,8 +380,8 @@ class VisionEncoderDecoderModel(PreTrainedModel, GenerationMixin):
    @classmethod
    def from_encoder_decoder_pretrained(
        cls,
        encoder_pretrained_model_name_or_path: str = None,
        decoder_pretrained_model_name_or_path: str = None,
        encoder_pretrained_model_name_or_path: Optional[str] = None,
        decoder_pretrained_model_name_or_path: Optional[str] = None,
        *model_args,
        **kwargs,
    ) -> PreTrainedModel:
|
@ -414,8 +414,8 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel):
    @classmethod
    def from_vision_text_pretrained(
        cls,
        vision_model_name_or_path: str = None,
        text_model_name_or_path: str = None,
        vision_model_name_or_path: Optional[str] = None,
        text_model_name_or_path: Optional[str] = None,
        *model_args,
        **kwargs,
    ) -> FlaxPreTrainedModel:
|
@ -465,8 +465,8 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
    @classmethod
    def from_vision_text_pretrained(
        cls,
        vision_model_name_or_path: str = None,
        text_model_name_or_path: str = None,
        vision_model_name_or_path: Optional[str] = None,
        text_model_name_or_path: Optional[str] = None,
        *model_args,
        **kwargs,
    ) -> TFPreTrainedModel:
|
@ -417,8 +417,8 @@ class VisionTextDualEncoderModel(PreTrainedModel):
    @classmethod
    def from_vision_text_pretrained(
        cls,
        vision_model_name_or_path: str = None,
        text_model_name_or_path: str = None,
        vision_model_name_or_path: Optional[str] = None,
        text_model_name_or_path: Optional[str] = None,
        *model_args,
        **kwargs,
    ) -> PreTrainedModel:
|
@ -14,6 +14,8 @@
# limitations under the License.
"""VitPose model configuration"""

from typing import Optional

from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ...utils.backbone_utils import verify_backbone_config_arguments
@ -75,11 +77,11 @@ class VitPoseConfig(PretrainedConfig):

    def __init__(
        self,
        backbone_config: PretrainedConfig = None,
        backbone: str = None,
        backbone_config: Optional[PretrainedConfig] = None,
        backbone: Optional[str] = None,
        use_pretrained_backbone: bool = False,
        use_timm_backbone: bool = False,
        backbone_kwargs: dict = None,
        backbone_kwargs: Optional[dict] = None,
        initializer_range: float = 0.02,
        scale_factor: int = 4,
        use_simple_decoder: bool = True,
|
@ -652,7 +652,7 @@ class WhisperEncoderLayer(nn.Module):


class WhisperDecoderLayer(nn.Module):
    def __init__(self, config: WhisperConfig, layer_idx: int = None):
    def __init__(self, config: WhisperConfig, layer_idx: Optional[int] = None):
        super().__init__()
        self.embed_dim = config.d_model

|
@ -377,7 +377,9 @@ class WhisperTokenizer(PreTrainedTokenizer):
        self.cache[token] = word
        return word

    def set_prefix_tokens(self, language: str = None, task: str = None, predict_timestamps: bool = None):
    def set_prefix_tokens(
        self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: bool = None
    ):
        """
        Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to
        update the prefix tokens as required when fine-tuning. Example:
@ -1276,7 +1278,7 @@ def _collate_word_timestamps(tokenizer, tokens, token_timestamps, language, retu
def _combine_tokens_into_words(
    tokenizer,
    tokens: List[int],
    language: str = None,
    language: Optional[str] = None,
    prepend_punctuations: str = "\"'“¡¿([{-",
    append_punctuations: str = "\"'.。,,!!??::”)]}、",
):
|
@ -451,7 +451,9 @@ class WhisperTokenizerFast(PreTrainedTokenizerFast):

        return tuple(files) + (normalizer_file,)

    def set_prefix_tokens(self, language: str = None, task: str = None, predict_timestamps: bool = None):
    def set_prefix_tokens(
        self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: bool = None
    ):
        """
        Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to
        update the prefix tokens as required when fine-tuning. Example:
|
@ -679,7 +679,7 @@ class ZambaMambaDecoderLayer(nn.Module):
        self,
        hidden_states: torch.Tensor,
        original_hidden_states: Optional[torch.Tensor] = None,
        layer_idx: int = None,
        layer_idx: Optional[int] = None,
        attention_mask: Optional[torch.Tensor] = None,
        causal_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[ZambaHybridDynamicCache] = None,
@ -747,7 +747,7 @@ class ZambaHybridLayer(nn.Module):
        self,
        hidden_states: torch.Tensor,
        original_hidden_states: Optional[torch.Tensor] = None,
        layer_idx: int = None,
        layer_idx: Optional[int] = None,
        attention_mask: Optional[torch.Tensor] = None,
        causal_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[ZambaHybridDynamicCache] = None,
|
@ -385,8 +385,8 @@ class Zamba2Attention(nn.Module):
        self,
        config: Zamba2Config,
        layer_idx: Optional[int] = None,
        num_fwd_mem_blocks: int = None,
        block_id: int = None,
        num_fwd_mem_blocks: Optional[int] = None,
        block_id: Optional[int] = None,
    ):
        super().__init__()
        self.config = config
@ -560,7 +560,7 @@ class Zamba2MambaMixer(nn.Module):
    and is why Mamba is called **selective** state spaces)
    """

    def __init__(self, config: Zamba2Config, layer_idx: int = None):
    def __init__(self, config: Zamba2Config, layer_idx: Optional[int] = None):
        super().__init__()
        self.config = config
        self.hidden_size = config.hidden_size
@ -983,7 +983,7 @@ class Zamba2MambaMixer(nn.Module):


class Zamba2MLP(nn.Module):
    def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: int = None):
    def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: Optional[int] = None):
        """
        This MLP layer contributes to tied transformer blocks aimed to increasing compute without increasing model size. Because this layer
        is tied, un-tied adapter modules (formally same as LoRA, but used in the base model) are added to the up and gate projectors to increase expressivity with a small memory overhead.
@ -1025,7 +1025,7 @@ class Zamba2MLP(nn.Module):


class Zamba2AttentionDecoderLayer(nn.Module):
    def __init__(self, config: Zamba2Config, block_id: int = None, layer_idx: Optional[int] = None):
    def __init__(self, config: Zamba2Config, block_id: Optional[int] = None, layer_idx: Optional[int] = None):
        super().__init__()
        self.block_id = block_id
        num_gs = len(config.hybrid_layer_ids)
@ -1099,7 +1099,7 @@ class Zamba2MambaDecoderLayer(nn.Module):
        self,
        hidden_states: torch.Tensor,
        original_hidden_states: Optional[torch.Tensor] = None,
        layer_idx: int = None,
        layer_idx: Optional[int] = None,
        attention_mask: Optional[torch.Tensor] = None,
        causal_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Zamba2HybridDynamicCache] = None,
@ -1169,7 +1169,7 @@ class Zamba2HybridLayer(nn.Module):
        self,
        hidden_states: torch.Tensor,
        original_hidden_states: Optional[torch.Tensor] = None,
        layer_idx: int = None,
        layer_idx: Optional[int] = None,
        attention_mask: Optional[torch.Tensor] = None,
        causal_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Zamba2HybridDynamicCache] = None,
|
@ -199,8 +199,8 @@ class Zamba2Attention(ZambaAttention):
        self,
        config: Zamba2Config,
        layer_idx: Optional[int] = None,
        num_fwd_mem_blocks: int = None,
        block_id: int = None,
        num_fwd_mem_blocks: Optional[int] = None,
        block_id: Optional[int] = None,
    ):
        super().__init__(config, layer_idx)
        self.num_fwd_mem_blocks = num_fwd_mem_blocks
@ -302,7 +302,7 @@ class Zamba2MambaMixer(nn.Module):
    and is why Mamba is called **selective** state spaces)
    """

    def __init__(self, config: Zamba2Config, layer_idx: int = None):
    def __init__(self, config: Zamba2Config, layer_idx: Optional[int] = None):
        super().__init__()
        self.config = config
        self.hidden_size = config.hidden_size
@ -725,7 +725,7 @@ class Zamba2MambaMixer(nn.Module):


class Zamba2MLP(nn.Module):
    def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: int = None):
    def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: Optional[int] = None):
        """
        This MLP layer contributes to tied transformer blocks aimed to increasing compute without increasing model size. Because this layer
        is tied, un-tied adapter modules (formally same as LoRA, but used in the base model) are added to the up and gate projectors to increase expressivity with a small memory overhead.
@ -767,7 +767,7 @@ class Zamba2MLP(nn.Module):


class Zamba2AttentionDecoderLayer(ZambaAttentionDecoderLayer):
    def __init__(self, config: Zamba2Config, block_id: int = None, layer_idx: Optional[int] = None):
    def __init__(self, config: Zamba2Config, block_id: Optional[int] = None, layer_idx: Optional[int] = None):
        self.block_id = block_id
        num_gs = len(config.hybrid_layer_ids)
        super().__init__(config, layer_idx)
@ -847,7 +847,7 @@ class Zamba2HybridLayer(ZambaHybridLayer):
        self,
        hidden_states: torch.Tensor,
        original_hidden_states: Optional[torch.Tensor] = None,
        layer_idx: int = None,
        layer_idx: Optional[int] = None,
        attention_mask: Optional[torch.Tensor] = None,
        causal_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Zamba2HybridDynamicCache] = None,
|
@ -305,9 +305,9 @@ class ZoeDepthImageProcessor(BaseImageProcessor):
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_resize: bool = None,
        size: int = None,
        size: Optional[int] = None,
        keep_aspect_ratio: bool = None,
        ensure_multiple_of: int = None,
        ensure_multiple_of: Optional[int] = None,
        resample: PILImageResampling = None,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: ChannelDimension = ChannelDimension.FIRST,
|
@ -291,7 +291,7 @@ class OnnxConfig(ABC):
        sampling_rate: int = 22050,
        time_duration: float = 5.0,
        frequency: int = 220,
        tokenizer: "PreTrainedTokenizerBase" = None,
        tokenizer: Optional["PreTrainedTokenizerBase"] = None,
    ) -> Mapping[str, Any]:
        """
        Generate inputs to provide to the ONNX exporter for the specific framework
@ -445,7 +445,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
        self,
        config: "PretrainedConfig",
        task: str = "default",
        patching_specs: List[PatchingSpec] = None,
        patching_specs: Optional[list[PatchingSpec]] = None,
        use_past: bool = False,
    ):
        super().__init__(config, task=task, patching_specs=patching_specs)
@ -639,7 +639,7 @@ class OnnxSeq2SeqConfigWithPast(OnnxConfigWithPast):

    def generate_dummy_inputs(
        self,
        tokenizer: "PreTrainedTokenizerBase",
        tokenizer: Optional["PreTrainedTokenizerBase"],
        batch_size: int = -1,
        seq_length: int = -1,
        is_pair: bool = False,
|
@ -16,7 +16,7 @@ import warnings
from inspect import signature
from itertools import chain
from pathlib import Path
from typing import TYPE_CHECKING, Iterable, List, Tuple, Union
from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, Union

import numpy as np
from packaging.version import Version, parse
@ -85,7 +85,7 @@ def export_pytorch(
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
    tokenizer: Optional["PreTrainedTokenizer"] = None,
    device: str = "cpu",
) -> Tuple[List[str], List[str]]:
    """
@ -188,7 +188,7 @@ def export_tensorflow(
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
    tokenizer: Optional["PreTrainedTokenizer"] = None,
) -> Tuple[List[str], List[str]]:
    """
    Export a TensorFlow model to an ONNX Intermediate Representation (IR)
@ -254,7 +254,7 @@ def export(
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
    tokenizer: Optional["PreTrainedTokenizer"] = None,
    device: str = "cpu",
) -> Tuple[List[str], List[str]]:
    """
@ -321,7 +321,7 @@ def validate_model_outputs(
    onnx_model: Path,
    onnx_named_outputs: List[str],
    atol: float,
    tokenizer: "PreTrainedTokenizer" = None,
    tokenizer: Optional["PreTrainedTokenizer"] = None,
):
    from onnxruntime import InferenceSession, SessionOptions

|
@ -531,7 +531,7 @@ class BatchEncoding(UserDict):
        span = self._encodings[batch_index].word_to_tokens(word_index, sequence_index)
        return TokenSpan(*span) if span is not None else None

    def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> CharSpan:
    def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> Optional[CharSpan]:
        """
        Get the character span corresponding to an encoded token in a sequence of the batch.

@ -2629,7 +2629,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
        add_special_tokens: bool = True,
        padding: Union[bool, str, PaddingStrategy] = False,
        truncation: Union[bool, str, TruncationStrategy] = None,
        truncation: Union[bool, str, TruncationStrategy, None] = None,
        max_length: Optional[int] = None,
        stride: int = 0,
        padding_side: Optional[str] = None,
@ -2810,15 +2810,15 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
    @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
    def __call__(
        self,
        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput], None] = None,
        text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None,
        text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
        text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput], None] = None,
        text_pair_target: Optional[
            Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]
        ] = None,
        add_special_tokens: bool = True,
        padding: Union[bool, str, PaddingStrategy] = False,
        truncation: Union[bool, str, TruncationStrategy] = None,
        truncation: Union[bool, str, TruncationStrategy, None] = None,
        max_length: Optional[int] = None,
        stride: int = 0,
        is_split_into_words: bool = False,
@ -2905,7 +2905,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None,
        add_special_tokens: bool = True,
        padding: Union[bool, str, PaddingStrategy] = False,
        truncation: Union[bool, str, TruncationStrategy] = None,
        truncation: Union[bool, str, TruncationStrategy, None] = None,
        max_length: Optional[int] = None,
        stride: int = 0,
        is_split_into_words: bool = False,
@ -3131,7 +3131,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        ],
        add_special_tokens: bool = True,
        padding: Union[bool, str, PaddingStrategy] = False,
        truncation: Union[bool, str, TruncationStrategy] = None,
        truncation: Union[bool, str, TruncationStrategy, None] = None,
        max_length: Optional[int] = None,
        stride: int = 0,
        is_split_into_words: bool = False,
@ -3807,7 +3807,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        self,
        sequences: Union[List[int], List[List[int]], "np.ndarray", "torch.Tensor", "tf.Tensor"],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool = None,
        clean_up_tokenization_spaces: Optional[bool] = None,
        **kwargs,
    ) -> List[str]:
        """
@ -3841,7 +3841,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        self,
        token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool = None,
        clean_up_tokenization_spaces: Optional[bool] = None,
        **kwargs,
    ) -> str:
        """
@ -3878,7 +3878,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        self,
        token_ids: Union[int, List[int]],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool = None,
        clean_up_tokenization_spaces: Optional[bool] = None,
        **kwargs,
    ) -> str:
        raise NotImplementedError
|
@ -414,7 +414,7 @@ class Trainer:
    @deprecate_kwarg("tokenizer", new_name="processing_class", version="5.0.0", raise_if_both_names=True)
    def __init__(
        self,
        model: Union[PreTrainedModel, nn.Module] = None,
        model: Union[PreTrainedModel, nn.Module, None] = None,
        args: TrainingArguments = None,
        data_collator: Optional[DataCollator] = None,
        train_dataset: Optional[Union[Dataset, IterableDataset, "datasets.Dataset"]] = None,
@ -2139,7 +2139,7 @@ class Trainer:
    def train(
        self,
        resume_from_checkpoint: Optional[Union[str, bool]] = None,
        trial: Union["optuna.Trial", dict[str, Any]] = None,
        trial: Union["optuna.Trial", dict[str, Any], None] = None,
        ignore_keys_for_eval: Optional[list[str]] = None,
        **kwargs,
    ):
@ -4920,10 +4920,10 @@ class Trainer:
        logger.info(f" Num examples = {num_examples}")
        logger.info(f" Batch size = {batch_size}")

        losses_host: torch.Tensor = None
        preds_host: Union[torch.Tensor, list[torch.Tensor]] = None
        labels_host: Union[torch.Tensor, list[torch.Tensor]] = None
        inputs_host: Union[torch.Tensor, list[torch.Tensor]] = None
        losses_host: Optional[torch.Tensor] = None
        preds_host: Union[torch.Tensor, list[torch.Tensor], None] = None
        labels_host: Union[torch.Tensor, list[torch.Tensor], None] = None
        inputs_host: Union[torch.Tensor, list[torch.Tensor], None] = None
        metrics: Optional[dict] = None
        eval_set_kwargs: dict = {}
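For context, a minimal sketch of the pattern this commit applies throughout (the build_layer function below is hypothetical, not taken from the diff): a parameter or field that defaults to None must be annotated Optional[X], or equivalently Union[X, None], because a bare annotation such as layer_idx: int = None is rejected by strict type checkers like mypy, which no longer treats a None default as implicitly Optional.

from typing import Optional


def build_layer(hidden_size: int, layer_idx: Optional[int] = None) -> dict:
    # None is a legitimate input meaning "no fixed layer position", so the
    # annotation must admit it; resolve it to a sentinel value here.
    return {"hidden_size": hidden_size, "layer_idx": -1 if layer_idx is None else layer_idx}


print(build_layer(768))               # {'hidden_size': 768, 'layer_idx': -1}
print(build_layer(768, layer_idx=3))  # {'hidden_size': 768, 'layer_idx': 3}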