Fix typing for None-valued variables (#37004)

Fix typing for variables that can be None
cyyever 2025-03-27 22:46:32 +08:00 committed by GitHub
parent 8c5e29bad5
commit de77f5b1ec
80 changed files with 271 additions and 249 deletions
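Every hunk below applies the same fix: a parameter or field annotated as `T` but defaulted to `None` becomes `Optional[T]`. The old implicit-Optional shortcut is discouraged by PEP 484 and flagged by strict type checkers such as recent mypy in their default configuration. A minimal, self-contained sketch of the pattern (the `greet` functions are hypothetical, not code from this commit):

```python
from typing import Optional

# Old style: the annotation claims `name` is always a str, yet the default is None.
# Strict checkers reject this implicit Optional.
def greet_old(name: str = None) -> str:
    return f"Hello, {name or 'world'}!"

# Fixed style, as applied throughout this commit: the annotation admits None explicitly.
def greet_fixed(name: Optional[str] = None) -> str:
    return f"Hello, {name or 'world'}!"

print(greet_fixed())          # Hello, world!
print(greet_fixed("reader"))  # Hello, reader!
```

On Python 3.10+ the same intent can also be written as `name: str | None = None`; this commit keeps the `Optional[...]` spelling already used elsewhere in the codebase.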

View File

@ -19,6 +19,7 @@ import json
import os
import re
from contextlib import contextmanager
from typing import Optional
from transformers.utils.import_utils import export
@ -284,7 +285,7 @@ def model_addition_debugger(cls):
@export(backends=("torch",))
@contextmanager
def model_addition_debugger_context(model, debug_path: str = None):
def model_addition_debugger_context(model, debug_path: Optional[str] = None):
"""
# Model addition debugger - context manager for model adders
This context manager is a power user tool intended for model adders.
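The decorated function above is a `contextlib` context manager whose `debug_path` may legitimately be left unset, which is exactly what the new `Optional[str]` annotation expresses. Below is a self-contained sketch of the same shape; `debug_session` is a hypothetical stand-in, not the transformers context manager:

```python
from contextlib import contextmanager
from typing import Iterator, Optional


@contextmanager
def debug_session(debug_path: Optional[str] = None) -> Iterator[None]:
    # None is a meaningful value here: fall back to a default output location.
    target = debug_path if debug_path is not None else "./debug_output"
    print(f"writing debug traces to {target}")
    try:
        yield
    finally:
        print("debug session closed")


with debug_session():                 # debug_path omitted
    pass

with debug_session("custom_traces"):  # explicit path
    pass
```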

View File

@ -42,7 +42,7 @@ class BaseModelOutput(ModelOutput):
heads.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -62,7 +62,7 @@ class BaseModelOutputWithNoAttention(ModelOutput):
Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -92,8 +92,8 @@ class BaseModelOutputWithPooling(ModelOutput):
heads.
"""
last_hidden_state: torch.FloatTensor = None
pooler_output: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
pooler_output: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -115,8 +115,8 @@ class BaseModelOutputWithPoolingAndNoAttention(ModelOutput):
Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
"""
last_hidden_state: torch.FloatTensor = None
pooler_output: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
pooler_output: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -153,7 +153,7 @@ class BaseModelOutputWithPast(ModelOutput):
heads.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -186,7 +186,7 @@ class BaseModelOutputWithCrossAttentions(ModelOutput):
weighted average in the cross-attention heads.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -233,8 +233,8 @@ class BaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
input) to speed up sequential decoding.
"""
last_hidden_state: torch.FloatTensor = None
pooler_output: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
pooler_output: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -280,7 +280,7 @@ class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
weighted average in the cross-attention heads.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -327,12 +327,12 @@ class MoECausalLMOutputWithPast(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
z_loss: torch.FloatTensor = None
aux_loss: torch.FloatTensor = None
z_loss: Optional[torch.FloatTensor] = None
aux_loss: Optional[torch.FloatTensor] = None
router_logits: Optional[Tuple[torch.FloatTensor]] = None
@ -362,7 +362,7 @@ class MoEModelOutput(ModelOutput):
loss and the z_loss for Mixture of Experts models.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
router_probs: Optional[Tuple[torch.FloatTensor]] = None
@ -403,7 +403,7 @@ class MoeModelOutputWithPast(ModelOutput):
loss for Mixture of Experts models.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -452,7 +452,7 @@ class MoeCausalLMOutputWithPast(ModelOutput):
loss: Optional[torch.FloatTensor] = None
aux_loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -504,7 +504,7 @@ class MoEModelOutputWithPastAndCrossAttentions(ModelOutput):
loss and the z_loss for Mixture of Experts models.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -563,7 +563,7 @@ class Seq2SeqModelOutput(ModelOutput):
self-attention heads.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -633,7 +633,7 @@ class Seq2SeqMoEModelOutput(ModelOutput):
modules.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -669,7 +669,7 @@ class CausalLMOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -704,7 +704,7 @@ class CausalLMOutputWithPast(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -747,7 +747,7 @@ class CausalLMOutputWithCrossAttentions(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -784,7 +784,7 @@ class SequenceClassifierOutputWithPast(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -814,7 +814,7 @@ class MaskedLMOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -869,7 +869,7 @@ class Seq2SeqLMOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -938,11 +938,11 @@ class Seq2SeqMoEOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
encoder_z_loss: torch.FloatTensor = None
decoder_z_loss: torch.FloatTensor = None
encoder_aux_loss: torch.FloatTensor = None
decoder_aux_loss: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
encoder_z_loss: Optional[torch.FloatTensor] = None
decoder_z_loss: Optional[torch.FloatTensor] = None
encoder_aux_loss: Optional[torch.FloatTensor] = None
decoder_aux_loss: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -979,7 +979,7 @@ class NextSentencePredictorOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1008,7 +1008,7 @@ class SequenceClassifierOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1063,7 +1063,7 @@ class Seq2SeqSequenceClassifierOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1099,7 +1099,7 @@ class MultipleChoiceModelOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1128,7 +1128,7 @@ class TokenClassifierOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1159,8 +1159,8 @@ class QuestionAnsweringModelOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
start_logits: torch.FloatTensor = None
end_logits: torch.FloatTensor = None
start_logits: Optional[torch.FloatTensor] = None
end_logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1217,8 +1217,8 @@ class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
start_logits: torch.FloatTensor = None
end_logits: torch.FloatTensor = None
start_logits: Optional[torch.FloatTensor] = None
end_logits: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1261,7 +1261,7 @@ class SemanticSegmenterOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1289,7 +1289,7 @@ class ImageClassifierOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1311,7 +1311,7 @@ class ImageClassifierOutputWithNoAttention(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1340,7 +1340,7 @@ class DepthEstimatorOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
predicted_depth: torch.FloatTensor = None
predicted_depth: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1368,7 +1368,7 @@ class ImageSuperResolutionOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
reconstruction: torch.FloatTensor = None
reconstruction: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1396,8 +1396,8 @@ class Wav2Vec2BaseModelOutput(ModelOutput):
heads.
"""
last_hidden_state: torch.FloatTensor = None
extract_features: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
extract_features: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1428,8 +1428,8 @@ class XVectorOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
embeddings: torch.FloatTensor = None
logits: Optional[torch.FloatTensor] = None
embeddings: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1456,7 +1456,7 @@ class BackboneOutput(ModelOutput):
heads.
"""
feature_maps: Tuple[torch.FloatTensor] = None
feature_maps: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1491,8 +1491,8 @@ class BaseModelOutputWithPoolingAndProjection(ModelOutput):
Text embeddings before the projection layer, used to mimic the last hidden state of the teacher encoder.
"""
last_hidden_state: torch.FloatTensor = None
pooler_output: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
pooler_output: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
projection_state: Optional[Tuple[torch.FloatTensor]] = None
@ -1548,7 +1548,7 @@ class Seq2SeqSpectrogramOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
spectrogram: torch.FloatTensor = None
spectrogram: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1617,7 +1617,7 @@ class Seq2SeqTSModelOutput(ModelOutput):
Static features of each time series' in a batch which are copied to the covariates at inference time.
"""
last_hidden_state: torch.FloatTensor = None
last_hidden_state: Optional[torch.FloatTensor] = None
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@ -1713,7 +1713,7 @@ class SampleTSPredictionOutput(ModelOutput):
Sampled values from the chosen distribution.
"""
sequences: torch.FloatTensor = None
sequences: Optional[torch.FloatTensor] = None
@dataclass
@ -1739,7 +1739,7 @@ class MaskedImageModelingOutput(ModelOutput):
"""
loss: Optional[torch.FloatTensor] = None
reconstruction: torch.FloatTensor = None
reconstruction: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
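The model-output hunks above are all dataclass fields whose default is None; annotating them as `Optional[...]` makes the declared field type match what callers actually receive when an output is only partially populated. A minimal sketch of the same pattern with a plain dataclass (not the transformers `ModelOutput` base class):

```python
from dataclasses import dataclass
from typing import Optional, Tuple

import torch


@dataclass
class TinyOutput:
    # Every field defaults to None, so every field type must admit None.
    last_hidden_state: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


out = TinyOutput(last_hidden_state=torch.zeros(1, 4))
print(out.last_hidden_state.shape)  # torch.Size([1, 4])
print(out.hidden_states)            # None: hidden states were not requested
```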

View File

@ -19,7 +19,7 @@ import json
import os
import re
from os import path
from typing import Dict, Union
from typing import Dict, Optional, Union
import torch
from huggingface_hub import split_torch_state_dict_into_shards
@ -172,7 +172,7 @@ def convert_mamba_ssm_checkpoint_file_to_huggingface_model_file(
mamba_ssm_checkpoint_path: str,
precision: str,
output_dir: str,
tokenizer_path: str = None,
tokenizer_path: Optional[str] = None,
save_model: Union[bool, str] = True,
) -> None:
# load tokenizer if provided, this will be used to set the

View File

@ -175,7 +175,7 @@ class BarkProcessor(ProcessorMixin):
super().save_pretrained(save_directory, push_to_hub, **kwargs)
def _load_voice_preset(self, voice_preset: str = None, **kwargs):
def _load_voice_preset(self, voice_preset: Optional[str] = None, **kwargs):
voice_preset_paths = self.speaker_embeddings[voice_preset]
voice_preset_dict = {}

View File

@ -412,7 +412,7 @@ class FlaxBigBirdSelfAttention(nn.Module):
class FlaxBigBirdBlockSparseAttention(nn.Module):
config: BigBirdConfig
block_sparse_seed: int = None
block_sparse_seed: Optional[int] = None
dtype: jnp.dtype = jnp.float32
def setup(self):
@ -1262,7 +1262,7 @@ class FlaxBigBirdSelfOutput(nn.Module):
class FlaxBigBirdAttention(nn.Module):
config: BigBirdConfig
layer_id: int = None
layer_id: Optional[int] = None
causal: bool = False
dtype: jnp.dtype = jnp.float32
@ -1362,7 +1362,7 @@ class FlaxBigBirdOutput(nn.Module):
class FlaxBigBirdLayer(nn.Module):
config: BigBirdConfig
layer_id: int = None
layer_id: Optional[int] = None
dtype: jnp.dtype = jnp.float32 # the dtype of the computation
def setup(self):

View File

@ -180,7 +180,7 @@ class BitImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -176,7 +176,7 @@ class ChameleonImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -169,7 +169,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -92,7 +92,7 @@ class ClapFeatureExtractor(SequenceFeatureExtractor):
return_attention_mask=False, # pad inputs to max length with silence token (zero) and no attention mask
frequency_min: float = 0,
frequency_max: float = 14_000,
top_db: int = None,
top_db: Optional[int] = None,
truncation: str = "fusion",
padding: str = "repeatpad",
**kwargs,
@ -258,7 +258,7 @@ class ClapFeatureExtractor(SequenceFeatureExtractor):
def __call__(
self,
raw_speech: Union[np.ndarray, List[float], List[np.ndarray], List[List[float]]],
truncation: str = None,
truncation: Optional[str] = None,
padding: Optional[str] = None,
max_length: Optional[int] = None,
sampling_rate: Optional[int] = None,

View File

@ -204,7 +204,7 @@ class CLIPImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -1360,7 +1360,7 @@ class CLIPSegForImageSegmentation(CLIPSegPreTrainedModel):
def get_conditional_embeddings(
self,
batch_size: int = None,
batch_size: Optional[int] = None,
input_ids: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,

View File

@ -287,7 +287,7 @@ class DacResidualVectorQuantize(nn.Module):
self.quantizers = nn.ModuleList([DacVectorQuantize(config) for i in range(config.n_codebooks)])
self.quantizer_dropout = quantizer_dropout
def forward(self, hidden_state, n_quantizers: int = None):
def forward(self, hidden_state, n_quantizers: Optional[int] = None):
"""
Quantizes the input tensor using a fixed set of codebooks and returns corresponding codebook vectors.
Args:
@ -608,7 +608,7 @@ class DacModel(DacPreTrainedModel):
def encode(
self,
input_values: torch.Tensor,
n_quantizers: int = None,
n_quantizers: Optional[int] = None,
return_dict: Optional[bool] = None,
):
"""
@ -681,7 +681,7 @@ class DacModel(DacPreTrainedModel):
def forward(
self,
input_values: torch.Tensor,
n_quantizers: int = None,
n_quantizers: Optional[int] = None,
return_dict: Optional[bool] = None,
):
"""

View File

@ -462,7 +462,7 @@ class SPMTokenizer:
return ["".join(x) for x in output]
def save_pretrained(self, path: str, filename_prefix: str = None):
def save_pretrained(self, path: str, filename_prefix: Optional[str] = None):
filename = VOCAB_FILES_NAMES[list(VOCAB_FILES_NAMES.keys())[0]]
if filename_prefix is not None:
filename = filename_prefix + "-" + filename

View File

@ -182,7 +182,7 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: Optional[bool] = None,
rescale_factor: Optional[float] = None,
do_normalize: Optional[bool] = None,

View File

@ -14,6 +14,8 @@
# limitations under the License.
"""Graphormer model configuration"""
from typing import Optional
from ....configuration_utils import PretrainedConfig
from ....utils import logging
@ -159,8 +161,8 @@ class GraphormerConfig(PretrainedConfig):
traceable: bool = False,
q_noise: float = 0.0,
qn_block_size: int = 8,
kdim: int = None,
vdim: int = None,
kdim: Optional[int] = None,
vdim: Optional[int] = None,
bias: bool = True,
self_attention: bool = True,
pad_token_id=0,

View File

@ -162,7 +162,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
lower_case=False,
delimiter=None,
vocab_file=None,
pretrained_vocab_file: str = None,
pretrained_vocab_file: Optional[str] = None,
never_split=None,
unk_token="<unk>",
eos_token="<eos>",

View File

@ -280,7 +280,7 @@ class TvltImageProcessor(BaseImageProcessor):
do_resize: bool = None,
size: Dict[str, int] = None,
patch_size: List[int] = None,
num_frames: int = None,
num_frames: Optional[int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: Dict[str, int] = None,

View File

@ -22,7 +22,7 @@ import sys
from dataclasses import dataclass, field
from functools import partial
from pathlib import Path
from typing import List
from typing import List, Optional
import torch
import torch.nn as nn
@ -163,7 +163,7 @@ def convert_weight_and_push(
print(f"Pushed {checkpoint_name}")
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
filename = "imagenet-1k-id2label.json"
num_labels = 1000

View File

@ -196,7 +196,7 @@ class ViTHybridImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -161,7 +161,7 @@ class DPTImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: bool = False,
size_divisor: int = None,
size_divisor: Optional[int] = None,
do_reduce_labels: bool = False,
**kwargs,
) -> None:
@ -299,14 +299,14 @@ class DPTImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
keep_aspect_ratio: bool = None,
ensure_multiple_of: int = None,
ensure_multiple_of: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: bool = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
):
if do_reduce_labels:
@ -340,14 +340,14 @@ class DPTImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
keep_aspect_ratio: bool = None,
ensure_multiple_of: int = None,
ensure_multiple_of: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: bool = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
data_format: Optional[Union[str, ChannelDimension]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> np.ndarray:
@ -391,7 +391,7 @@ class DPTImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
keep_aspect_ratio: bool = None,
ensure_multiple_of: int = None,
ensure_multiple_of: Optional[int] = None,
do_reduce_labels: bool = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
):
@ -437,9 +437,9 @@ class DPTImageProcessor(BaseImageProcessor):
images: ImageInput,
segmentation_maps: Optional[ImageInput] = None,
do_resize: bool = None,
size: int = None,
size: Optional[int] = None,
keep_aspect_ratio: bool = None,
ensure_multiple_of: int = None,
ensure_multiple_of: Optional[int] = None,
resample: PILImageResampling = None,
do_rescale: bool = None,
rescale_factor: float = None,
@ -447,7 +447,7 @@ class DPTImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: bool = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
do_reduce_labels: Optional[bool] = None,
return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST,

View File

@ -398,8 +398,8 @@ class EncoderDecoderModel(PreTrainedModel, GenerationMixin):
@classmethod
def from_encoder_decoder_pretrained(
cls,
encoder_pretrained_model_name_or_path: str = None,
decoder_pretrained_model_name_or_path: str = None,
encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> PreTrainedModel:
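Both name-or-path arguments of `from_encoder_decoder_pretrained` already defaulted to None, so the new `Optional[str]` annotations simply make the signature honest. A hedged usage sketch; the checkpoint names are examples only, and loading them requires network access:

```python
from transformers import EncoderDecoderModel

# Build an encoder-decoder model from two pretrained checkpoints.
# Any compatible encoder/decoder checkpoints work; these names are illustrative.
model = EncoderDecoderModel.from_encoder_decoder_pretrained(
    "google-bert/bert-base-uncased",
    "google-bert/bert-base-uncased",
)
print(type(model).__name__)  # EncoderDecoderModel
```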

View File

@ -311,8 +311,8 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
@classmethod
def from_encoder_decoder_pretrained(
cls,
encoder_pretrained_model_name_or_path: str = None,
decoder_pretrained_model_name_or_path: str = None,
encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> TFPreTrainedModel:

View File

@ -172,7 +172,7 @@ class EsmConfig(PretrainedConfig):
@dataclass
class EsmFoldConfig:
esm_type: str = None
esm_type: Optional[str] = None
fp16_esm: bool = True
use_esm_attn_map: bool = False
esm_ablate_pairwise: bool = False

View File

@ -249,7 +249,7 @@ class FlavaImageProcessor(BaseImageProcessor):
codebook_size: bool = None,
codebook_resample: int = PILImageResampling.LANCZOS,
codebook_do_center_crop: bool = True,
codebook_crop_size: int = None,
codebook_crop_size: Optional[int] = None,
codebook_do_rescale: bool = True,
codebook_rescale_factor: Union[int, float] = 1 / 255,
codebook_do_map_pixels: bool = True,

View File

@ -104,8 +104,8 @@ class Gemma3ImageProcessor(BaseImageProcessor):
image_std: Optional[Union[float, List[float]]] = None,
do_convert_rgb: bool = None,
do_pan_and_scan: bool = None,
pan_and_scan_min_crop_size: int = None,
pan_and_scan_max_num_crops: int = None,
pan_and_scan_min_crop_size: Optional[int] = None,
pan_and_scan_max_num_crops: Optional[int] = None,
pan_and_scan_min_ratio_to_activate: float = None,
**kwargs,
) -> None:
@ -253,8 +253,8 @@ class Gemma3ImageProcessor(BaseImageProcessor):
input_data_format: Optional[Union[str, ChannelDimension]] = None,
do_convert_rgb: bool = None,
do_pan_and_scan: bool = None,
pan_and_scan_min_crop_size: int = None,
pan_and_scan_max_num_crops: int = None,
pan_and_scan_min_crop_size: Optional[int] = None,
pan_and_scan_max_num_crops: Optional[int] = None,
pan_and_scan_min_ratio_to_activate: float = None,
) -> PIL.Image.Image:
"""

View File

@ -509,7 +509,7 @@ class IdeficsAttention(nn.Module):
is_cross_attention: bool = False,
config: PretrainedConfig = None,
qk_layer_norms: bool = False,
layer_idx: int = None,
layer_idx: Optional[int] = None,
):
super().__init__()
self.hidden_size = hidden_size
@ -675,7 +675,7 @@ class IdeficsAttention(nn.Module):
# this was adapted from LlamaDecoderLayer
class IdeficsDecoderLayer(nn.Module):
def __init__(self, config: IdeficsConfig, layer_idx: int = None):
def __init__(self, config: IdeficsConfig, layer_idx: Optional[int] = None):
super().__init__()
self.hidden_size = config.hidden_size
self.self_attn = IdeficsAttention(
@ -754,7 +754,7 @@ class IdeficsDecoderLayer(nn.Module):
class IdeficsGatedCrossAttentionLayer(nn.Module):
def __init__(self, config: IdeficsConfig, layer_idx: int = None):
def __init__(self, config: IdeficsConfig, layer_idx: Optional[int] = None):
super().__init__()
self.hidden_size = config.hidden_size
self.cross_attn = IdeficsAttention(

View File

@ -89,7 +89,9 @@ class Idefics2Processor(ProcessorMixin):
image_processor_class = "Idefics2ImageProcessor"
tokenizer_class = "AutoTokenizer"
def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, chat_template: str = None, **kwargs):
def __init__(
self, image_processor, tokenizer=None, image_seq_len: int = 64, chat_template: Optional[str] = None, **kwargs
):
if image_processor is None:
raise ValueError("You need to specify an `image_processor`.")
if tokenizer is None:

View File

@ -133,7 +133,9 @@ class Idefics3Processor(ProcessorMixin):
image_processor_class = "Idefics3ImageProcessor"
tokenizer_class = "AutoTokenizer"
def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: str = None, **kwargs):
def __init__(
self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: Optional[str] = None, **kwargs
):
if image_processor is None:
raise ValueError("You need to specify an `image_processor`.")
if tokenizer is None:

View File

@ -19,6 +19,7 @@ import json
from collections import OrderedDict
from functools import partial
from pathlib import Path
from typing import Optional
import timm
import torch
@ -79,7 +80,7 @@ def convert_weight_and_push(
print(f"Pushed {checkpoint_name}")
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
filename = "imagenet-1k-id2label.json"
num_labels = 1000
expected_shape = (1, num_labels)

View File

@ -333,7 +333,7 @@ class LlavaNextImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,
@ -563,7 +563,7 @@ class LlavaNextImageProcessor(BaseImageProcessor):
image_grid_pinpoints: List = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -183,7 +183,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,
@ -283,7 +283,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -577,7 +577,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
image: ImageInput,
do_resize: bool = None,
size: Dict[str, int] = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
resample: PILImageResampling = None,
do_rescale: bool = None,
rescale_factor: float = None,
@ -601,7 +601,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
image: ImageInput,
do_resize: bool = None,
size: Dict[str, int] = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
resample: PILImageResampling = None,
do_rescale: bool = None,
rescale_factor: float = None,

View File

@ -1592,7 +1592,7 @@ class Mask2FormerMaskedAttentionDecoderLayer(nn.Module):
def forward_post(
self,
hidden_states: torch.Tensor,
level_index: int = None,
level_index: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
position_embeddings: Optional[torch.Tensor] = None,
query_position_embeddings: Optional[torch.Tensor] = None,
@ -1651,7 +1651,7 @@ class Mask2FormerMaskedAttentionDecoderLayer(nn.Module):
def forward_pre(
self,
hidden_states: torch.Tensor,
level_index: int = None,
level_index: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
position_embeddings: Optional[torch.Tensor] = None,
query_position_embeddings: Optional[torch.Tensor] = None,
@ -1712,7 +1712,7 @@ class Mask2FormerMaskedAttentionDecoderLayer(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
level_index: int = None,
level_index: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
position_embeddings: Optional[torch.Tensor] = None,
query_position_embeddings: Optional[torch.Tensor] = None,
@ -2013,7 +2013,9 @@ class Mask2FormerMaskPredictor(nn.Module):
self.mask_embedder = Mask2FormerMLPPredictionHead(self.hidden_size, self.hidden_size, mask_feature_size)
def forward(self, outputs: torch.Tensor, pixel_embeddings: torch.Tensor, attention_mask_target_size: int = None):
def forward(
self, outputs: torch.Tensor, pixel_embeddings: torch.Tensor, attention_mask_target_size: Optional[int] = None
):
mask_embeddings = self.mask_embedder(outputs.transpose(0, 1))
is_tracing = torch.jit.is_tracing() or isinstance(outputs, torch.fx.Proxy) or is_torchdynamo_compiling()

View File

@ -578,7 +578,7 @@ class MaskFormerImageProcessor(BaseImageProcessor):
image: ImageInput,
do_resize: bool = None,
size: Dict[str, int] = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
resample: PILImageResampling = None,
do_rescale: bool = None,
rescale_factor: float = None,
@ -602,7 +602,7 @@ class MaskFormerImageProcessor(BaseImageProcessor):
image: ImageInput,
do_resize: bool = None,
size: Dict[str, int] = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
resample: PILImageResampling = None,
do_rescale: bool = None,
rescale_factor: float = None,

View File

@ -1316,7 +1316,7 @@ class MimiVectorQuantization(nn.Module):
class MimiResidualVectorQuantizer(nn.Module):
"""Residual Vector Quantizer."""
def __init__(self, config: MimiConfig, num_quantizers: int = None):
def __init__(self, config: MimiConfig, num_quantizers: Optional[int] = None):
super().__init__()
self.codebook_size = config.codebook_size
self.frame_rate = config.frame_rate

View File

@ -437,7 +437,7 @@ class MoonshineEncoderLayer(nn.Module):
class MoonshineDecoderLayer(nn.Module):
def __init__(self, config: MoonshineConfig, layer_idx: int = None):
def __init__(self, config: MoonshineConfig, layer_idx: Optional[int] = None):
super().__init__()
self.hidden_size = config.hidden_size

View File

@ -427,7 +427,7 @@ class MoonshineEncoderLayer(LlamaDecoderLayer):
class MoonshineDecoderLayer(nn.Module):
def __init__(self, config: MoonshineConfig, layer_idx: int = None):
def __init__(self, config: MoonshineConfig, layer_idx: Optional[int] = None):
super().__init__()
self.hidden_size = config.hidden_size

View File

@ -420,7 +420,7 @@ class MoshiGatingMLP(nn.Module):
self.fc1 = MoshiFlexibleLinear(hidden_size, ffn_dim, num_layers)
self.fc2 = MoshiFlexibleLinear(ffn_dim // 2, hidden_size, num_layers)
def forward(self, hidden_states: torch.Tensor, layer_idx: int = None) -> torch.Tensor:
def forward(self, hidden_states: torch.Tensor, layer_idx: Optional[int] = None) -> torch.Tensor:
hidden_states = self.fc1(hidden_states) if layer_idx is None else self.fc1(hidden_states, layer_idx)
batch_size, sequence_length, _ = hidden_states.shape
@ -2644,7 +2644,7 @@ class MoshiForConditionalGeneration(MoshiPreTrainedModel, GenerationMixin):
return input_ids
def build_delay_pattern_mask(
self, input_ids: torch.LongTensor, bos_token_id: int, pad_token_id: int, max_length: int = None
self, input_ids: torch.LongTensor, bos_token_id: int, pad_token_id: int, max_length: Optional[int] = None
):
"""Build a delayed pattern mask to the input_ids. Each codebook, except the first one, is offset by
one, giving a delayed pattern mask at the start of sequence and end of sequence. Take the example where there

View File

@ -1377,7 +1377,9 @@ class MusicgenForCausalLM(MusicgenPreTrainedModel, GenerationMixin):
"use_cache": use_cache,
}
def build_delay_pattern_mask(self, input_ids: torch.LongTensor, pad_token_id: int, max_length: int = None):
def build_delay_pattern_mask(
self, input_ids: torch.LongTensor, pad_token_id: int, max_length: Optional[int] = None
):
"""Build a delayed pattern mask to the input_ids. Each codebook is offset by the previous codebook by
one, giving a delayed pattern mask at the start of sequence and end of sequence. Take the example where there
are 4 codebooks and a max sequence length of 8, we have the delayed pattern mask of shape `(codebooks,
@ -1828,9 +1830,9 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin):
@classmethod
def from_sub_models_pretrained(
cls,
text_encoder_pretrained_model_name_or_path: str = None,
audio_encoder_pretrained_model_name_or_path: str = None,
decoder_pretrained_model_name_or_path: str = None,
text_encoder_pretrained_model_name_or_path: Optional[str] = None,
audio_encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> PreTrainedModel:
@ -2232,8 +2234,8 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin):
batch_size: int,
model_input_name: str,
model_kwargs: Dict[str, torch.Tensor],
decoder_start_token_id: int = None,
bos_token_id: int = None,
decoder_start_token_id: Optional[int] = None,
bos_token_id: Optional[int] = None,
device: torch.device = None,
) -> Tuple[torch.LongTensor, Dict[str, torch.Tensor]]:
"""Prepares `decoder_input_ids` for generation with encoder-decoder models"""
@ -2454,7 +2456,7 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin):
return torch.ones((batch_size, 1), dtype=torch.long, device=self.device) * bos_token_id
def _get_decoder_start_token_id(
self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: int = None
self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None
) -> int:
decoder_start_token_id = (
decoder_start_token_id

View File

@ -1297,7 +1297,9 @@ class MusicgenMelodyForCausalLM(MusicgenMelodyPreTrainedModel, GenerationMixin):
"use_cache": use_cache,
}
def build_delay_pattern_mask(self, input_ids: torch.LongTensor, pad_token_id: int, max_length: int = None):
def build_delay_pattern_mask(
self, input_ids: torch.LongTensor, pad_token_id: int, max_length: Optional[int] = None
):
"""Build a delayed pattern mask to the input_ids. Each codebook is offset by the previous codebook by
one, giving a delayed pattern mask at the start of sequence and end of sequence. Take the example where there
are 4 codebooks and a max sequence length of 8, we have the delayed pattern mask of shape `(codebooks,
@ -1706,9 +1708,9 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin):
# Copied from transformers.models.musicgen.modeling_musicgen.MusicgenForConditionalGeneration.from_sub_models_pretrained with Musicgen->MusicgenMelody, musicgen-small->musicgen-melody
def from_sub_models_pretrained(
cls,
text_encoder_pretrained_model_name_or_path: str = None,
audio_encoder_pretrained_model_name_or_path: str = None,
decoder_pretrained_model_name_or_path: str = None,
text_encoder_pretrained_model_name_or_path: Optional[str] = None,
audio_encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> PreTrainedModel:
@ -2112,8 +2114,8 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin):
batch_size: int,
model_input_name: str,
model_kwargs: Dict[str, torch.Tensor],
decoder_start_token_id: int = None,
bos_token_id: int = None,
decoder_start_token_id: Optional[int] = None,
bos_token_id: Optional[int] = None,
device: torch.device = None,
) -> Tuple[torch.LongTensor, Dict[str, torch.Tensor]]:
"""Prepares `decoder_input_ids` for generation with encoder-decoder models"""
@ -2304,7 +2306,7 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin):
# Copied from transformers.models.musicgen.modeling_musicgen.MusicgenForConditionalGeneration._get_decoder_start_token_id
def _get_decoder_start_token_id(
self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: int = None
self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None
) -> int:
decoder_start_token_id = (
decoder_start_token_id

View File

@ -19,7 +19,7 @@ Fast tokenizer class for Nougat.
import re
from functools import partial
from multiprocessing import Pool
from typing import List, Union
from typing import List, Optional, Union
import numpy as np
@ -584,7 +584,7 @@ class NougatTokenizerFast(PreTrainedTokenizerFast):
self,
generation: Union[str, List[str]],
fix_markdown: bool = True,
num_workers: int = None,
num_workers: Optional[int] = None,
) -> Union[str, List[str]]:
"""
Postprocess a generated text or a list of generated texts.

View File

@ -440,7 +440,7 @@ class OneFormerImageProcessor(BaseImageProcessor):
ignore_index: Optional[int] = None,
do_reduce_labels: bool = False,
repo_path: Optional[str] = "shi-labs/oneformer_demo",
class_info_file: str = None,
class_info_file: Optional[str] = None,
num_text: Optional[int] = None,
num_labels: Optional[int] = None,
**kwargs,

View File

@ -105,7 +105,7 @@ class OPTAttention(nn.Module):
def __init__(
self,
config: OPTConfig,
layer_idx: int = None,
layer_idx: Optional[int] = None,
**kwargs,
):
super().__init__()
@ -369,7 +369,7 @@ OPT_ATTENTION_CLASSES = {
class OPTDecoderLayer(nn.Module):
def __init__(self, config: OPTConfig, layer_idx: int = None):
def __init__(self, config: OPTConfig, layer_idx: Optional[int] = None):
super().__init__()
self.embed_dim = config.hidden_size

View File

@ -215,7 +215,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
images: ImageInput,
do_resize: bool = None,
size: Dict[str, int] = None,
crop_pct: int = None,
crop_pct: Optional[int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: Dict[str, int] = None,

View File

@ -152,7 +152,7 @@ class PromptDepthAnythingImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: bool = False,
size_divisor: int = None,
size_divisor: Optional[int] = None,
prompt_scale_to_meter: float = 0.001, # default unit is mm
**kwargs,
):

View File

@ -132,8 +132,8 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_convert_rgb: bool = True,
min_pixels: int = None,
max_pixels: int = None,
min_pixels: Optional[int] = None,
max_pixels: Optional[int] = None,
patch_size: int = 14,
temporal_patch_size: int = 2,
merge_size: int = 2,
@ -177,9 +177,9 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
do_normalize: bool = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
patch_size: int = None,
temporal_patch_size: int = None,
merge_size: int = None,
patch_size: Optional[int] = None,
temporal_patch_size: Optional[int] = None,
merge_size: Optional[int] = None,
do_convert_rgb: bool = None,
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
@ -304,17 +304,17 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
videos: VideoInput = None,
do_resize: bool = None,
size: Dict[str, int] = None,
min_pixels: int = None,
max_pixels: int = None,
min_pixels: Optional[int] = None,
max_pixels: Optional[int] = None,
resample: PILImageResampling = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
patch_size: int = None,
temporal_patch_size: int = None,
merge_size: int = None,
patch_size: Optional[int] = None,
temporal_patch_size: Optional[int] = None,
merge_size: Optional[int] = None,
do_convert_rgb: bool = None,
return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,

View File

@ -263,11 +263,11 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast):
do_normalize: bool = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
min_pixels: int = None,
max_pixels: int = None,
patch_size: int = None,
temporal_patch_size: int = None,
merge_size: int = None,
min_pixels: Optional[int] = None,
max_pixels: Optional[int] = None,
patch_size: Optional[int] = None,
temporal_patch_size: Optional[int] = None,
merge_size: Optional[int] = None,
do_convert_rgb: bool = None,
return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,

View File

@ -245,8 +245,8 @@ class RagPreTrainedModel(PreTrainedModel):
@classmethod
def from_pretrained_question_encoder_generator(
cls,
question_encoder_pretrained_model_name_or_path: str = None,
generator_pretrained_model_name_or_path: str = None,
question_encoder_pretrained_model_name_or_path: Optional[str] = None,
generator_pretrained_model_name_or_path: Optional[str] = None,
retriever: RagRetriever = None,
**kwargs,
) -> PreTrainedModel:

View File

@ -232,8 +232,8 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
@classmethod
def from_pretrained_question_encoder_generator(
cls,
question_encoder_pretrained_model_name_or_path: str = None,
generator_pretrained_model_name_or_path: str = None,
question_encoder_pretrained_model_name_or_path: Optional[str] = None,
generator_pretrained_model_name_or_path: Optional[str] = None,
retriever: RagRetriever = None,
*model_args,
**kwargs,

View File

@ -81,7 +81,7 @@ class RagTokenizer:
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
padding: str = "longest",
return_tensors: str = None,
return_tensors: Optional[str] = None,
truncation: bool = True,
**kwargs,
) -> BatchEncoding:

View File

@ -25,7 +25,7 @@ from dataclasses import dataclass, field
from functools import partial
from pathlib import Path
from pprint import pprint
from typing import Dict, List, Tuple
from typing import Dict, List, Optional, Tuple
import torch
import torch.nn as nn
@ -159,7 +159,7 @@ def get_from_to_our_keys(model_name: str) -> Dict[str, str]:
return from_to_ours_keys
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
filename = "imagenet-1k-id2label.json"
num_labels = 1000

View File

@ -19,7 +19,7 @@ import json
from dataclasses import dataclass, field
from functools import partial
from pathlib import Path
from typing import Callable, Dict, List, Tuple
from typing import Callable, Dict, List, Optional, Tuple
import timm
import torch
@ -218,7 +218,7 @@ def convert_weight_and_push(
print(f"Pushed {name}")
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
filename = "imagenet-1k-id2label.json"
num_labels = 1000
expected_shape = (1, num_labels)

View File

@ -19,7 +19,7 @@ import json
from dataclasses import dataclass, field
from functools import partial
from pathlib import Path
from typing import List
from typing import List, Optional
import timm
import torch
@ -122,7 +122,7 @@ def convert_weight_and_push(name: str, config: ResNetConfig, save_directory: Pat
print(f"Pushed {checkpoint_name}")
def convert_weights_and_push(save_directory: Path, model_name: str = None, push_to_hub: bool = True):
def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = None, push_to_hub: bool = True):
filename = "imagenet-1k-id2label.json"
num_labels = 1000
expected_shape = (1, num_labels)

View File

@ -770,8 +770,8 @@ class RoCBertTokenizer(PreTrainedTokenizer):
self,
token_ids_0: List[int],
token_ids_1: Optional[List[int]] = None,
cls_token_id: int = None,
sep_token_id: int = None,
cls_token_id: Optional[int] = None,
sep_token_id: Optional[int] = None,
) -> List[int]:
"""
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and

View File

@ -127,8 +127,8 @@ class SamImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: bool = True,
pad_size: int = None,
mask_pad_size: int = None,
pad_size: Optional[int] = None,
mask_pad_size: Optional[int] = None,
do_convert_rgb: bool = True,
**kwargs,
) -> None:

View File

@ -325,8 +325,8 @@ class TFSegformerMixFFN(keras.layers.Layer):
self,
config: SegformerConfig,
in_features: int,
hidden_features: int = None,
out_features: int = None,
hidden_features: Optional[int] = None,
out_features: Optional[int] = None,
**kwargs,
):
super().__init__(**kwargs)

View File

@ -52,7 +52,7 @@ class SiglipProcessor(ProcessorMixin):
images: ImageInput = None,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
max_length: int = None,
max_length: Optional[int] = None,
return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
) -> BatchFeature:
"""

View File

@ -141,7 +141,9 @@ class SmolVLMProcessor(ProcessorMixin):
image_processor_class = "SmolVLMImageProcessor"
tokenizer_class = "AutoTokenizer"
def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: str = None, **kwargs):
def __init__(
self, image_processor, tokenizer=None, image_seq_len: int = 169, chat_template: Optional[str] = None, **kwargs
):
self.fake_image_token = getattr(tokenizer, "fake_image_token", "<fake_token_around_image>")
self.image_token = getattr(tokenizer, "image_token", "<image>")
self.end_of_utterance_token = getattr(tokenizer, "end_of_utterance_token", "<end_of_utterance>")

View File

@ -291,8 +291,8 @@ class SpeechEncoderDecoderModel(PreTrainedModel, GenerationMixin):
@classmethod
def from_encoder_decoder_pretrained(
cls,
encoder_pretrained_model_name_or_path: str = None,
decoder_pretrained_model_name_or_path: str = None,
encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> PreTrainedModel:

View File

@ -247,8 +247,8 @@ class TapasTokenizer(PreTrainedTokenizer):
tokenize_chinese_chars=True,
strip_accents=None,
cell_trim_length: int = -1,
max_column_id: int = None,
max_row_id: int = None,
max_column_id: Optional[int] = None,
max_row_id: Optional[int] = None,
strip_column_names: bool = False,
update_answer_coordinates: bool = False,
min_question_length=None,
@ -2242,8 +2242,8 @@ class NumericValue:
@dataclass
class NumericValueSpan:
begin_index: int = None
end_index: int = None
begin_index: Optional[int] = None
end_index: Optional[int] = None
values: List[NumericValue] = None

View File

@ -205,10 +205,10 @@ class TextNetImageProcessor(BaseImageProcessor):
images: ImageInput,
do_resize: bool = None,
size: Dict[str, int] = None,
size_divisor: int = None,
size_divisor: Optional[int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,

View File

@ -144,8 +144,8 @@ class TrOCRAttention(nn.Module):
config,
embed_dim: int,
num_heads: int,
kdim: int = None,
vdim: int = None,
kdim: Optional[int] = None,
vdim: Optional[int] = None,
dropout: float = 0.0,
is_decoder: bool = False,
bias: bool = True,

View File

@ -178,7 +178,7 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
size: Dict[str, int] = None,
resample: PILImageResampling = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_rescale: bool = None,
rescale_factor: float = None,
do_normalize: bool = None,
@ -332,7 +332,7 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_center_crop: bool = None,
crop_size: int = None,
crop_size: Optional[int] = None,
do_convert_rgb: bool = None,
data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,

View File

@ -309,8 +309,8 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
@classmethod
def from_encoder_decoder_pretrained(
cls,
encoder_pretrained_model_name_or_path: str = None,
decoder_pretrained_model_name_or_path: str = None,
encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> TFPreTrainedModel:

View File

@ -380,8 +380,8 @@ class VisionEncoderDecoderModel(PreTrainedModel, GenerationMixin):
@classmethod
def from_encoder_decoder_pretrained(
cls,
encoder_pretrained_model_name_or_path: str = None,
decoder_pretrained_model_name_or_path: str = None,
encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> PreTrainedModel:

View File

@ -414,8 +414,8 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel):
@classmethod
def from_vision_text_pretrained(
cls,
vision_model_name_or_path: str = None,
text_model_name_or_path: str = None,
vision_model_name_or_path: Optional[str] = None,
text_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> FlaxPreTrainedModel:

View File

@ -465,8 +465,8 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
@classmethod
def from_vision_text_pretrained(
cls,
vision_model_name_or_path: str = None,
text_model_name_or_path: str = None,
vision_model_name_or_path: Optional[str] = None,
text_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> TFPreTrainedModel:

View File

@ -417,8 +417,8 @@ class VisionTextDualEncoderModel(PreTrainedModel):
@classmethod
def from_vision_text_pretrained(
cls,
vision_model_name_or_path: str = None,
text_model_name_or_path: str = None,
vision_model_name_or_path: Optional[str] = None,
text_model_name_or_path: Optional[str] = None,
*model_args,
**kwargs,
) -> PreTrainedModel:

View File

@ -14,6 +14,8 @@
# limitations under the License.
"""VitPose model configuration"""
from typing import Optional
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ...utils.backbone_utils import verify_backbone_config_arguments
@ -75,11 +77,11 @@ class VitPoseConfig(PretrainedConfig):
def __init__(
self,
backbone_config: PretrainedConfig = None,
backbone: str = None,
backbone_config: Optional[PretrainedConfig] = None,
backbone: Optional[str] = None,
use_pretrained_backbone: bool = False,
use_timm_backbone: bool = False,
backbone_kwargs: dict = None,
backbone_kwargs: Optional[dict] = None,
initializer_range: float = 0.02,
scale_factor: int = 4,
use_simple_decoder: bool = True,
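Optional[...] composes with any type, not just builtins, which is why this file also gains a module-level "from typing import Optional". A small sketch with a hypothetical ToyConfig standing in for PretrainedConfig-style objects (nothing here is the real VitPose logic):

from typing import Optional


class ToyConfig:
    def __init__(self, hidden_size: int = 32):
        self.hidden_size = hidden_size


def build_backbone(
    backbone_config: Optional[ToyConfig] = None,
    backbone: Optional[str] = None,
    backbone_kwargs: Optional[dict] = None,
) -> ToyConfig:
    # Mirrors the config pattern above: if no config object is passed,
    # fall back to a default one built from the keyword arguments.
    if backbone_config is None:
        backbone_config = ToyConfig(**(backbone_kwargs or {}))
    return backbone_config


print(build_backbone().hidden_size)                                     # 32
print(build_backbone(backbone_kwargs={"hidden_size": 64}).hidden_size)  # 64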

View File

@ -652,7 +652,7 @@ class WhisperEncoderLayer(nn.Module):
class WhisperDecoderLayer(nn.Module):
def __init__(self, config: WhisperConfig, layer_idx: int = None):
def __init__(self, config: WhisperConfig, layer_idx: Optional[int] = None):
super().__init__()
self.embed_dim = config.d_model

View File

@ -377,7 +377,9 @@ class WhisperTokenizer(PreTrainedTokenizer):
self.cache[token] = word
return word
def set_prefix_tokens(self, language: str = None, task: str = None, predict_timestamps: bool = None):
def set_prefix_tokens(
self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: bool = None
):
"""
Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to
update the prefix tokens as required when fine-tuning. Example:
@ -1276,7 +1278,7 @@ def _collate_word_timestamps(tokenizer, tokens, token_timestamps, language, retu
def _combine_tokens_into_words(
tokenizer,
tokens: List[int],
language: str = None,
language: Optional[str] = None,
prepend_punctuations: str = "\"'“¡¿([{-",
append_punctuations: str = "\"'.。,!?::”)]}、",
):
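Here None is a sentinel for "leave the current value unchanged", so Optional[str] is the honest annotation; note that predict_timestamps: bool = None in the reformatted signature is still implicitly Optional and would also be flagged by a strict checker. A hypothetical PrefixState class sketching that keep-or-override pattern (not the real tokenizer code):

from typing import Optional


class PrefixState:
    def __init__(self) -> None:
        self.language: Optional[str] = None
        self.task: Optional[str] = None

    def set_prefix(self, language: Optional[str] = None, task: Optional[str] = None) -> None:
        # None means "keep the current value", so Optional[str] matches the contract.
        self.language = language if language is not None else self.language
        self.task = task if task is not None else self.task


state = PrefixState()
state.set_prefix(language="en")
state.set_prefix(task="transcribe")
print(state.language, state.task)  # en transcribe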

View File

@ -451,7 +451,9 @@ class WhisperTokenizerFast(PreTrainedTokenizerFast):
return tuple(files) + (normalizer_file,)
def set_prefix_tokens(self, language: str = None, task: str = None, predict_timestamps: bool = None):
def set_prefix_tokens(
self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: bool = None
):
"""
Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to
update the prefix tokens as required when fine-tuning. Example:

View File

@ -679,7 +679,7 @@ class ZambaMambaDecoderLayer(nn.Module):
self,
hidden_states: torch.Tensor,
original_hidden_states: Optional[torch.Tensor] = None,
layer_idx: int = None,
layer_idx: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
causal_mask: Optional[torch.Tensor] = None,
past_key_value: Optional[ZambaHybridDynamicCache] = None,
@ -747,7 +747,7 @@ class ZambaHybridLayer(nn.Module):
self,
hidden_states: torch.Tensor,
original_hidden_states: Optional[torch.Tensor] = None,
layer_idx: int = None,
layer_idx: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
causal_mask: Optional[torch.Tensor] = None,
past_key_value: Optional[ZambaHybridDynamicCache] = None,

View File

@ -385,8 +385,8 @@ class Zamba2Attention(nn.Module):
self,
config: Zamba2Config,
layer_idx: Optional[int] = None,
num_fwd_mem_blocks: int = None,
block_id: int = None,
num_fwd_mem_blocks: Optional[int] = None,
block_id: Optional[int] = None,
):
super().__init__()
self.config = config
@ -560,7 +560,7 @@ class Zamba2MambaMixer(nn.Module):
and is why Mamba is called **selective** state spaces)
"""
def __init__(self, config: Zamba2Config, layer_idx: int = None):
def __init__(self, config: Zamba2Config, layer_idx: Optional[int] = None):
super().__init__()
self.config = config
self.hidden_size = config.hidden_size
@ -983,7 +983,7 @@ class Zamba2MambaMixer(nn.Module):
class Zamba2MLP(nn.Module):
def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: int = None):
def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: Optional[int] = None):
"""
This MLP layer contributes to tied transformer blocks aimed to increasing compute without increasing model size. Because this layer
is tied, un-tied adapter modules (formally same as LoRA, but used in the base model) are added to the up and gate projectors to increase expressivity with a small memory overhead.
@ -1025,7 +1025,7 @@ class Zamba2MLP(nn.Module):
class Zamba2AttentionDecoderLayer(nn.Module):
def __init__(self, config: Zamba2Config, block_id: int = None, layer_idx: Optional[int] = None):
def __init__(self, config: Zamba2Config, block_id: Optional[int] = None, layer_idx: Optional[int] = None):
super().__init__()
self.block_id = block_id
num_gs = len(config.hybrid_layer_ids)
@ -1099,7 +1099,7 @@ class Zamba2MambaDecoderLayer(nn.Module):
self,
hidden_states: torch.Tensor,
original_hidden_states: Optional[torch.Tensor] = None,
layer_idx: int = None,
layer_idx: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
causal_mask: Optional[torch.Tensor] = None,
past_key_value: Optional[Zamba2HybridDynamicCache] = None,
@ -1169,7 +1169,7 @@ class Zamba2HybridLayer(nn.Module):
self,
hidden_states: torch.Tensor,
original_hidden_states: Optional[torch.Tensor] = None,
layer_idx: int = None,
layer_idx: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
causal_mask: Optional[torch.Tensor] = None,
past_key_value: Optional[Zamba2HybridDynamicCache] = None,

View File

@ -199,8 +199,8 @@ class Zamba2Attention(ZambaAttention):
self,
config: Zamba2Config,
layer_idx: Optional[int] = None,
num_fwd_mem_blocks: int = None,
block_id: int = None,
num_fwd_mem_blocks: Optional[int] = None,
block_id: Optional[int] = None,
):
super().__init__(config, layer_idx)
self.num_fwd_mem_blocks = num_fwd_mem_blocks
@ -302,7 +302,7 @@ class Zamba2MambaMixer(nn.Module):
and is why Mamba is called **selective** state spaces)
"""
def __init__(self, config: Zamba2Config, layer_idx: int = None):
def __init__(self, config: Zamba2Config, layer_idx: Optional[int] = None):
super().__init__()
self.config = config
self.hidden_size = config.hidden_size
@ -725,7 +725,7 @@ class Zamba2MambaMixer(nn.Module):
class Zamba2MLP(nn.Module):
def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: int = None):
def __init__(self, config: Zamba2Config, num_fwd_mem_blocks=None, block_id: Optional[int] = None):
"""
This MLP layer contributes to tied transformer blocks aimed to increasing compute without increasing model size. Because this layer
is tied, un-tied adapter modules (formally same as LoRA, but used in the base model) are added to the up and gate projectors to increase expressivity with a small memory overhead.
@ -767,7 +767,7 @@ class Zamba2MLP(nn.Module):
class Zamba2AttentionDecoderLayer(ZambaAttentionDecoderLayer):
def __init__(self, config: Zamba2Config, block_id: int = None, layer_idx: Optional[int] = None):
def __init__(self, config: Zamba2Config, block_id: Optional[int] = None, layer_idx: Optional[int] = None):
self.block_id = block_id
num_gs = len(config.hybrid_layer_ids)
super().__init__(config, layer_idx)
@ -847,7 +847,7 @@ class Zamba2HybridLayer(ZambaHybridLayer):
self,
hidden_states: torch.Tensor,
original_hidden_states: Optional[torch.Tensor] = None,
layer_idx: int = None,
layer_idx: Optional[int] = None,
attention_mask: Optional[torch.Tensor] = None,
causal_mask: Optional[torch.Tensor] = None,
past_key_value: Optional[Zamba2HybridDynamicCache] = None,
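One detail visible in the Zamba2MLP signatures: num_fwd_mem_blocks=None carries no annotation at all, so a checker treats it as Any and the Optional question never arises; only annotated parameters such as block_id: Optional[int] = None are actually checked. A tiny sketch of that difference, using a made-up make_block function:

from typing import Optional


def make_block(num_fwd_mem_blocks=None, block_id: Optional[int] = None) -> str:
    # num_fwd_mem_blocks is unannotated, hence implicitly Any: a checker accepts any
    # argument and any use. block_id is checked, so the None case must be handled.
    blocks = 0 if num_fwd_mem_blocks is None else int(num_fwd_mem_blocks)
    suffix = "shared" if block_id is None else f"block {block_id}"
    return f"{blocks} memory blocks ({suffix})"


print(make_block())               # 0 memory blocks (shared)
print(make_block(4, block_id=1))  # 4 memory blocks (block 1)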

View File

@ -305,9 +305,9 @@ class ZoeDepthImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_resize: bool = None,
size: int = None,
size: Optional[int] = None,
keep_aspect_ratio: bool = None,
ensure_multiple_of: int = None,
ensure_multiple_of: Optional[int] = None,
resample: PILImageResampling = None,
return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST,

View File

@ -291,7 +291,7 @@ class OnnxConfig(ABC):
sampling_rate: int = 22050,
time_duration: float = 5.0,
frequency: int = 220,
tokenizer: "PreTrainedTokenizerBase" = None,
tokenizer: Optional["PreTrainedTokenizerBase"] = None,
) -> Mapping[str, Any]:
"""
Generate inputs to provide to the ONNX exporter for the specific framework
@ -445,7 +445,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
self,
config: "PretrainedConfig",
task: str = "default",
patching_specs: List[PatchingSpec] = None,
patching_specs: Optional[list[PatchingSpec]] = None,
use_past: bool = False,
):
super().__init__(config, task=task, patching_specs=patching_specs)
@ -639,7 +639,7 @@ class OnnxSeq2SeqConfigWithPast(OnnxConfigWithPast):
def generate_dummy_inputs(
self,
tokenizer: "PreTrainedTokenizerBase",
tokenizer: Optional["PreTrainedTokenizerBase"],
batch_size: int = -1,
seq_length: int = -1,
is_pair: bool = False,

View File

@ -16,7 +16,7 @@ import warnings
from inspect import signature
from itertools import chain
from pathlib import Path
from typing import TYPE_CHECKING, Iterable, List, Tuple, Union
from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, Union
import numpy as np
from packaging.version import Version, parse
@ -85,7 +85,7 @@ def export_pytorch(
config: OnnxConfig,
opset: int,
output: Path,
tokenizer: "PreTrainedTokenizer" = None,
tokenizer: Optional["PreTrainedTokenizer"] = None,
device: str = "cpu",
) -> Tuple[List[str], List[str]]:
"""
@ -188,7 +188,7 @@ def export_tensorflow(
config: OnnxConfig,
opset: int,
output: Path,
tokenizer: "PreTrainedTokenizer" = None,
tokenizer: Optional["PreTrainedTokenizer"] = None,
) -> Tuple[List[str], List[str]]:
"""
Export a TensorFlow model to an ONNX Intermediate Representation (IR)
@ -254,7 +254,7 @@ def export(
config: OnnxConfig,
opset: int,
output: Path,
tokenizer: "PreTrainedTokenizer" = None,
tokenizer: Optional["PreTrainedTokenizer"] = None,
device: str = "cpu",
) -> Tuple[List[str], List[str]]:
"""
@ -321,7 +321,7 @@ def validate_model_outputs(
onnx_model: Path,
onnx_named_outputs: List[str],
atol: float,
tokenizer: "PreTrainedTokenizer" = None,
tokenizer: Optional["PreTrainedTokenizer"] = None,
):
from onnxruntime import InferenceSession, SessionOptions
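Optional[...] also wraps quoted forward references such as "PreTrainedTokenizer" without evaluating them at runtime, which is why these signatures can keep their TYPE_CHECKING-only imports and just add Optional to the existing import line. A minimal sketch, with a made-up HeavyTokenizer and some_heavy_package standing in for the real names:

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Imported for the type checker only; never executed at runtime.
    from some_heavy_package import HeavyTokenizer  # hypothetical module


def export_model(output: str, tokenizer: Optional["HeavyTokenizer"] = None) -> str:
    # The quoted name stays a ForwardRef at runtime, so wrapping it in
    # Optional[...] costs nothing and records that None is an accepted value.
    used = "a tokenizer" if tokenizer is not None else "no tokenizer"
    return f"exporting to {output} with {used}"


print(export_model("model.onnx"))  # exporting to model.onnx with no tokenizer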

View File

@ -531,7 +531,7 @@ class BatchEncoding(UserDict):
span = self._encodings[batch_index].word_to_tokens(word_index, sequence_index)
return TokenSpan(*span) if span is not None else None
def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> CharSpan:
def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> Optional[CharSpan]:
"""
Get the character span corresponding to an encoded token in a sequence of the batch.
@ -2629,7 +2629,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
truncation: Union[bool, str, TruncationStrategy, None] = None,
max_length: Optional[int] = None,
stride: int = 0,
padding_side: Optional[str] = None,
@ -2810,15 +2810,15 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
@add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
def __call__(
self,
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput], None] = None,
text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None,
text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput], None] = None,
text_pair_target: Optional[
Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]
] = None,
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
truncation: Union[bool, str, TruncationStrategy, None] = None,
max_length: Optional[int] = None,
stride: int = 0,
is_split_into_words: bool = False,
@ -2905,7 +2905,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None,
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
truncation: Union[bool, str, TruncationStrategy, None] = None,
max_length: Optional[int] = None,
stride: int = 0,
is_split_into_words: bool = False,
@ -3131,7 +3131,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
],
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
truncation: Union[bool, str, TruncationStrategy, None] = None,
max_length: Optional[int] = None,
stride: int = 0,
is_split_into_words: bool = False,
@ -3807,7 +3807,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
self,
sequences: Union[List[int], List[List[int]], "np.ndarray", "torch.Tensor", "tf.Tensor"],
skip_special_tokens: bool = False,
clean_up_tokenization_spaces: bool = None,
clean_up_tokenization_spaces: Optional[bool] = None,
**kwargs,
) -> List[str]:
"""
@ -3841,7 +3841,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
self,
token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"],
skip_special_tokens: bool = False,
clean_up_tokenization_spaces: bool = None,
clean_up_tokenization_spaces: Optional[bool] = None,
**kwargs,
) -> str:
"""
@ -3878,7 +3878,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
self,
token_ids: Union[int, List[int]],
skip_special_tokens: bool = False,
clean_up_tokenization_spaces: bool = None,
clean_up_tokenization_spaces: Optional[bool] = None,
**kwargs,
) -> str:
raise NotImplementedError
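Two variants of the same fix appear in this file: parameters that already take a Union simply gain None as an extra member (Union[bool, str, TruncationStrategy, None] is equivalent to Optional around that Union), and methods whose body can return None, like token_to_chars, now say so in the return annotation. A hypothetical normalize_truncation helper loosely modeled on that shape (it is not the library's resolution logic):

from typing import Optional, Union


def normalize_truncation(truncation: Union[bool, str, None] = None) -> Optional[str]:
    # Union[..., None] is the spelled-out form of Optional[Union[...]];
    # a checker treats the two identically.
    if truncation is None or truncation is False:
        return None  # Optional[str] in the return type records that None is a normal outcome
    return "longest_first" if truncation is True else truncation


print(normalize_truncation())               # None
print(normalize_truncation(True))           # longest_first
print(normalize_truncation("only_second"))  # only_second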

View File

@ -414,7 +414,7 @@ class Trainer:
@deprecate_kwarg("tokenizer", new_name="processing_class", version="5.0.0", raise_if_both_names=True)
def __init__(
self,
model: Union[PreTrainedModel, nn.Module] = None,
model: Union[PreTrainedModel, nn.Module, None] = None,
args: TrainingArguments = None,
data_collator: Optional[DataCollator] = None,
train_dataset: Optional[Union[Dataset, IterableDataset, "datasets.Dataset"]] = None,
@ -2139,7 +2139,7 @@ class Trainer:
def train(
self,
resume_from_checkpoint: Optional[Union[str, bool]] = None,
trial: Union["optuna.Trial", dict[str, Any]] = None,
trial: Union["optuna.Trial", dict[str, Any], None] = None,
ignore_keys_for_eval: Optional[list[str]] = None,
**kwargs,
):
@ -4920,10 +4920,10 @@ class Trainer:
logger.info(f" Num examples = {num_examples}")
logger.info(f" Batch size = {batch_size}")
losses_host: torch.Tensor = None
preds_host: Union[torch.Tensor, list[torch.Tensor]] = None
labels_host: Union[torch.Tensor, list[torch.Tensor]] = None
inputs_host: Union[torch.Tensor, list[torch.Tensor]] = None
losses_host: Optional[torch.Tensor] = None
preds_host: Union[torch.Tensor, list[torch.Tensor], None] = None
labels_host: Union[torch.Tensor, list[torch.Tensor], None] = None
inputs_host: Union[torch.Tensor, list[torch.Tensor], None] = None
metrics: Optional[dict] = None
eval_set_kwargs: dict = {}
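The Trainer change extends the same idea to local variables: a host buffer that starts as None and is later replaced by a tensor (or a list of tensors) needs Optional[...] or Union[..., None] on the annotation up front. A toy sketch of that initialize-or-accumulate pattern, using a hypothetical accumulate helper rather than Trainer code:

from typing import List, Optional

import torch


def accumulate(batches: List[torch.Tensor]) -> Optional[torch.Tensor]:
    # Starts as None, becomes a tensor on the first batch, grows afterwards;
    # the Optional annotation makes that lifecycle explicit to the checker.
    losses_host: Optional[torch.Tensor] = None
    for batch in batches:
        losses_host = batch if losses_host is None else torch.cat((losses_host, batch))
    return losses_host


print(accumulate([torch.ones(2), torch.zeros(1)]))  # tensor([1., 1., 0.])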