diff --git a/examples/legacy/seq2seq/run_distributed_eval.py b/examples/legacy/seq2seq/run_distributed_eval.py index 41855eaed6b..c491b01af94 100755 --- a/examples/legacy/seq2seq/run_distributed_eval.py +++ b/examples/legacy/seq2seq/run_distributed_eval.py @@ -19,6 +19,7 @@ import time from json import JSONDecodeError from logging import getLogger from pathlib import Path +from typing import Optional import torch from torch.utils.data import DataLoader @@ -54,7 +55,7 @@ def eval_data_dir( task="summarization", local_rank=None, num_return_sequences=1, - dataset_kwargs: dict = None, + dataset_kwargs: Optional[dict] = None, prefix="", **generate_kwargs, ) -> dict: diff --git a/examples/modular-transformers/image_processing_new_imgproc_model.py b/examples/modular-transformers/image_processing_new_imgproc_model.py index 8320a25228c..94274bb8f22 100644 --- a/examples/modular-transformers/image_processing_new_imgproc_model.py +++ b/examples/modular-transformers/image_processing_new_imgproc_model.py @@ -74,7 +74,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: dict[str, int] = None, + size: Optional[dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -159,7 +159,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, list[float]]] = None, image_std: Optional[Union[float, list[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> PIL.Image.Image: diff --git a/src/transformers/cache_utils.py b/src/transformers/cache_utils.py index cb2c8c5cdc2..7b6ba5ee3f5 100644 --- a/src/transformers/cache_utils.py +++ b/src/transformers/cache_utils.py @@ -359,7 +359,7 @@ class DynamicCache(Cache): ``` """ - def __init__(self, _distributed_cache_data: Iterable = None) -> None: + def __init__(self, _distributed_cache_data: Optional[Iterable] = None) -> None: super().__init__() self._seen_tokens = 0 # Used in `generate` to keep tally of how many tokens the cache has seen self.key_cache: List[torch.Tensor] = [] diff --git a/src/transformers/commands/add_new_model_like.py b/src/transformers/commands/add_new_model_like.py index bfb812340e7..a16a02c4621 100644 --- a/src/transformers/commands/add_new_model_like.py +++ b/src/transformers/commands/add_new_model_like.py @@ -512,7 +512,7 @@ def duplicate_module( new_model_patterns: ModelPatterns, dest_file: Optional[str] = None, add_copied_from: bool = True, - attrs_to_remove: List[str] = None, + attrs_to_remove: Optional[List[str]] = None, ): """ Create a new module from an existing one and adapting all function and classes names from old patterns to new ones. diff --git a/src/transformers/convert_slow_tokenizer.py b/src/transformers/convert_slow_tokenizer.py index c8cc1cdbe97..5716ee4bf5c 100644 --- a/src/transformers/convert_slow_tokenizer.py +++ b/src/transformers/convert_slow_tokenizer.py @@ -19,6 +19,7 @@ allow to make our dependency on SentencePiece optional. 
""" import warnings +from typing import Optional from packaging import version from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors @@ -326,7 +327,9 @@ class OpenAIGPTConverter(Converter): class GPT2Converter(Converter): - def converted(self, vocab: dict[str, int] = None, merges: list[tuple[str, str]] = None) -> Tokenizer: + def converted( + self, vocab: Optional[dict[str, int]] = None, merges: Optional[list[tuple[str, str]]] = None + ) -> Tokenizer: if not vocab: vocab = self.original_tokenizer.encoder if not merges: @@ -395,7 +398,9 @@ class HerbertConverter(Converter): class Qwen2Converter(Converter): - def converted(self, vocab: dict[str, int] = None, merges: list[tuple[str, str]] = None) -> Tokenizer: + def converted( + self, vocab: Optional[dict[str, int]] = None, merges: Optional[list[tuple[str, str]]] = None + ) -> Tokenizer: if not vocab: vocab = self.original_tokenizer.encoder if not merges: diff --git a/src/transformers/image_processing_utils.py b/src/transformers/image_processing_utils.py index dd08be29410..b3acbb3feb7 100644 --- a/src/transformers/image_processing_utils.py +++ b/src/transformers/image_processing_utils.py @@ -209,7 +209,7 @@ def convert_to_size_dict( def get_size_dict( - size: Union[int, Iterable[int], dict[str, int]] = None, + size: Optional[Union[int, Iterable[int], dict[str, int]]] = None, max_size: Optional[int] = None, height_width_order: bool = True, default_to_square: bool = True, diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py index 644bb763347..2f590bce0e5 100644 --- a/src/transformers/image_processing_utils_fast.py +++ b/src/transformers/image_processing_utils_fast.py @@ -755,7 +755,7 @@ class BaseImageProcessorFast(BaseImageProcessor): class SemanticSegmentationMixin: - def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[list[tuple]] = None): """ Converts the output of [`MobileNetV2ForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. 
diff --git a/src/transformers/integrations/peft.py b/src/transformers/integrations/peft.py index 0c9402abe68..8a1652748f5 100644 --- a/src/transformers/integrations/peft.py +++ b/src/transformers/integrations/peft.py @@ -79,7 +79,7 @@ class PeftAdapterMixin: max_memory: Optional[str] = None, offload_folder: Optional[str] = None, offload_index: Optional[int] = None, - peft_config: Dict[str, Any] = None, + peft_config: Optional[Dict[str, Any]] = None, adapter_state_dict: Optional[Dict[str, "torch.Tensor"]] = None, low_cpu_mem_usage: bool = False, is_trainable: bool = False, diff --git a/src/transformers/models/albert/modeling_flax_albert.py b/src/transformers/models/albert/modeling_flax_albert.py index df2ebddc7e6..f7e5f222070 100644 --- a/src/transformers/models/albert/modeling_flax_albert.py +++ b/src/transformers/models/albert/modeling_flax_albert.py @@ -558,7 +558,7 @@ class FlaxAlbertPreTrainedModel(FlaxPreTrainedModel): attention_mask=None, token_type_ids=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/aria/configuration_aria.py b/src/transformers/models/aria/configuration_aria.py index f3faa60ca3d..5843e726d64 100644 --- a/src/transformers/models/aria/configuration_aria.py +++ b/src/transformers/models/aria/configuration_aria.py @@ -18,7 +18,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict +from typing import Dict, Optional from ...configuration_utils import PretrainedConfig from ...modeling_rope_utils import rope_config_validation @@ -268,7 +268,7 @@ class AriaConfig(PretrainedConfig): vision_config=None, vision_feature_layer: int = -1, text_config: AriaTextConfig = None, - projector_patch_to_query_dict: Dict = None, + projector_patch_to_query_dict: Optional[Dict] = None, image_token_index: int = 9, initializer_range: float = 0.02, **kwargs, diff --git a/src/transformers/models/aria/image_processing_aria.py b/src/transformers/models/aria/image_processing_aria.py index 364f8f70df1..d1a722e9054 100644 --- a/src/transformers/models/aria/image_processing_aria.py +++ b/src/transformers/models/aria/image_processing_aria.py @@ -124,8 +124,8 @@ class AriaImageProcessor(BaseImageProcessor): def __init__( self, - image_mean: List[float] = None, - image_std: List[float] = None, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, max_image_size: int = 980, min_image_size: int = 336, split_resolutions: Optional[List[Tuple[int, int]]] = None, diff --git a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py index add5bdc16b7..51e203b07b2 100644 --- a/src/transformers/models/aria/modular_aria.py +++ b/src/transformers/models/aria/modular_aria.py @@ -276,7 +276,7 @@ class AriaConfig(PretrainedConfig): vision_config=None, vision_feature_layer: int = -1, text_config: AriaTextConfig = None, - projector_patch_to_query_dict: Dict = None, + projector_patch_to_query_dict: Optional[Dict] = None, image_token_index: int = 9, initializer_range: float = 0.02, **kwargs, @@ -514,8 +514,8 @@ class AriaImageProcessor(BaseImageProcessor): def __init__( self, - image_mean: List[float] = None, - image_std: List[float] = None, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, 
max_image_size: int = 980, min_image_size: int = 336, split_resolutions: Optional[List[Tuple[int, int]]] = None, diff --git a/src/transformers/models/bark/configuration_bark.py b/src/transformers/models/bark/configuration_bark.py index 932bad618aa..e8e304d218a 100644 --- a/src/transformers/models/bark/configuration_bark.py +++ b/src/transformers/models/bark/configuration_bark.py @@ -14,7 +14,7 @@ # limitations under the License. """BARK model configuration""" -from typing import Dict +from typing import Dict, Optional from ...configuration_utils import PretrainedConfig from ...utils import add_start_docstrings, logging @@ -243,10 +243,10 @@ class BarkConfig(PretrainedConfig): def __init__( self, - semantic_config: Dict = None, - coarse_acoustics_config: Dict = None, - fine_acoustics_config: Dict = None, - codec_config: Dict = None, + semantic_config: Optional[Dict] = None, + coarse_acoustics_config: Optional[Dict] = None, + fine_acoustics_config: Optional[Dict] = None, + codec_config: Optional[Dict] = None, initializer_range=0.02, **kwargs, ): diff --git a/src/transformers/models/bark/generation_configuration_bark.py b/src/transformers/models/bark/generation_configuration_bark.py index 00ff22c8b89..bb1fc266550 100644 --- a/src/transformers/models/bark/generation_configuration_bark.py +++ b/src/transformers/models/bark/generation_configuration_bark.py @@ -15,7 +15,7 @@ """BARK model generation configuration""" import copy -from typing import Dict +from typing import Dict, Optional from ...generation.configuration_utils import GenerationConfig from ...utils import logging @@ -245,9 +245,9 @@ class BarkGenerationConfig(GenerationConfig): def __init__( self, - semantic_config: Dict = None, - coarse_acoustics_config: Dict = None, - fine_acoustics_config: Dict = None, + semantic_config: Optional[Dict] = None, + coarse_acoustics_config: Optional[Dict] = None, + fine_acoustics_config: Optional[Dict] = None, sample_rate=24_000, codebook_size=1024, **kwargs, diff --git a/src/transformers/models/bart/modeling_flax_bart.py b/src/transformers/models/bart/modeling_flax_bart.py index 18c8f6b85cc..f04ab551e39 100644 --- a/src/transformers/models/bart/modeling_flax_bart.py +++ b/src/transformers/models/bart/modeling_flax_bart.py @@ -1007,7 +1007,7 @@ class FlaxBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1068,12 +1068,12 @@ class FlaxBartPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1186,7 +1186,7 @@ class FlaxBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1335,12 +1335,12 @@ class FlaxBartForConditionalGeneration(FlaxBartPreTrainedModel): encoder_attention_mask: 
Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1807,8 +1807,8 @@ class FlaxBartDecoderPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index eb2950f0e20..a83cf10aad7 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -106,10 +106,10 @@ class BeitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, do_rescale: bool = True, do_normalize: bool = True, @@ -194,10 +194,10 @@ class BeitImageProcessor(BaseImageProcessor): image: ImageInput, do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -226,10 +226,10 @@ class BeitImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -271,10 +271,10 @@ class BeitImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_reduce_labels: Optional[bool] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): @@ -320,10 +320,10 @@ class BeitImageProcessor(BaseImageProcessor): images: ImageInput, segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = 
None, do_normalize: Optional[bool] = None, @@ -470,7 +470,7 @@ class BeitImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`BeitForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/beit/modeling_flax_beit.py b/src/transformers/models/beit/modeling_flax_beit.py index d37eedea3f4..b51ff9fd094 100644 --- a/src/transformers/models/beit/modeling_flax_beit.py +++ b/src/transformers/models/beit/modeling_flax_beit.py @@ -634,7 +634,7 @@ class FlaxBeitPreTrainedModel(FlaxPreTrainedModel): self, pixel_values, bool_masked_pos=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/bert/modeling_flax_bert.py b/src/transformers/models/bert/modeling_flax_bert.py index 61939a53f4a..48b72193fa2 100644 --- a/src/transformers/models/bert/modeling_flax_bert.py +++ b/src/transformers/models/bert/modeling_flax_bert.py @@ -864,13 +864,13 @@ class FlaxBertPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/big_bird/modeling_flax_big_bird.py b/src/transformers/models/big_bird/modeling_flax_big_bird.py index e3bdfc38daf..18913e930d5 100644 --- a/src/transformers/models/big_bird/modeling_flax_big_bird.py +++ b/src/transformers/models/big_bird/modeling_flax_big_bird.py @@ -1725,14 +1725,14 @@ class FlaxBigBirdPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: Optional[jax.random.PRNGKey] = None, indices_rng: Optional[jax.random.PRNGKey] = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -2442,7 +2442,7 @@ class FlaxBigBirdForQuestionAnswering(FlaxBigBirdPreTrainedModel): position_ids=None, head_mask=None, question_lengths=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: Optional[jax.random.PRNGKey] = None, indices_rng: Optional[jax.random.PRNGKey] = None, train: bool = False, diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py index 2b1f307a29f..aa2eb379551 100644 --- a/src/transformers/models/bit/image_processing_bit.py +++ b/src/transformers/models/bit/image_processing_bit.py @@ -92,10 +92,10 @@ class BitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - 
size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -177,7 +177,7 @@ class BitImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py index 1e0775cd08c..835cb6814a7 100644 --- a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py @@ -980,7 +980,7 @@ class FlaxBlenderbotPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1043,12 +1043,12 @@ class FlaxBlenderbotPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1161,7 +1161,7 @@ class FlaxBlenderbotPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1311,12 +1311,12 @@ class FlaxBlenderbotForConditionalGeneration(FlaxBlenderbotPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py index 6aceaa611c9..1e6a3a727a6 100644 --- a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py @@ -977,7 +977,7 @@ class FlaxBlenderbotSmallPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1040,12 +1040,12 @@ class FlaxBlenderbotSmallPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: 
Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1157,7 +1157,7 @@ class FlaxBlenderbotSmallPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1308,12 +1308,12 @@ class FlaxBlenderbotSmallForConditionalGeneration(FlaxBlenderbotSmallPreTrainedM encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, deterministic: bool = True, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index 9f28b33a66b..ace61142ec8 100644 --- a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -83,7 +83,7 @@ class BlipImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py index ca10c7ce7ed..fb6fc00b94d 100644 --- a/src/transformers/models/bloom/configuration_bloom.py +++ b/src/transformers/models/bloom/configuration_bloom.py @@ -148,7 +148,7 @@ class BloomOnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/bloom/modeling_flax_bloom.py b/src/transformers/models/bloom/modeling_flax_bloom.py index 51ccb4c3625..d0b2f084d37 100644 --- a/src/transformers/models/bloom/modeling_flax_bloom.py +++ b/src/transformers/models/bloom/modeling_flax_bloom.py @@ -463,8 +463,8 @@ class FlaxBloomPreTrainedModel(FlaxPreTrainedModel): self, input_ids, attention_mask=None, - past_key_values: dict = None, - params: dict = None, + past_key_values: Optional[dict] = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower.py b/src/transformers/models/bridgetower/image_processing_bridgetower.py index 95eaa9f88b9..1f651eba1d0 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py @@ -172,7 +172,7 @@ class 
BridgeTowerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, @@ -181,7 +181,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_pad: bool = True, **kwargs, ) -> None: @@ -385,7 +385,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor): image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/camembert/modeling_camembert.py b/src/transformers/models/camembert/modeling_camembert.py index b69590ae21a..f1ab1565038 100644 --- a/src/transformers/models/camembert/modeling_camembert.py +++ b/src/transformers/models/camembert/modeling_camembert.py @@ -1581,7 +1581,7 @@ class CamembertForCausalLM(CamembertPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/chameleon/configuration_chameleon.py b/src/transformers/models/chameleon/configuration_chameleon.py index 2cc9cdb29d4..5955ef48940 100644 --- a/src/transformers/models/chameleon/configuration_chameleon.py +++ b/src/transformers/models/chameleon/configuration_chameleon.py @@ -14,7 +14,7 @@ # limitations under the License. 
"""chameleon model configuration""" -from typing import List +from typing import List, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -75,7 +75,7 @@ class ChameleonVQVAEConfig(PretrainedConfig): base_channels: int = 128, channel_multiplier: List[int] = [1, 1, 2, 2, 4], num_res_blocks: int = 2, - attn_resolutions: List[int] = None, + attn_resolutions: Optional[List[int]] = None, dropout: float = 0.0, attn_type: str = "vanilla", initializer_range=0.02, diff --git a/src/transformers/models/chameleon/image_processing_chameleon.py b/src/transformers/models/chameleon/image_processing_chameleon.py index 2d1417a8ee8..e694cee7bb8 100644 --- a/src/transformers/models/chameleon/image_processing_chameleon.py +++ b/src/transformers/models/chameleon/image_processing_chameleon.py @@ -88,10 +88,10 @@ class ChameleonImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PIL.Image.LANCZOS, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 0.0078, do_normalize: bool = True, @@ -173,7 +173,7 @@ class ChameleonImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index d14d286b57d..e8f8ba1e8d5 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -96,10 +96,10 @@ class ChineseCLIPImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -170,7 +170,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py index 77215ad636d..a506da423de 100644 --- a/src/transformers/models/clip/image_processing_clip.py +++ b/src/transformers/models/clip/image_processing_clip.py @@ -95,10 +95,10 @@ class CLIPImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -203,7 +203,7 @@ class CLIPImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - 
size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/clip/modeling_flax_clip.py b/src/transformers/models/clip/modeling_flax_clip.py index c674d35e3da..c8eb6cf02ed 100644 --- a/src/transformers/models/clip/modeling_flax_clip.py +++ b/src/transformers/models/clip/modeling_flax_clip.py @@ -667,7 +667,7 @@ class FlaxCLIPTextPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -745,7 +745,7 @@ class FlaxCLIPVisionPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -823,7 +823,7 @@ class FlaxCLIPPreTrainedModel(FlaxPreTrainedModel): pixel_values, attention_mask=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -867,7 +867,7 @@ class FlaxCLIPPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False, ): @@ -930,7 +930,7 @@ class FlaxCLIPPreTrainedModel(FlaxPreTrainedModel): ) def get_image_features( - self, pixel_values, params: dict = None, dropout_rng: jax.random.PRNGKey = None, train=False + self, pixel_values, params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False ): r""" Args: diff --git a/src/transformers/models/codegen/configuration_codegen.py b/src/transformers/models/codegen/configuration_codegen.py index 6de483cb794..7ed03ab3f65 100644 --- a/src/transformers/models/codegen/configuration_codegen.py +++ b/src/transformers/models/codegen/configuration_codegen.py @@ -151,7 +151,7 @@ class CodeGenOnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index 3c256e4f70b..83dc0f2c8b6 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -749,7 +749,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -863,13 +863,13 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, 
do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, @@ -1633,7 +1633,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): return results # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_semantic_segmentation with Detr->ConditionalDetr - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py index efa1d9476ec..0566eb39477 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py @@ -850,7 +850,7 @@ class ConditionalDetrImageProcessorFast(BaseImageProcessorFast): return results - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py index 2f7e445241c..5093c9d33b9 100644 --- a/src/transformers/models/convnext/image_processing_convnext.py +++ b/src/transformers/models/convnext/image_processing_convnext.py @@ -91,7 +91,7 @@ class ConvNextImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: Optional[float] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, @@ -190,7 +190,7 @@ class ConvNextImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: Optional[float] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/cpmant/tokenization_cpmant.py b/src/transformers/models/cpmant/tokenization_cpmant.py index 2da1d6286c5..e5cc353cc57 100644 --- a/src/transformers/models/cpmant/tokenization_cpmant.py +++ b/src/transformers/models/cpmant/tokenization_cpmant.py @@ -222,7 +222,9 @@ class CpmAntTokenizer(PreTrainedTokenizer): index += 1 return (vocab_file,) - def build_inputs_with_special_tokens(self, token_ids_0: List[int], token_ids_1: List[int] = None) -> List[int]: + def build_inputs_with_special_tokens( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: """ Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and adding special tokens. 
A CPMAnt sequence has the following format: diff --git a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py index 32746a38dd0..3d8cf3e2795 100644 --- a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py @@ -19,6 +19,7 @@ import gc import json import re from pathlib import Path +from typing import Optional import torch from huggingface_hub import hf_hub_download @@ -87,7 +88,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # Copied from transformers.models.mllama.convert_mllama_weights_to_hf.convert_old_keys_to_new_keys -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/dbrx/configuration_dbrx.py b/src/transformers/models/dbrx/configuration_dbrx.py index 72df1fe335b..36d48380b8c 100644 --- a/src/transformers/models/dbrx/configuration_dbrx.py +++ b/src/transformers/models/dbrx/configuration_dbrx.py @@ -89,7 +89,7 @@ class DbrxFFNConfig(PretrainedConfig): def __init__( self, - ffn_act_fn: dict = None, + ffn_act_fn: Optional[dict] = None, ffn_hidden_size: int = 3584, moe_num_experts: int = 4, moe_top_k: int = 1, diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index f7ad8a14997..81cc3b8f33c 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -747,7 +747,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -861,13 +861,13 @@ class DeformableDetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py index b05622be065..7b198f5200a 100644 --- a/src/transformers/models/deit/image_processing_deit.py +++ b/src/transformers/models/deit/image_processing_deit.py @@ -84,10 +84,10 @@ class DeiTImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PIL.Image.BICUBIC, 
do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, do_rescale: bool = True, do_normalize: bool = True, @@ -166,10 +166,10 @@ class DeiTImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample=None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/deta/image_processing_deta.py b/src/transformers/models/deprecated/deta/image_processing_deta.py index c63be138276..e76228fb6bf 100644 --- a/src/transformers/models/deprecated/deta/image_processing_deta.py +++ b/src/transformers/models/deprecated/deta/image_processing_deta.py @@ -553,13 +553,13 @@ class DetaImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: bool = True, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py index 74d16a048de..83a78ac65fa 100644 --- a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py @@ -91,7 +91,7 @@ class EfficientFormerImageProcessor(BaseImageProcessor): do_center_crop: bool = True, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, @@ -179,7 +179,7 @@ class EfficientFormerImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/deprecated/mega/modeling_mega.py b/src/transformers/models/deprecated/mega/modeling_mega.py index 85d10156103..2c4a848df5c 100644 --- a/src/transformers/models/deprecated/mega/modeling_mega.py +++ b/src/transformers/models/deprecated/mega/modeling_mega.py @@ -1684,7 +1684,7 @@ class MegaForCausalLM(MegaPreTrainedModel): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, 
output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/tapex/tokenization_tapex.py b/src/transformers/models/deprecated/tapex/tokenization_tapex.py index 3d554872c48..719d3c59f32 100644 --- a/src/transformers/models/deprecated/tapex/tokenization_tapex.py +++ b/src/transformers/models/deprecated/tapex/tokenization_tapex.py @@ -497,7 +497,7 @@ class TapexTokenizer(PreTrainedTokenizer): self, table: Union["pd.DataFrame", List["pd.DataFrame"]] = None, query: Optional[Union[TextInput, List[TextInput]]] = None, - answer: Union[str, List[str]] = None, + answer: Optional[Union[str, List[str]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, truncation: Union[bool, str, TruncationStrategy] = None, @@ -574,7 +574,7 @@ class TapexTokenizer(PreTrainedTokenizer): self, table: Union["pd.DataFrame", List["pd.DataFrame"]], query: Optional[Union[TextInput, List[TextInput]]] = None, - answer: Union[str, List[str]] = None, + answer: Optional[Union[str, List[str]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, truncation: Union[bool, str, TruncationStrategy] = None, @@ -662,10 +662,10 @@ class TapexTokenizer(PreTrainedTokenizer): self, table: Union["pd.DataFrame", List["pd.DataFrame"]], query: Optional[List[TextInput]] = None, - answer: List[str] = None, + answer: Optional[List[str]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -884,7 +884,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: Optional[str] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -1053,7 +1053,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: List[str], add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -1197,7 +1197,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: str, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py index 02d78c93407..a10b9b3b211 100644 --- a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py @@ -121,12 +121,12 @@ class TvltImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, patch_size: List[int] = [16, 16], num_frames: int = 8, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, 
- crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -221,10 +221,10 @@ class TvltImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -278,12 +278,12 @@ class TvltImageProcessor(BaseImageProcessor): self, videos: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - patch_size: List[int] = None, + size: Optional[Dict[str, int]] = None, + patch_size: Optional[List[int]] = None, num_frames: Optional[int] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py index 72410878933..9f644bfc563 100644 --- a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py @@ -93,10 +93,10 @@ class ViTHybridImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -193,7 +193,7 @@ class ViTHybridImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py b/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py index b24c6a5174f..655bbdc0230 100644 --- a/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py +++ b/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py @@ -15,6 +15,7 @@ import argparse import gc import os +from typing import Optional import regex as re import torch @@ -93,7 +94,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # fmt: on -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): output_dict = {} if state_dict_keys is not None: old_text = "\n".join(state_dict_keys) diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index 75d7e74adde..0b365eafa17 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -732,7 +732,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, 
- target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -845,13 +845,13 @@ class DetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, @@ -1824,7 +1824,7 @@ class DetrImageProcessor(BaseImageProcessor): return results - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`DetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py index dc14ec61f06..419d099e913 100644 --- a/src/transformers/models/detr/image_processing_detr_fast.py +++ b/src/transformers/models/detr/image_processing_detr_fast.py @@ -1088,7 +1088,7 @@ class DetrImageProcessorFast(BaseImageProcessorFast): return results # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_semantic_segmentation - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`DetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. 
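
Note on the pattern this patch applies throughout: a parameter annotated like size: Dict[str, int] = None is an "implicit Optional" — the annotation claims the value is always a dict while the default is None. That shorthand is deprecated typing practice, and recent mypy versions (with no_implicit_optional on by default) flag the default as incompatible with the annotation, so each such parameter is rewritten as Optional[...] while keeping the None default. Below is a minimal before/after sketch, not part of the patch; the resize_before/resize_after helpers are hypothetical stand-ins for the real signatures touched here.

    from typing import Optional

    # Before: implicit Optional. The annotation says `size` is always a dict,
    # but the default is None, so a strict type checker rejects the default.
    def resize_before(image, size: dict = None):
        size = size if size is not None else {"height": 224, "width": 224}
        return image, size

    # After: the annotation matches the None default; runtime behavior is unchanged.
    def resize_after(image, size: Optional[dict] = None):
        size = size if size is not None else {"height": 224, "width": 224}
        return image, size

The same one-line rewrite covers every container flavor seen in the diff (dict, Dict[str, int], List[float], Tuple[int, int], Iterable, and Union[...] annotations); only parameters whose default is None are touched, and no call sites change.
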
diff --git a/src/transformers/models/dinov2/modeling_flax_dinov2.py b/src/transformers/models/dinov2/modeling_flax_dinov2.py index 2766850e921..48afecde5e1 100644 --- a/src/transformers/models/dinov2/modeling_flax_dinov2.py +++ b/src/transformers/models/dinov2/modeling_flax_dinov2.py @@ -592,7 +592,7 @@ class FlaxDinov2PreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/distilbert/modeling_flax_distilbert.py b/src/transformers/models/distilbert/modeling_flax_distilbert.py index 1f2b6ac96ab..e9c12c4b088 100644 --- a/src/transformers/models/distilbert/modeling_flax_distilbert.py +++ b/src/transformers/models/distilbert/modeling_flax_distilbert.py @@ -459,7 +459,7 @@ class FlaxDistilBertPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, head_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py index 667c7ab3f6c..c45e11430ff 100644 --- a/src/transformers/models/donut/image_processing_donut.py +++ b/src/transformers/models/donut/image_processing_donut.py @@ -94,7 +94,7 @@ class DonutImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_thumbnail: bool = True, do_align_long_axis: bool = False, @@ -313,7 +313,7 @@ class DonutImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 9a35ee4b4a3..a22548f5cd9 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -154,7 +154,7 @@ class DPTImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, keep_aspect_ratio: bool = False, ensure_multiple_of: int = 1, @@ -299,7 +299,7 @@ class DPTImageProcessor(BaseImageProcessor): image: ImageInput, do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, @@ -340,7 +340,7 @@ class DPTImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, @@ -391,7 +391,7 @@ class DPTImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, 
keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, @@ -592,7 +592,7 @@ class DPTImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->DPT - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`DPTForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py index 612ede7086e..6aa42f18ce9 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py @@ -87,10 +87,10 @@ class EfficientNetImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PIL.Image.NEAREST, do_center_crop: bool = False, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, rescale_offset: bool = False, do_rescale: bool = True, @@ -213,10 +213,10 @@ class EfficientNetImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample=None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, rescale_offset: Optional[bool] = None, diff --git a/src/transformers/models/electra/modeling_flax_electra.py b/src/transformers/models/electra/modeling_flax_electra.py index 4bf75ff33e4..7cc20ec27fc 100644 --- a/src/transformers/models/electra/modeling_flax_electra.py +++ b/src/transformers/models/electra/modeling_flax_electra.py @@ -777,13 +777,13 @@ class FlaxElectraPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/emu3/configuration_emu3.py b/src/transformers/models/emu3/configuration_emu3.py index 5b5abedf401..60e5e55ab44 100644 --- a/src/transformers/models/emu3/configuration_emu3.py +++ b/src/transformers/models/emu3/configuration_emu3.py @@ -304,7 +304,7 @@ class Emu3Config(PretrainedConfig): self, vq_config: Union[Dict, Emu3VQVAEConfig] = None, text_config: Union[Dict, Emu3TextConfig] = None, - vocabulary_map: Dict[int, int] = None, + vocabulary_map: Optional[Dict[int, int]] = None, **kwargs, ): if vq_config is None: diff --git a/src/transformers/models/emu3/image_processing_emu3.py b/src/transformers/models/emu3/image_processing_emu3.py index a63269c99ef..3780de93c36 100644 --- 
a/src/transformers/models/emu3/image_processing_emu3.py +++ b/src/transformers/models/emu3/image_processing_emu3.py @@ -309,7 +309,7 @@ class Emu3ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index 415fd058e45..1ed7a2a5ce2 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -550,7 +550,7 @@ class EncoderDecoderModel(PreTrainedModel, GenerationMixin): decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.BoolTensor] = None, encoder_outputs: Optional[Tuple[torch.FloatTensor]] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, decoder_inputs_embeds: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py index ccb0aa0a6d2..c37e7d3537d 100644 --- a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py @@ -436,7 +436,7 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -508,12 +508,12 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -638,7 +638,7 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py b/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py index 2c89feea43e..5bb3f150f6b 100644 --- a/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +++ b/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py @@ -14,7 +14,7 @@ # limitations under the License. 
"""FastSpeech2Conformer model configuration""" -from typing import Dict +from typing import Dict, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -459,8 +459,8 @@ class FastSpeech2ConformerWithHifiGanConfig(PretrainedConfig): def __init__( self, - model_config: Dict = None, - vocoder_config: Dict = None, + model_config: Optional[Dict] = None, + vocoder_config: Optional[Dict] = None, **kwargs, ): if model_config is None: diff --git a/src/transformers/models/flava/configuration_flava.py b/src/transformers/models/flava/configuration_flava.py index 7a18b33ac86..4f9a47b4d15 100644 --- a/src/transformers/models/flava/configuration_flava.py +++ b/src/transformers/models/flava/configuration_flava.py @@ -14,7 +14,7 @@ # limitations under the License. """FLAVA model configurations""" -from typing import Any, Dict +from typing import Any, Dict, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -472,10 +472,10 @@ class FlavaConfig(PretrainedConfig): def __init__( self, - image_config: Dict[str, Any] = None, - text_config: Dict[str, Any] = None, - multimodal_config: Dict[str, Any] = None, - image_codebook_config: Dict[str, Any] = None, + image_config: Optional[Dict[str, Any]] = None, + text_config: Optional[Dict[str, Any]] = None, + multimodal_config: Optional[Dict[str, Any]] = None, + image_codebook_config: Optional[Dict[str, Any]] = None, hidden_size: int = 768, layer_norm_eps: float = 1e-12, projection_dim: int = 768, diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index 2b85a64cb84..caa03dca8cf 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -228,10 +228,10 @@ class FlavaImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -392,10 +392,10 @@ class FlavaImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -457,7 +457,7 @@ class FlavaImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/fuyu/image_processing_fuyu.py b/src/transformers/models/fuyu/image_processing_fuyu.py index 080ff772e28..2984aac67da 100644 --- a/src/transformers/models/fuyu/image_processing_fuyu.py +++ b/src/transformers/models/fuyu/image_processing_fuyu.py @@ -537,7 +537,7 @@ class FuyuImageProcessor(BaseImageProcessor): } return FuyuBatchFeature(data=data, tensor_type=return_tensors) - def get_num_patches(self, image_height: int, image_width: int, patch_size: Dict[str, int] 
= None) -> int: + def get_num_patches(self, image_height: int, image_width: int, patch_size: Optional[Dict[str, int]] = None) -> int: """ Calculate number of patches required to encode an image. diff --git a/src/transformers/models/gemma/modeling_flax_gemma.py b/src/transformers/models/gemma/modeling_flax_gemma.py index 1b8c3671f00..237e92d9492 100644 --- a/src/transformers/models/gemma/modeling_flax_gemma.py +++ b/src/transformers/models/gemma/modeling_flax_gemma.py @@ -485,8 +485,8 @@ class FlaxGemmaPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/gemma3/image_processing_gemma3.py b/src/transformers/models/gemma3/image_processing_gemma3.py index a138acef382..91e2d0c66a4 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3.py +++ b/src/transformers/models/gemma3/image_processing_gemma3.py @@ -95,7 +95,7 @@ class Gemma3ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -241,7 +241,7 @@ class Gemma3ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py b/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py index 3df7214410e..2bf4b3ac7d0 100644 --- a/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py +++ b/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py @@ -61,7 +61,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { CONTEXT_LENGTH = 8000 -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. 
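The converter and `get_num_patches` changes above annotate another common use of a None default: None acts as a sentinel for "use the configured value" instead of a mutable default such as `{}`. The snippet below is a minimal sketch of that pattern under hypothetical names and a made-up key mapping; it is not the actual transformers weight-conversion logic.

import re
from typing import Dict, Optional

# Hypothetical mapping, standing in for ORIGINAL_TO_CONVERTED_KEY_MAPPING.
_EXAMPLE_MAPPING = {r"^backbone\.": "model.vision.", r"\.gamma$": ".weight"}

def rename_keys(state_dict_keys: Optional[dict] = None) -> Dict[str, str]:
    # None as the sentinel avoids the shared-mutable-default pitfall, and the
    # explicit Optional annotation now matches that runtime contract.
    if state_dict_keys is None:
        return {}
    renamed = {}
    for old_key in state_dict_keys:
        new_key = old_key
        for pattern, replacement in _EXAMPLE_MAPPING.items():
            new_key = re.sub(pattern, replacement, new_key)
        renamed[old_key] = new_key
    return renamed

print(rename_keys({"backbone.layer1.gamma": None}))
# {'backbone.layer1.gamma': 'model.vision.layer1.weight'}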
diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py index 875c0742b96..dc06f1ef391 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py @@ -172,7 +172,7 @@ class GotOcr2ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_to_patches: bool = False, min_patches: int = 1, max_patches: int = 12, @@ -419,7 +419,7 @@ class GotOcr2ImageProcessor(BaseImageProcessor): min_patches: int, max_patches: int, use_thumbnail: bool = True, - patch_size: Union[Tuple, int, dict] = None, + patch_size: Optional[Union[Tuple, int, dict]] = None, data_format: ChannelDimension = None, ): """ diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py index 8498e378030..e8b17c4ed36 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py @@ -114,7 +114,7 @@ class GotOcr2ImageProcessorFast(BaseImageProcessorFast): min_patches: int, max_patches: int, use_thumbnail: bool = True, - patch_size: Union[Tuple, int, dict] = None, + patch_size: Optional[Union[Tuple, int, dict]] = None, interpolation: Optional["F.InterpolationMode"] = None, ): """ diff --git a/src/transformers/models/gpt2/configuration_gpt2.py b/src/transformers/models/gpt2/configuration_gpt2.py index f3ebea02496..fb582998bf8 100644 --- a/src/transformers/models/gpt2/configuration_gpt2.py +++ b/src/transformers/models/gpt2/configuration_gpt2.py @@ -194,7 +194,7 @@ class GPT2OnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/gpt2/modeling_flax_gpt2.py b/src/transformers/models/gpt2/modeling_flax_gpt2.py index b6000aed5d7..1382a255612 100644 --- a/src/transformers/models/gpt2/modeling_flax_gpt2.py +++ b/src/transformers/models/gpt2/modeling_flax_gpt2.py @@ -461,8 +461,8 @@ class FlaxGPT2PreTrainedModel(FlaxPreTrainedModel): position_ids=None, encoder_hidden_states: Optional[jnp.ndarray] = None, encoder_attention_mask: Optional[jnp.ndarray] = None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py index 7f74b80001f..f7371d08b03 100644 --- a/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py @@ -404,8 +404,8 @@ class FlaxGPTNeoPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/gptj/configuration_gptj.py 
b/src/transformers/models/gptj/configuration_gptj.py index 5e76b3f4ba6..5b59d309c25 100644 --- a/src/transformers/models/gptj/configuration_gptj.py +++ b/src/transformers/models/gptj/configuration_gptj.py @@ -140,7 +140,7 @@ class GPTJOnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/gptj/modeling_flax_gptj.py b/src/transformers/models/gptj/modeling_flax_gptj.py index 01ec3acd50f..ee88f69cc18 100644 --- a/src/transformers/models/gptj/modeling_flax_gptj.py +++ b/src/transformers/models/gptj/modeling_flax_gptj.py @@ -438,8 +438,8 @@ class FlaxGPTJPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py index 03a6c2e4e3c..f32d3095ffa 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py @@ -756,7 +756,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -899,13 +899,13 @@ class GroundingDinoImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/transformers/models/grounding_dino/modeling_grounding_dino.py index 755cfaf5d99..a741539a405 100644 --- a/src/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -2554,7 +2554,7 @@ class GroundingDinoForObjectDetection(GroundingDinoPreTrainedModel): output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: List[Dict[str, Union[torch.LongTensor, torch.FloatTensor]]] = None, + labels: Optional[List[Dict[str, Union[torch.LongTensor, torch.FloatTensor]]]] = None, ): r""" labels (`List[Dict]` of len `(batch_size,)`, *optional*): diff --git a/src/transformers/models/idefics/image_processing_idefics.py b/src/transformers/models/idefics/image_processing_idefics.py 
index 768ef893d2d..17b7fb4f39f 100644 --- a/src/transformers/models/idefics/image_processing_idefics.py +++ b/src/transformers/models/idefics/image_processing_idefics.py @@ -101,7 +101,7 @@ class IdeficsImageProcessor(BaseImageProcessor): image_size: Optional[Dict[str, int]] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - transform: Callable = None, + transform: Optional[Callable] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH, diff --git a/src/transformers/models/idefics2/image_processing_idefics2.py b/src/transformers/models/idefics2/image_processing_idefics2.py index 2a853fc02e7..239a266d9bb 100644 --- a/src/transformers/models/idefics2/image_processing_idefics2.py +++ b/src/transformers/models/idefics2/image_processing_idefics2.py @@ -190,7 +190,7 @@ class Idefics2ImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, diff --git a/src/transformers/models/idefics3/image_processing_idefics3.py b/src/transformers/models/idefics3/image_processing_idefics3.py index b0677c4708d..b2f049e998a 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3.py +++ b/src/transformers/models/idefics3/image_processing_idefics3.py @@ -295,10 +295,10 @@ class Idefics3ImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.LANCZOS, do_image_splitting: bool = True, - max_image_size: Dict[str, int] = None, + max_image_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/ijepa/convert_ijepa_to_hf.py b/src/transformers/models/ijepa/convert_ijepa_to_hf.py index 5c15a72ff88..25d97df6ce8 100644 --- a/src/transformers/models/ijepa/convert_ijepa_to_hf.py +++ b/src/transformers/models/ijepa/convert_ijepa_to_hf.py @@ -21,6 +21,7 @@ import argparse import gc import re from pathlib import Path +from typing import Optional import requests import torch @@ -63,7 +64,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # fmt: on -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ Converts old keys to new keys using the mapping and dynamically removes the 'ijepa.' prefix if necessary. 
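The IdeficsImageProcessor change above (`transform: Optional[Callable] = None`) is the same pattern applied to a callable argument: callers may pass their own transform, and None means "use the default pipeline". The sketch below is a hypothetical illustration of that shape, not the library's preprocessing code; the 1/255 rescale merely stands in for the real resize/normalize default.

from typing import Callable, List, Optional

def preprocess(
    values: List[float],
    transform: Optional[Callable[[float], float]] = None,
) -> List[float]:
    # Use the caller-supplied transform if given, otherwise a default rescale.
    fn = transform if transform is not None else (lambda value: value / 255.0)
    return [fn(v) for v in values]

print(preprocess([0.0, 127.5, 255.0]))                     # [0.0, 0.5, 1.0]
print(preprocess([1.0, 2.0], transform=lambda v: v * 2))   # [2.0, 4.0]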
diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py index af13a2d3179..5b941a6c779 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py @@ -89,7 +89,7 @@ class ImageGPTImageProcessor(BaseImageProcessor): # clusters is a first argument to maintain backwards compatibility with the old ImageGPTImageProcessor clusters: Optional[Union[List[List[int]], np.ndarray]] = None, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_normalize: bool = True, do_color_quantize: bool = True, @@ -180,7 +180,7 @@ class ImageGPTImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_normalize: Optional[bool] = None, do_color_quantize: Optional[bool] = None, diff --git a/src/transformers/models/informer/configuration_informer.py b/src/transformers/models/informer/configuration_informer.py index 028f5b32295..5cb34dc8970 100644 --- a/src/transformers/models/informer/configuration_informer.py +++ b/src/transformers/models/informer/configuration_informer.py @@ -141,7 +141,7 @@ class InformerConfig(PretrainedConfig): distribution_output: str = "student_t", loss: str = "nll", input_size: int = 1, - lags_sequence: List[int] = None, + lags_sequence: Optional[List[int]] = None, scaling: Optional[Union[str, bool]] = "mean", num_dynamic_real_features: int = 0, num_static_real_features: int = 0, diff --git a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py index 9c55ba60d39..32018a79542 100644 --- a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py +++ b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py @@ -84,7 +84,7 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, diff --git a/src/transformers/models/layoutlm/configuration_layoutlm.py b/src/transformers/models/layoutlm/configuration_layoutlm.py index aebd25d5369..e0f9ef60f6c 100644 --- a/src/transformers/models/layoutlm/configuration_layoutlm.py +++ b/src/transformers/models/layoutlm/configuration_layoutlm.py @@ -139,7 +139,7 @@ class LayoutLMOnnxConfig(OnnxConfig): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, ): super().__init__(config, task=task, patching_specs=patching_specs) self.max_2d_positions = config.max_2d_position_embeddings - 1 diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py index 5d946982fa7..8a73e443de5 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py @@ -129,7 +129,7 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: 
Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, apply_ocr: bool = True, ocr_lang: Optional[str] = None, @@ -201,7 +201,7 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, apply_ocr: Optional[bool] = None, ocr_lang: Optional[str] = None, diff --git a/src/transformers/models/layoutlmv2/processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/processing_layoutlmv2.py index 39d34b3a997..a5ac6681c28 100644 --- a/src/transformers/models/layoutlmv2/processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/processing_layoutlmv2.py @@ -71,7 +71,7 @@ class LayoutLMv2Processor(ProcessorMixin): images, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py index 1fa23c32ae4..2466bdc80d2 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py @@ -406,7 +406,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py index 5d36e9fd270..32d38be09bb 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py @@ -157,7 +157,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py index d322c78d7e6..705a5e51232 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py @@ -146,13 +146,13 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = 
PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_value: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, Iterable[float]] = None, - image_std: Union[float, Iterable[float]] = None, + image_mean: Optional[Union[float, Iterable[float]]] = None, + image_std: Optional[Union[float, Iterable[float]]] = None, apply_ocr: bool = True, ocr_lang: Optional[str] = None, tesseract_config: Optional[str] = "", @@ -228,13 +228,13 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample=None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, - image_mean: Union[float, Iterable[float]] = None, - image_std: Union[float, Iterable[float]] = None, + image_mean: Optional[Union[float, Iterable[float]]] = None, + image_std: Optional[Union[float, Iterable[float]]] = None, apply_ocr: Optional[bool] = None, ocr_lang: Optional[str] = None, tesseract_config: Optional[str] = None, diff --git a/src/transformers/models/layoutlmv3/processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/processing_layoutlmv3.py index 4bd9955775d..209272ca354 100644 --- a/src/transformers/models/layoutlmv3/processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/processing_layoutlmv3.py @@ -71,7 +71,7 @@ class LayoutLMv3Processor(ProcessorMixin): images, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py index b88f7b4c1b0..7758ba0acc6 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py @@ -535,7 +535,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py index 737a50df9f2..3d0cd26d80c 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py @@ -201,7 +201,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, 
add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutxlm/processing_layoutxlm.py b/src/transformers/models/layoutxlm/processing_layoutxlm.py index 892a7c2cf1d..b325221d9f4 100644 --- a/src/transformers/models/layoutxlm/processing_layoutxlm.py +++ b/src/transformers/models/layoutxlm/processing_layoutxlm.py @@ -70,7 +70,7 @@ class LayoutXLMProcessor(ProcessorMixin): images, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py index 8dc459ba940..8aa85b4f318 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py @@ -441,7 +441,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py index 4c16642c57c..66d972f4482 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py @@ -269,7 +269,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/levit/image_processing_levit.py b/src/transformers/models/levit/image_processing_levit.py index d980bea5552..8b3e4e4cf1f 100644 --- a/src/transformers/models/levit/image_processing_levit.py +++ b/src/transformers/models/levit/image_processing_levit.py @@ -90,10 +90,10 @@ class LevitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/llama/modeling_flax_llama.py b/src/transformers/models/llama/modeling_flax_llama.py index 1fed0a36c14..14bc16ede67 100644 --- a/src/transformers/models/llama/modeling_flax_llama.py +++ 
b/src/transformers/models/llama/modeling_flax_llama.py @@ -467,8 +467,8 @@ class FlaxLlamaPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/llama4/convert_llama4_weights_to_hf.py b/src/transformers/models/llama4/convert_llama4_weights_to_hf.py index 923d9ffc63b..bce62169a4e 100644 --- a/src/transformers/models/llama4/convert_llama4_weights_to_hf.py +++ b/src/transformers/models/llama4/convert_llama4_weights_to_hf.py @@ -90,7 +90,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # fmt: on -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/llama4/modeling_llama4.py b/src/transformers/models/llama4/modeling_llama4.py index 0959199c2ef..bc56310f189 100644 --- a/src/transformers/models/llama4/modeling_llama4.py +++ b/src/transformers/models/llama4/modeling_llama4.py @@ -1287,7 +1287,7 @@ class Llama4VisionEncoderLayer(nn.Module): hidden_state: torch.Tensor, freqs_ci: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - output_attentions: bool = None, + output_attentions: Optional[bool] = None, ): # Self Attention residual = hidden_state diff --git a/src/transformers/models/llava/image_processing_llava.py b/src/transformers/models/llava/image_processing_llava.py index 37ef079c918..2940ed5c801 100644 --- a/src/transformers/models/llava/image_processing_llava.py +++ b/src/transformers/models/llava/image_processing_llava.py @@ -99,10 +99,10 @@ class LlavaImageProcessor(BaseImageProcessor): self, do_pad: bool = False, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py index 63246e8a53a..e1afee31928 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next.py +++ b/src/transformers/models/llava_next/image_processing_llava_next.py @@ -168,11 +168,11 @@ class LlavaNextImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -330,7 +330,7 @@ class LlavaNextImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, @@ -559,8 +559,8 @@ 
class LlavaNextImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py index bedba000c7f..06ee0fbdaec 100644 --- a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py @@ -93,11 +93,11 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -180,7 +180,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, @@ -280,7 +280,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): self, images: VideoInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py index 5a9bb5e3eae..8cfdfee1f4a 100644 --- a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py @@ -162,8 +162,8 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -454,7 +454,7 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -529,8 +529,8 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/llava_onevision/video_processing_llava_onevision.py 
b/src/transformers/models/llava_onevision/video_processing_llava_onevision.py index 61ef776db89..b9ac7a6e4bc 100644 --- a/src/transformers/models/llava_onevision/video_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/video_processing_llava_onevision.py @@ -84,7 +84,7 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -112,7 +112,7 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -203,7 +203,7 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): self, videos: VideoInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/longformer/configuration_longformer.py b/src/transformers/models/longformer/configuration_longformer.py index 7a4d6984717..16bcba9fbbf 100644 --- a/src/transformers/models/longformer/configuration_longformer.py +++ b/src/transformers/models/longformer/configuration_longformer.py @@ -139,7 +139,9 @@ class LongformerConfig(PretrainedConfig): class LongformerOnnxConfig(OnnxConfig): - def __init__(self, config: "PretrainedConfig", task: str = "default", patching_specs: "List[PatchingSpec]" = None): + def __init__( + self, config: "PretrainedConfig", task: str = "default", patching_specs: "Optional[List[PatchingSpec]]" = None + ): super().__init__(config, task, patching_specs) config.onnx_export = True diff --git a/src/transformers/models/longt5/modeling_flax_longt5.py b/src/transformers/models/longt5/modeling_flax_longt5.py index 13017044143..b9a341349f7 100644 --- a/src/transformers/models/longt5/modeling_flax_longt5.py +++ b/src/transformers/models/longt5/modeling_flax_longt5.py @@ -1731,7 +1731,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1816,7 +1816,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1872,12 +1872,12 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): encoder_outputs, encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -2260,12 +2260,12 @@ class FlaxLongT5ForConditionalGeneration(FlaxLongT5PreTrainedModel): encoder_outputs, 
encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/marian/modeling_flax_marian.py b/src/transformers/models/marian/modeling_flax_marian.py index d4844b6fc32..2436158806a 100644 --- a/src/transformers/models/marian/modeling_flax_marian.py +++ b/src/transformers/models/marian/modeling_flax_marian.py @@ -970,7 +970,7 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1032,12 +1032,12 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1150,7 +1150,7 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1299,12 +1299,12 @@ class FlaxMarianMTModel(FlaxMarianPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index fb3a8f7a651..9884b6d7e9e 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -1155,7 +1155,7 @@ class TFMarianMainLayer(keras.layers.Layer): decoder_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, diff --git a/src/transformers/models/markuplm/tokenization_markuplm.py b/src/transformers/models/markuplm/tokenization_markuplm.py index 26ba704150d..6d1edf2bbb3 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm.py +++ b/src/transformers/models/markuplm/tokenization_markuplm.py @@ -495,7 +495,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): self, text: 
Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - xpaths: Union[List[List[int]], List[List[List[int]]]] = None, + xpaths: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/markuplm/tokenization_markuplm_fast.py b/src/transformers/models/markuplm/tokenization_markuplm_fast.py index 55d75e35416..e9e9a11953f 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm_fast.py +++ b/src/transformers/models/markuplm/tokenization_markuplm_fast.py @@ -270,7 +270,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - xpaths: Union[List[List[int]], List[List[List[int]]]] = None, + xpaths: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index 5c61431bf02..2a3faf18317 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -207,7 +207,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -404,14 +404,14 @@ class Mask2FormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, num_labels: Optional[int] = None, @@ -576,7 +576,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -600,7 +600,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -642,7 +642,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int 
= 0, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index 107267bb516..bf0bedd2917 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -1804,7 +1804,7 @@ class Mask2FormerMaskedAttentionDecoder(nn.Module): pixel_embeddings: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None, query_position_embeddings: Optional[torch.Tensor] = None, - feature_size_list: List = None, + feature_size_list: Optional[List] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index f433678019d..32fc423f08c 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -213,7 +213,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -410,14 +410,14 @@ class MaskFormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, num_labels: Optional[int] = None, @@ -579,7 +579,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -603,7 +603,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -645,7 +645,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 0, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: @@ -973,7 +973,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): return encoded_inputs def post_process_segmentation( - self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Tuple[int, int] = None + self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Optional[Tuple[int, int]] = None ) -> "torch.Tensor": """ Converts the output of 
[`MaskFormerForInstanceSegmentationOutput`] into image segmentation predictions. Only diff --git a/src/transformers/models/mbart/modeling_flax_mbart.py b/src/transformers/models/mbart/modeling_flax_mbart.py index 2f1b650a5d6..1e019f5199e 100644 --- a/src/transformers/models/mbart/modeling_flax_mbart.py +++ b/src/transformers/models/mbart/modeling_flax_mbart.py @@ -1045,7 +1045,7 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1106,12 +1106,12 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1223,7 +1223,7 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1371,12 +1371,12 @@ class FlaxMBartForConditionalGeneration(FlaxMBartPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index dd9bf976a21..16c53caa3f2 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -1430,7 +1430,7 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo decoder_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, diff --git a/src/transformers/models/mistral/modeling_flax_mistral.py b/src/transformers/models/mistral/modeling_flax_mistral.py index f02446ae3eb..c90bf25a957 100644 --- a/src/transformers/models/mistral/modeling_flax_mistral.py +++ b/src/transformers/models/mistral/modeling_flax_mistral.py @@ -461,8 +461,8 @@ class FlaxMistralPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git 
a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py index f5f338fd8dd..8cb9e78daa5 100644 --- a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py +++ b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py @@ -90,7 +90,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { CONTEXT_LENGTH = 131072 -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py index c9f96a955e0..2342979f0de 100644 --- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py @@ -92,7 +92,7 @@ class MobileNetV1ImageProcessor(BaseImageProcessor): size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -171,10 +171,10 @@ class MobileNetV1ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py index ca6aa04c148..e6b909a8e49 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py @@ -98,7 +98,7 @@ class MobileNetV2ImageProcessor(BaseImageProcessor): size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -177,10 +177,10 @@ class MobileNetV2ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -309,7 +309,7 @@ class MobileNetV2ImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->MobileNetV2 - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of 
[`MobileNetV2ForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py index 23ceae679f8..c23de20ee52 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py @@ -90,12 +90,12 @@ class MobileViTImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_flip_channel_order: bool = True, **kwargs, ) -> None: @@ -223,12 +223,12 @@ class MobileViTImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_flip_channel_order: Optional[bool] = None, data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -265,9 +265,9 @@ class MobileViTImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single mask.""" @@ -305,12 +305,12 @@ class MobileViTImageProcessor(BaseImageProcessor): images: ImageInput, segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_flip_channel_order: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, @@ -440,7 +440,7 @@ class MobileViTImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->MobileViT - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`MobileViTForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. 
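Every hunk in this patch applies the same rewrite: a parameter whose default is `None` but whose annotation is a bare type gets an explicit `Optional[...]` wrapper, so the signature states the `None` case that the deprecated implicit-Optional convention used to paper over. A minimal before/after sketch of the pattern, using an illustrative function name rather than one taken from the diff:

```python
from typing import Dict, Optional


# Before: the annotation promises a Dict, yet the default is None, so the
# contract only holds through the deprecated implicit-Optional convention.
def resize_before(size: Dict[str, int] = None) -> Dict[str, int]:
    return size if size is not None else {"height": 224, "width": 224}


# After: same default, but the annotation now spells out "Dict or None",
# matching the change applied throughout these files.
def resize_after(size: Optional[Dict[str, int]] = None) -> Dict[str, int]:
    return size if size is not None else {"height": 224, "width": 224}
```

Runtime behaviour is identical in both versions; only the declared type changes, which is why the hunks never touch the function bodies.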
diff --git a/src/transformers/models/moshi/modeling_moshi.py b/src/transformers/models/moshi/modeling_moshi.py index 6fa1c0c5e4e..a7387004a6e 100644 --- a/src/transformers/models/moshi/modeling_moshi.py +++ b/src/transformers/models/moshi/modeling_moshi.py @@ -1067,7 +1067,7 @@ class MoshiDepthDecoder(MoshiPreTrainedModel, GenerationMixin): input_ids: Optional[torch.LongTensor] = None, last_hidden_state: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.BoolTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1931,7 +1931,7 @@ class MoshiForConditionalGeneration(MoshiPreTrainedModel, GenerationMixin): user_audio_codes: Optional[torch.Tensor] = None, moshi_input_values: Optional[torch.FloatTensor] = None, moshi_audio_codes: Optional[torch.Tensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, text_labels: Optional[torch.LongTensor] = None, audio_labels: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index bea8d9c637b..e18c09a11bf 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -2018,7 +2018,7 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin): decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.BoolTensor] = None, encoder_outputs: Optional[Tuple[torch.FloatTensor]] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, decoder_inputs_embeds: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, @@ -2439,7 +2439,7 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin): return torch.ones((batch_size, 1), dtype=torch.long, device=self.device) * bos_token_id def _get_decoder_start_token_id( - self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None + self, decoder_start_token_id: Optional[Union[int, List[int]]] = None, bos_token_id: Optional[int] = None ) -> int: decoder_start_token_id = ( decoder_start_token_id diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index a3cc95690d1..2489ec9a388 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -1917,7 +1917,7 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin): input_features: Optional[torch.FloatTensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.BoolTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, decoder_inputs_embeds: Optional[torch.FloatTensor] = None, @@ -2310,7 +2310,7 @@ class 
MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin): # Copied from transformers.models.musicgen.modeling_musicgen.MusicgenForConditionalGeneration._get_decoder_start_token_id def _get_decoder_start_token_id( - self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None + self, decoder_start_token_id: Optional[Union[int, List[int]]] = None, bos_token_id: Optional[int] = None ) -> int: decoder_start_token_id = ( decoder_start_token_id diff --git a/src/transformers/models/nougat/image_processing_nougat.py b/src/transformers/models/nougat/image_processing_nougat.py index 25b5c5e7bc8..9d38a0afafd 100644 --- a/src/transformers/models/nougat/image_processing_nougat.py +++ b/src/transformers/models/nougat/image_processing_nougat.py @@ -96,7 +96,7 @@ class NougatImageProcessor(BaseImageProcessor): self, do_crop_margin: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_thumbnail: bool = True, do_align_long_axis: bool = False, @@ -373,13 +373,13 @@ class NougatImageProcessor(BaseImageProcessor): images: ImageInput, do_crop_margin: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, do_pad: Optional[bool] = None, do_rescale: Optional[bool] = None, - rescale_factor: Union[int, float] = None, + rescale_factor: Optional[Union[int, float]] = None, do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, diff --git a/src/transformers/models/nougat/processing_nougat.py b/src/transformers/models/nougat/processing_nougat.py index ca395e261af..6f48f23d581 100644 --- a/src/transformers/models/nougat/processing_nougat.py +++ b/src/transformers/models/nougat/processing_nougat.py @@ -52,13 +52,13 @@ class NougatProcessor(ProcessorMixin): text=None, do_crop_margin: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: "PILImageResampling" = None, # noqa: F821 do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, do_pad: Optional[bool] = None, do_rescale: Optional[bool] = None, - rescale_factor: Union[int, float] = None, + rescale_factor: Optional[Union[int, float]] = None, do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, diff --git a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py index 3f5c51779b9..618c67c783d 100644 --- a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py @@ -227,7 +227,7 @@ class OmDetTurboProcessor(ProcessorMixin): def __call__( self, images: ImageInput = None, - text: Union[List[str], List[List[str]]] = None, + text: Optional[Union[List[str], List[List[str]]]] = None, audio=None, videos=None, **kwargs: Unpack[OmDetTurboProcessorKwargs], diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index 956bd3e7e2f..068d6afd21b 100644 --- 
a/src/transformers/models/oneformer/image_processing_oneformer.py +++ b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -210,7 +210,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -430,13 +430,13 @@ class OneFormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, repo_path: Optional[str] = "shi-labs/oneformer_demo", @@ -583,7 +583,7 @@ class OneFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -604,7 +604,7 @@ class OneFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -644,7 +644,7 @@ class OneFormerImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single mask.""" diff --git a/src/transformers/models/opt/modeling_flax_opt.py b/src/transformers/models/opt/modeling_flax_opt.py index fc023bb4ae8..97637a83d1a 100644 --- a/src/transformers/models/opt/modeling_flax_opt.py +++ b/src/transformers/models/opt/modeling_flax_opt.py @@ -585,8 +585,8 @@ class FlaxOPTPreTrainedModel(FlaxPreTrainedModel): input_ids: jnp.ndarray, attention_mask: Optional[jnp.ndarray] = None, position_ids: Optional[jnp.ndarray] = None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py index bc211d1fb4b..b7c7785bc68 100644 --- a/src/transformers/models/owlv2/image_processing_owlv2.py +++ b/src/transformers/models/owlv2/image_processing_owlv2.py @@ -248,7 +248,7 @@ class Owlv2ImageProcessor(BaseImageProcessor): rescale_factor: Union[int, float] = 1 / 255, do_pad: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, @@ -371,7 +371,7 @@ class 
Owlv2ImageProcessor(BaseImageProcessor): images: ImageInput, do_pad: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py b/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py index 83f374651a7..856553336f3 100644 --- a/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py +++ b/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py @@ -185,10 +185,10 @@ class PatchTSMixerConfig(PretrainedConfig): distribution_output: str = "student_t", # Prediction head configuration prediction_length: int = 16, - prediction_channel_indices: list = None, + prediction_channel_indices: Optional[list] = None, # Classification/Regression configuration num_targets: int = 3, - output_range: list = None, + output_range: Optional[list] = None, head_aggregation: str = "max_pool", **kwargs, ): diff --git a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py index 2238426cd08..23363380897 100644 --- a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py +++ b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py @@ -816,7 +816,7 @@ class PatchTSMixerPretrainHead(nn.Module): def random_masking( inputs: torch.Tensor, mask_ratio: float, - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, channel_consistent_masking: bool = False, mask_value: int = 0, ): @@ -875,7 +875,7 @@ def random_masking( def forecast_masking( inputs: torch.Tensor, num_forecast_mask_patches: Union[list, int], - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, mask_value: int = 0, ): """Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches. diff --git a/src/transformers/models/patchtst/modeling_patchtst.py b/src/transformers/models/patchtst/modeling_patchtst.py index 95897db85fa..7ee66bec70c 100755 --- a/src/transformers/models/patchtst/modeling_patchtst.py +++ b/src/transformers/models/patchtst/modeling_patchtst.py @@ -218,7 +218,7 @@ class PatchTSTBatchNorm(nn.Module): def random_masking( inputs: torch.Tensor, mask_ratio: float, - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, channel_consistent_masking: bool = False, mask_value: int = 0, ): @@ -276,7 +276,7 @@ def random_masking( def forecast_masking( inputs: torch.Tensor, num_forecast_mask_patches: Union[list, int], - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, mask_value: int = 0, ): """Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches. 
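Signatures that still use the old style can be found mechanically. The sketch below is a small, assumption-laden helper (it inspects evaluated annotation objects, so it does not handle modules that use `from __future__ import annotations`, where annotations are strings; the `preprocess` example is hypothetical):

```python
import inspect
import typing
from typing import Dict, Optional


def implicit_optional_params(func):
    """Return parameter names whose default is None but whose annotation
    does not already allow None -- the pattern this patch rewrites."""
    flagged = []
    for name, param in inspect.signature(func).parameters.items():
        annotation = param.annotation
        if param.default is not None or annotation is inspect.Parameter.empty:
            continue
        allows_none = annotation is type(None) or type(None) in typing.get_args(annotation)
        if not allows_none:
            flagged.append(name)
    return flagged


# Hypothetical signature mirroring the ones touched in this diff.
def preprocess(
    do_resize: bool = True,
    size: Dict[str, int] = None,
    crop_size: Optional[Dict[str, int]] = None,
):
    ...


print(implicit_optional_params(preprocess))  # ['size']
```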
diff --git a/src/transformers/models/pegasus/modeling_flax_pegasus.py b/src/transformers/models/pegasus/modeling_flax_pegasus.py index b7c7a8fd552..89b8450312f 100644 --- a/src/transformers/models/pegasus/modeling_flax_pegasus.py +++ b/src/transformers/models/pegasus/modeling_flax_pegasus.py @@ -988,7 +988,7 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1049,12 +1049,12 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1167,7 +1167,7 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1317,12 +1317,12 @@ class FlaxPegasusForConditionalGeneration(FlaxPegasusPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, deterministic: bool = True, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index a51835dcfa4..15176c92b01 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -933,7 +933,7 @@ class TFPegasusDecoder(keras.layers.Layer): encoder_attention_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1170,7 +1170,7 @@ class TFPegasusMainLayer(keras.layers.Layer): decoder_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index 82d57134739..2edd4bef93e 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py 
@@ -88,9 +88,9 @@ class PerceiverImageProcessor(BaseImageProcessor): def __init__( self, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, diff --git a/src/transformers/models/pix2struct/image_processing_pix2struct.py b/src/transformers/models/pix2struct/image_processing_pix2struct.py index e9db5175b2e..f0525212364 100644 --- a/src/transformers/models/pix2struct/image_processing_pix2struct.py +++ b/src/transformers/models/pix2struct/image_processing_pix2struct.py @@ -213,7 +213,7 @@ class Pix2StructImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_normalize: bool = True, - patch_size: Dict[str, int] = None, + patch_size: Optional[Dict[str, int]] = None, max_patches: int = 2048, is_vqa: bool = False, **kwargs, diff --git a/src/transformers/models/pixtral/image_processing_pixtral.py b/src/transformers/models/pixtral/image_processing_pixtral.py index 8579cf08afa..7cd9149e385 100644 --- a/src/transformers/models/pixtral/image_processing_pixtral.py +++ b/src/transformers/models/pixtral/image_processing_pixtral.py @@ -175,8 +175,8 @@ class PixtralImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - patch_size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, + patch_size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -320,8 +320,8 @@ class PixtralImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - patch_size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, + patch_size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index cd4c4bb7708..25c00cc5fef 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -103,11 +103,11 @@ class PoolFormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: int = 0.9, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, do_rescale: bool = True, do_normalize: bool = True, @@ -214,11 +214,11 @@ class PoolFormerImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: Optional[int] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/pop2piano/processing_pop2piano.py 
b/src/transformers/models/pop2piano/processing_pop2piano.py index 3b839f8b1fd..437d4efaef2 100644 --- a/src/transformers/models/pop2piano/processing_pop2piano.py +++ b/src/transformers/models/pop2piano/processing_pop2piano.py @@ -52,7 +52,7 @@ class Pop2PianoProcessor(ProcessorMixin): def __call__( self, audio: Union[np.ndarray, List[float], List[np.ndarray]] = None, - sampling_rate: Union[int, List[int]] = None, + sampling_rate: Optional[Union[int, List[int]]] = None, steps_per_beat: int = 2, resample: Optional[bool] = True, notes: Union[List, TensorType] = None, diff --git a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py index 237be38fff3..6ae239ab137 100644 --- a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py +++ b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py @@ -18,6 +18,7 @@ https://github.com/DepthAnything/PromptDA""" import argparse import re from pathlib import Path +from typing import Optional import requests import torch @@ -130,7 +131,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { } -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ Convert old state dict keys to new keys using regex patterns. """ diff --git a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py index 2940932fe53..00fcd2b17a1 100644 --- a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +++ b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py @@ -142,7 +142,7 @@ class PromptDepthAnythingImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, keep_aspect_ratio: bool = False, ensure_multiple_of: int = 1, diff --git a/src/transformers/models/pvt/image_processing_pvt.py b/src/transformers/models/pvt/image_processing_pvt.py index 75fcba6ea14..6915fff6f9d 100644 --- a/src/transformers/models/pvt/image_processing_pvt.py +++ b/src/transformers/models/pvt/image_processing_pvt.py @@ -150,7 +150,7 @@ class PvtImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py index c90c90c72b7..830f15df184 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py @@ -124,7 +124,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -170,7 +170,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor): self, images: Union[ImageInput, VideoInput], do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: 
Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -303,7 +303,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor): images: ImageInput, videos: VideoInput = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, min_pixels: Optional[int] = None, max_pixels: Optional[int] = None, resample: PILImageResampling = None, diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py index d44ea279cb7..350d12ca871 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py @@ -256,7 +256,7 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast): images: ImageInput, videos: VideoInput = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: Optional[Union["PILImageResampling", "F.InterpolationMode"]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 822347950a1..1adce9f7b06 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -1375,7 +1375,7 @@ class RagTokenForGeneration(RagPreTrainedModel, GenerationMixin): doc_scores: Optional[torch.FloatTensor] = None, n_docs: Optional[int] = None, generation_config: Optional[GenerationConfig] = None, - prefix_allowed_tokens_fn: Callable[[int, torch.Tensor], List[int]] = None, + prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None, logits_processor: Optional[LogitsProcessorList] = LogitsProcessorList(), stopping_criteria: Optional[StoppingCriteriaList] = StoppingCriteriaList(), **kwargs, diff --git a/src/transformers/models/regnet/modeling_flax_regnet.py b/src/transformers/models/regnet/modeling_flax_regnet.py index 4a4e0a424a4..8d2921ea149 100644 --- a/src/transformers/models/regnet/modeling_flax_regnet.py +++ b/src/transformers/models/regnet/modeling_flax_regnet.py @@ -598,7 +598,7 @@ class FlaxRegNetPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, train: bool = False, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/rembert/modeling_rembert.py b/src/transformers/models/rembert/modeling_rembert.py index ef3250a7125..ac4ee0a970f 100755 --- a/src/transformers/models/rembert/modeling_rembert.py +++ b/src/transformers/models/rembert/modeling_rembert.py @@ -239,7 +239,7 @@ class RemBertSelfAttention(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, output_attentions: bool = False, ) -> Tuple: mixed_query_layer = self.query(hidden_states) diff --git a/src/transformers/models/resnet/modeling_flax_resnet.py b/src/transformers/models/resnet/modeling_flax_resnet.py index aa6c84c0fdb..e6aba34cbee 100644 --- a/src/transformers/models/resnet/modeling_flax_resnet.py +++ b/src/transformers/models/resnet/modeling_flax_resnet.py @@ -489,7 +489,7 @@ class 
FlaxResNetPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, train: bool = False, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/roberta/modeling_flax_roberta.py b/src/transformers/models/roberta/modeling_flax_roberta.py index 2beb0a06b8d..3eb6d539291 100644 --- a/src/transformers/models/roberta/modeling_flax_roberta.py +++ b/src/transformers/models/roberta/modeling_flax_roberta.py @@ -824,13 +824,13 @@ class FlaxRobertaPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index f2dfa19a6a5..8b77f2bd63b 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -1040,7 +1040,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py index 1e691c047bd..ca90ce96fb4 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py @@ -828,13 +828,13 @@ class FlaxRobertaPreLayerNormPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py index 6b0c40b222c..35c6550ff9f 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py @@ -894,7 +894,7 @@ class RobertaPreLayerNormForCausalLM(RobertaPreLayerNormPreTrainedModel, Generat encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - 
past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/roformer/modeling_flax_roformer.py b/src/transformers/models/roformer/modeling_flax_roformer.py index f47146f16bb..c320407813e 100644 --- a/src/transformers/models/roformer/modeling_flax_roformer.py +++ b/src/transformers/models/roformer/modeling_flax_roformer.py @@ -648,7 +648,7 @@ class FlaxRoFormerPreTrainedModel(FlaxPreTrainedModel): attention_mask=None, token_type_ids=None, head_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr.py b/src/transformers/models/rt_detr/image_processing_rt_detr.py index e458de37949..cdefc9ff947 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr.py @@ -440,13 +440,13 @@ class RTDetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = False, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: bool = True, do_pad: bool = False, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py b/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py index 51372b74e42..1a86ac02af5 100644 --- a/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py +++ b/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py @@ -18,6 +18,7 @@ import argparse import json import re from pathlib import Path +from typing import Optional import requests import torch @@ -159,7 +160,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { } -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): # Use the mapping to rename keys for original_key, converted_key in ORIGINAL_TO_CONVERTED_KEY_MAPPING.items(): for key in list(state_dict_keys.keys()): diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index 9288cc1485c..3142d9d1981 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -118,8 +118,8 @@ class SamImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - mask_size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, + mask_size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -294,7 +294,7 @@ class SamImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] 
= None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -349,7 +349,7 @@ class SamImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - mask_size: Dict[str, int] = None, + mask_size: Optional[Dict[str, int]] = None, do_pad: Optional[bool] = None, mask_pad_size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index 79cbe47482d..84fec78a2ad 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -99,7 +99,7 @@ class SegformerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -222,7 +222,7 @@ class SegformerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -264,7 +264,7 @@ class SegformerImageProcessor(BaseImageProcessor): segmentation_map: ImageInput, do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single mask.""" @@ -437,7 +437,7 @@ class SegformerImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->Segformer - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`SegformerForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. 
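Because the edit is so uniform, most of it can be scripted. The snippet below is only a rough illustration of the rewrite, not the tool behind this patch; a robust codemod would operate on the AST rather than on a line-level regex:

```python
import re

# Rewrites "name: SomeType = None" into "name: Optional[SomeType] = None"
# while leaving annotations that already use Optional untouched.
IMPLICIT_OPTIONAL = re.compile(r"(\w+): (?!Optional\[)([\w\.\[\], ]+?) = None")

lines = [
    "        size: Dict[str, int] = None,",
    "        image_mean: Optional[Union[float, List[float]]] = None,",
]
for line in lines:
    print(IMPLICIT_OPTIONAL.sub(r"\1: Optional[\2] = None", line))
# The first line gains an Optional wrapper; the second, which already
# uses Optional, is printed unchanged.
```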
diff --git a/src/transformers/models/seggpt/image_processing_seggpt.py b/src/transformers/models/seggpt/image_processing_seggpt.py index 26c7c1f47ac..b469586de83 100644 --- a/src/transformers/models/seggpt/image_processing_seggpt.py +++ b/src/transformers/models/seggpt/image_processing_seggpt.py @@ -247,7 +247,7 @@ class SegGptImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -394,7 +394,7 @@ class SegGptImageProcessor(BaseImageProcessor): prompt_images: Optional[ImageInput] = None, prompt_masks: Optional[ImageInput] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/siglip/image_processing_siglip.py b/src/transformers/models/siglip/image_processing_siglip.py index 7ec6c36d39c..ae9dabb3ed9 100644 --- a/src/transformers/models/siglip/image_processing_siglip.py +++ b/src/transformers/models/siglip/image_processing_siglip.py @@ -82,7 +82,7 @@ class SiglipImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -112,7 +112,7 @@ class SiglipImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/smolvlm/image_processing_smolvlm.py b/src/transformers/models/smolvlm/image_processing_smolvlm.py index abf9353fd4e..92b432de0ec 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm.py @@ -292,10 +292,10 @@ class SmolVLMImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.LANCZOS, do_image_splitting: bool = True, - max_image_size: Dict[str, int] = None, + max_image_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py index 772b1d23bd5..e266460346c 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py @@ -474,7 +474,7 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): return_dict: Optional[bool] = None, train: bool = False, freeze_feature_encoder: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -542,12 +542,12 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, 
decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -671,7 +671,7 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): return_dict: Optional[bool] = None, train: bool = False, freeze_feature_encoder: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/superglue/configuration_superglue.py b/src/transformers/models/superglue/configuration_superglue.py index fe301442d63..caebe86d9c7 100644 --- a/src/transformers/models/superglue/configuration_superglue.py +++ b/src/transformers/models/superglue/configuration_superglue.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, List +from typing import TYPE_CHECKING, List, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -73,8 +73,8 @@ class SuperGlueConfig(PretrainedConfig): self, keypoint_detector_config: "SuperPointConfig" = None, hidden_size: int = 256, - keypoint_encoder_sizes: List[int] = None, - gnn_layers_types: List[str] = None, + keypoint_encoder_sizes: Optional[List[int]] = None, + gnn_layers_types: Optional[List[str]] = None, num_attention_heads: int = 4, sinkhorn_iterations: int = 100, matching_threshold: float = 0.0, diff --git a/src/transformers/models/superglue/image_processing_superglue.py b/src/transformers/models/superglue/image_processing_superglue.py index 4a858db8f4e..c2e1f936269 100644 --- a/src/transformers/models/superglue/image_processing_superglue.py +++ b/src/transformers/models/superglue/image_processing_superglue.py @@ -161,7 +161,7 @@ class SuperGlueImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, @@ -223,7 +223,7 @@ class SuperGlueImageProcessor(BaseImageProcessor): self, images, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 77802d2e5c7..e0835934df3 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -122,7 +122,7 @@ class SuperPointImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_grayscale: bool = False, @@ -181,7 +181,7 @@ class SuperPointImageProcessor(BaseImageProcessor): self, images, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_grayscale: Optional[bool] 
= None, diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index be76fe1b772..1fa8da5c2d0 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -993,7 +993,7 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1078,7 +1078,7 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1134,12 +1134,12 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel): encoder_outputs, encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1462,7 +1462,7 @@ class FlaxT5EncoderModel(FlaxT5PreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1612,12 +1612,12 @@ class FlaxT5ForConditionalGeneration(FlaxT5PreTrainedModel): encoder_outputs, encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/textnet/image_processing_textnet.py b/src/transformers/models/textnet/image_processing_textnet.py index 74806a05566..f75d1db097a 100644 --- a/src/transformers/models/textnet/image_processing_textnet.py +++ b/src/transformers/models/textnet/image_processing_textnet.py @@ -94,11 +94,11 @@ class TextNetImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = False, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -204,7 +204,7 @@ class TextNetImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, diff --git a/src/transformers/models/tvp/image_processing_tvp.py b/src/transformers/models/tvp/image_processing_tvp.py index be19d893a66..81129a54f1d 100644 --- 
a/src/transformers/models/tvp/image_processing_tvp.py +++ b/src/transformers/models/tvp/image_processing_tvp.py @@ -136,14 +136,14 @@ class TvpImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_pad: bool = True, - pad_size: Dict[str, int] = None, + pad_size: Optional[Dict[str, int]] = None, constant_values: Union[float, Iterable[float]] = 0, pad_mode: PaddingMode = PaddingMode.CONSTANT, do_normalize: bool = True, @@ -219,7 +219,7 @@ class TvpImageProcessor(BaseImageProcessor): def pad_image( self, image: np.ndarray, - pad_size: Dict[str, int] = None, + pad_size: Optional[Dict[str, int]] = None, constant_values: Union[float, Iterable[float]] = 0, pad_mode: PaddingMode = PaddingMode.CONSTANT, data_format: Optional[Union[str, ChannelDimension]] = None, @@ -267,15 +267,15 @@ class TvpImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_pad: bool = True, - pad_size: Dict[str, int] = None, - constant_values: Union[float, Iterable[float]] = None, + pad_size: Optional[Dict[str, int]] = None, + constant_values: Optional[Union[float, Iterable[float]]] = None, pad_mode: PaddingMode = None, do_normalize: Optional[bool] = None, do_flip_channel_order: Optional[bool] = None, @@ -341,15 +341,15 @@ class TvpImageProcessor(BaseImageProcessor): self, videos: Union[ImageInput, List[ImageInput], List[List[ImageInput]]], do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_pad: Optional[bool] = None, - pad_size: Dict[str, int] = None, - constant_values: Union[float, Iterable[float]] = None, + pad_size: Optional[Dict[str, int]] = None, + constant_values: Optional[Union[float, Iterable[float]]] = None, pad_mode: PaddingMode = None, do_normalize: Optional[bool] = None, do_flip_channel_order: Optional[bool] = None, diff --git a/src/transformers/models/tvp/modeling_tvp.py b/src/transformers/models/tvp/modeling_tvp.py index 91a0fc10b1a..d7e20581520 100644 --- a/src/transformers/models/tvp/modeling_tvp.py +++ b/src/transformers/models/tvp/modeling_tvp.py @@ -919,7 +919,7 @@ class TvpForVideoGrounding(TvpPreTrainedModel): input_ids: Optional[torch.LongTensor] = None, pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.LongTensor] = None, - labels: Tuple[torch.Tensor] = None, + labels: Optional[Tuple[torch.Tensor]] = None, head_mask: Optional[torch.FloatTensor] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py index b2c43815f59..5fb3c0ce8d5 100644 --- 
a/src/transformers/models/udop/modeling_udop.py +++ b/src/transformers/models/udop/modeling_udop.py @@ -1716,9 +1716,9 @@ class UdopModel(UdopPreTrainedModel): self, input_ids: Optional[Tensor] = None, attention_mask: Optional[Tensor] = None, - bbox: Dict[str, Any] = None, + bbox: Optional[Dict[str, Any]] = None, pixel_values: Optional[Tensor] = None, - visual_bbox: Dict[str, Any] = None, + visual_bbox: Optional[Dict[str, Any]] = None, decoder_input_ids: Optional[Tensor] = None, decoder_attention_mask: Optional[Tensor] = None, inputs_embeds: Optional[Tensor] = None, @@ -1892,9 +1892,9 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin): self, input_ids: Optional[Tensor] = None, attention_mask: Optional[Tensor] = None, - bbox: Dict[str, Any] = None, + bbox: Optional[Dict[str, Any]] = None, pixel_values: Optional[Tensor] = None, - visual_bbox: Dict[str, Any] = None, + visual_bbox: Optional[Dict[str, Any]] = None, decoder_input_ids: Optional[Tensor] = None, decoder_attention_mask: Optional[Tensor] = None, inputs_embeds: Optional[Tensor] = None, @@ -2104,10 +2104,10 @@ class UdopEncoderModel(UdopPreTrainedModel): def forward( self, input_ids: Optional[Tensor] = None, - bbox: Dict[str, Any] = None, + bbox: Optional[Dict[str, Any]] = None, attention_mask: Optional[Tensor] = None, pixel_values: Optional[Tensor] = None, - visual_bbox: Dict[str, Any] = None, + visual_bbox: Optional[Dict[str, Any]] = None, head_mask: Optional[Tensor] = None, inputs_embeds: Optional[Tensor] = None, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/udop/tokenization_udop.py b/src/transformers/models/udop/tokenization_udop.py index 08eccaec7ba..86ae0744d54 100644 --- a/src/transformers/models/udop/tokenization_udop.py +++ b/src/transformers/models/udop/tokenization_udop.py @@ -511,7 +511,7 @@ class UdopTokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair_target: Optional[ @@ -545,7 +545,7 @@ class UdopTokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/udop/tokenization_udop_fast.py b/src/transformers/models/udop/tokenization_udop_fast.py index 9992da7bddc..337617f7216 100644 --- a/src/transformers/models/udop/tokenization_udop_fast.py +++ b/src/transformers/models/udop/tokenization_udop_fast.py @@ -243,7 +243,7 @@ class UdopTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], 
List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair_target: Optional[ @@ -278,7 +278,7 @@ class UdopTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/video_llava/image_processing_video_llava.py b/src/transformers/models/video_llava/image_processing_video_llava.py index 8b7f4897918..65972416833 100644 --- a/src/transformers/models/video_llava/image_processing_video_llava.py +++ b/src/transformers/models/video_llava/image_processing_video_llava.py @@ -90,10 +90,10 @@ class VideoLlavaImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -172,10 +172,10 @@ class VideoLlavaImageProcessor(BaseImageProcessor): @filter_out_non_signature_kwargs() def preprocess( self, - images: List[ImageInput] = None, - videos: List[VideoInput] = None, + images: Optional[List[ImageInput]] = None, + videos: Optional[List[VideoInput]] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index a7043d5ada5..fa0459fabbd 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -105,10 +105,10 @@ class VideoMAEImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -181,10 +181,10 @@ class VideoMAEImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -239,10 +239,10 @@ class VideoMAEImageProcessor(BaseImageProcessor): self, videos: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: 
Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/vilt/image_processing_vilt.py b/src/transformers/models/vilt/image_processing_vilt.py index d4ac8cca32c..4bd7ac55ee7 100644 --- a/src/transformers/models/vilt/image_processing_vilt.py +++ b/src/transformers/models/vilt/image_processing_vilt.py @@ -165,7 +165,7 @@ class ViltImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, diff --git a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py index 659afb976c3..4d96a68bc1c 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py @@ -400,7 +400,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -473,12 +473,12 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): encoder_outputs, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -607,7 +607,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py index 57afc18988d..020efb3c5c1 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py @@ -273,7 +273,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): attention_mask=None, position_ids=None, token_type_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -322,7 +322,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): attention_mask=None, position_ids=None, token_type_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False, ): @@ -379,7 +379,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): ) def get_image_features( - self, pixel_values, params: dict = None, dropout_rng: jax.random.PRNGKey = None, train=False + self, pixel_values, params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False ): r""" 
Args: diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index ade7495b1d4..654e56ab910 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -155,7 +155,7 @@ class ViTImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/vit/modeling_flax_vit.py b/src/transformers/models/vit/modeling_flax_vit.py index 5cf3477b5dd..8e86248cde1 100644 --- a/src/transformers/models/vit/modeling_flax_vit.py +++ b/src/transformers/models/vit/modeling_flax_vit.py @@ -474,7 +474,7 @@ class FlaxViTPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/vitpose/convert_vitpose_to_hf.py b/src/transformers/models/vitpose/convert_vitpose_to_hf.py index e4666751a10..e9bbad20354 100644 --- a/src/transformers/models/vitpose/convert_vitpose_to_hf.py +++ b/src/transformers/models/vitpose/convert_vitpose_to_hf.py @@ -22,6 +22,7 @@ Notebook to get the original logits: https://colab.research.google.com/drive/1QD import argparse import os import re +from typing import Optional import requests import torch @@ -160,7 +161,7 @@ def get_config(model_name): return config -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. 
diff --git a/src/transformers/models/vitpose/image_processing_vitpose.py b/src/transformers/models/vitpose/image_processing_vitpose.py index 8320ad2d6d1..fc1a8719391 100644 --- a/src/transformers/models/vitpose/image_processing_vitpose.py +++ b/src/transformers/models/vitpose/image_processing_vitpose.py @@ -353,7 +353,7 @@ class VitPoseImageProcessor(BaseImageProcessor): def __init__( self, do_affine_transform: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -425,7 +425,7 @@ class VitPoseImageProcessor(BaseImageProcessor): images: ImageInput, boxes: Union[List[List[float]], np.ndarray], do_affine_transform: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index abf0a5808e1..7e984f02519 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -109,10 +109,10 @@ class VivitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 127.5, offset: bool = True, @@ -228,10 +228,10 @@ class VivitImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, offset: Optional[bool] = None, @@ -291,10 +291,10 @@ class VivitImageProcessor(BaseImageProcessor): self, videos: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, offset: Optional[bool] = None, diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py index 54707620501..ee188888d7c 100644 --- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py @@ -891,7 +891,7 @@ class FlaxWav2Vec2PreTrainedModel(FlaxPreTrainedModel): input_values, attention_mask=None, mask_time_indices=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -1327,7 +1327,7 @@ class FlaxWav2Vec2ForPreTraining(FlaxWav2Vec2PreTrainedModel): attention_mask=None, mask_time_indices=None, gumbel_temperature: int = 1, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, gumbel_rng: jax.random.PRNGKey = None, train: bool = False, diff --git 
a/src/transformers/models/whisper/modeling_flax_whisper.py b/src/transformers/models/whisper/modeling_flax_whisper.py index e3c7e9d1c65..decc393dfca 100644 --- a/src/transformers/models/whisper/modeling_flax_whisper.py +++ b/src/transformers/models/whisper/modeling_flax_whisper.py @@ -867,7 +867,7 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): def __init__( self, config: WhisperConfig, - input_shape: Tuple[int] = None, + input_shape: Optional[Tuple[int]] = None, seed: int = 0, dtype: jnp.dtype = jnp.float32, _do_init: bool = True, @@ -970,7 +970,7 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, **kwargs, ): @@ -1025,12 +1025,12 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1144,7 +1144,7 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1278,12 +1278,12 @@ class FlaxWhisperForConditionalGeneration(FlaxWhisperPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1631,7 +1631,7 @@ class FlaxWhisperForAudioClassification(FlaxWhisperPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, **kwargs, ): diff --git a/src/transformers/models/xglm/modeling_flax_xglm.py b/src/transformers/models/xglm/modeling_flax_xglm.py index 3b7a933e4db..96f797ea58c 100644 --- a/src/transformers/models/xglm/modeling_flax_xglm.py +++ b/src/transformers/models/xglm/modeling_flax_xglm.py @@ -619,8 +619,8 @@ class FlaxXGLMPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py index 63432be06dd..b7fdeda1b23 100644 --- 
a/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py @@ -831,13 +831,13 @@ class FlaxXLMRobertaPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py index 1fe5823c206..dcc9bf2344c 100644 --- a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py @@ -1043,7 +1043,7 @@ class XLMRobertaForCausalLM(XLMRobertaPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/xmod/modeling_xmod.py b/src/transformers/models/xmod/modeling_xmod.py index 21aad7188e0..9465bf7a65a 100644 --- a/src/transformers/models/xmod/modeling_xmod.py +++ b/src/transformers/models/xmod/modeling_xmod.py @@ -996,7 +996,7 @@ class XmodForCausalLM(XmodPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index 2c1f0d1d2b0..681ed17f2fb 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -668,7 +668,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -777,13 +777,13 @@ class YolosImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: 
Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/zoedepth/image_processing_zoedepth.py b/src/transformers/models/zoedepth/image_processing_zoedepth.py index f839548320f..47920c29c68 100644 --- a/src/transformers/models/zoedepth/image_processing_zoedepth.py +++ b/src/transformers/models/zoedepth/image_processing_zoedepth.py @@ -154,7 +154,7 @@ class ZoeDepthImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, keep_aspect_ratio: bool = True, ensure_multiple_of: int = 32, diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index 460ee932997..bba2592f3ce 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -108,7 +108,9 @@ class OnnxConfig(ABC): "speech2seq-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}), } - def __init__(self, config: "PretrainedConfig", task: str = "default", patching_specs: List[PatchingSpec] = None): + def __init__( + self, config: "PretrainedConfig", task: str = "default", patching_specs: Optional[List[PatchingSpec]] = None + ): self._config = config if task not in self._tasks_to_common_outputs: diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py index 899a7cc5390..96b0565d784 100644 --- a/src/transformers/pipelines/document_question_answering.py +++ b/src/transformers/pipelines/document_question_answering.py @@ -202,7 +202,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): self, image: Union["Image.Image", str], question: Optional[str] = None, - word_boxes: Tuple[str, List[float]] = None, + word_boxes: Optional[Tuple[str, List[float]]] = None, **kwargs, ): """ @@ -283,7 +283,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): padding="do_not_pad", doc_stride=None, max_seq_len=None, - word_boxes: Tuple[str, List[float]] = None, + word_boxes: Optional[Tuple[str, List[float]]] = None, lang=None, tesseract_config="", timeout=None, diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index 616eb8def78..940dea10d76 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -13,7 +13,7 @@ # limitations under the License. import warnings from io import BytesIO -from typing import List, Union +from typing import List, Optional, Union import requests @@ -77,7 +77,7 @@ class VideoClassificationPipeline(Pipeline): postprocess_params["function_to_apply"] = "softmax" return preprocess_params, {}, postprocess_params - def __call__(self, inputs: Union[str, List[str]] = None, **kwargs): + def __call__(self, inputs: Optional[Union[str, List[str]]] = None, **kwargs): """ Assign labels to the video(s) passed as inputs. 
diff --git a/src/transformers/pipelines/visual_question_answering.py b/src/transformers/pipelines/visual_question_answering.py index 6d600c9eaf5..83dbd8f2151 100644 --- a/src/transformers/pipelines/visual_question_answering.py +++ b/src/transformers/pipelines/visual_question_answering.py @@ -1,4 +1,4 @@ -from typing import List, Union +from typing import List, Optional, Union from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging from .base import Pipeline, build_pipeline_init_args @@ -79,7 +79,7 @@ class VisualQuestionAnsweringPipeline(Pipeline): def __call__( self, image: Union["Image.Image", str, List["Image.Image"], List[str], "KeyDataset"], - question: Union[str, List[str]] = None, + question: Optional[Union[str, List[str]]] = None, **kwargs, ): r""" diff --git a/src/transformers/pipelines/zero_shot_object_detection.py b/src/transformers/pipelines/zero_shot_object_detection.py index ce8da7340bc..5385d84db76 100644 --- a/src/transformers/pipelines/zero_shot_object_detection.py +++ b/src/transformers/pipelines/zero_shot_object_detection.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends from .base import ChunkPipeline, build_pipeline_init_args @@ -65,7 +65,7 @@ class ZeroShotObjectDetectionPipeline(ChunkPipeline): def __call__( self, image: Union[str, "Image.Image", List[Dict[str, Any]]], - candidate_labels: Union[str, List[str]] = None, + candidate_labels: Optional[Union[str, List[str]]] = None, **kwargs, ): """ diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index 4d3dd6d6bb1..49feecf694d 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -792,7 +792,7 @@ def number_of_arguments(func): def find_executable_batch_size( - function: callable = None, starting_batch_size: int = 128, auto_find_batch_size: bool = False + function: Optional[callable] = None, starting_batch_size: int = 128, auto_find_batch_size: bool = False ): """ Args: diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index f420a3e12d7..9654d5d1fff 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -1877,7 +1877,7 @@ class _LazyModule(ModuleType): module_file: str, import_structure: IMPORT_STRUCTURE_T, module_spec: Optional[importlib.machinery.ModuleSpec] = None, - extra_objects: Dict[str, object] = None, + extra_objects: Optional[Dict[str, object]] = None, ): super().__init__(name) @@ -2412,7 +2412,7 @@ def spread_import_structure(nested_import_structure): @lru_cache() -def define_import_structure(module_path: str, prefix: str = None) -> IMPORT_STRUCTURE_T: +def define_import_structure(module_path: str, prefix: Optional[str] = None) -> IMPORT_STRUCTURE_T: """ This method takes a module_path as input and creates an import structure digestible by a _LazyModule. 
diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py index 816ff9ae559..72b3837142c 100644 --- a/src/transformers/utils/quantization_config.py +++ b/src/transformers/utils/quantization_config.py @@ -1308,13 +1308,13 @@ class CompressedTensorsConfig(QuantizationConfigMixin): def __init__( self, - config_groups: Dict[str, Union["QuantizationScheme", List[str]]] = None, # noqa: F821 + config_groups: Optional[Dict[str, Union["QuantizationScheme", List[str]]]] = None, # noqa: F821 format: str = "dense", quantization_status: "QuantizationStatus" = "initialized", # noqa: F821 kv_cache_scheme: Optional["QuantizationArgs"] = None, # noqa: F821 global_compression_ratio: Optional[float] = None, ignore: Optional[List[str]] = None, - sparsity_config: Dict[str, Any] = None, + sparsity_config: Optional[Dict[str, Any]] = None, quant_method: str = "compressed-tensors", run_compressed: bool = True, **kwargs, diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py index b5215c1e9f2..b2b96613e63 100644 --- a/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -19,6 +19,7 @@ import os import unittest from copy import deepcopy from functools import partial +from typing import Optional import datasets from parameterized import parameterized @@ -1252,8 +1253,8 @@ class TestDeepSpeedWithLauncher(TestCasePlus): do_eval: bool = True, quality_checks: bool = True, fp32: bool = False, - extra_args_str: str = None, - remove_args_str: str = None, + extra_args_str: Optional[str] = None, + remove_args_str: Optional[str] = None, ): # we are doing quality testing so using a small real model output_dir = self.run_trainer( @@ -1285,8 +1286,8 @@ class TestDeepSpeedWithLauncher(TestCasePlus): do_eval: bool = True, distributed: bool = True, fp32: bool = False, - extra_args_str: str = None, - remove_args_str: str = None, + extra_args_str: Optional[str] = None, + remove_args_str: Optional[str] = None, ): max_len = 32 data_dir = self.test_file_dir / "../fixtures/tests_samples/wmt_en_ro" diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index afa8be4b8f6..dc087559d45 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -17,6 +17,7 @@ import os import re import sys from pathlib import Path +from typing import Optional from unittest.mock import patch from parameterized import parameterized @@ -270,13 +271,13 @@ class TestTrainerExt(TestCasePlus): learning_rate: float = 3e-3, optim: str = "adafactor", distributed: bool = False, - extra_args_str: str = None, + extra_args_str: Optional[str] = None, eval_steps: int = 0, predict_with_generate: bool = True, do_train: bool = True, do_eval: bool = True, do_predict: bool = True, - n_gpus_to_use: int = None, + n_gpus_to_use: Optional[int] = None, ): data_dir = self.test_file_dir / "../fixtures/tests_samples/wmt_en_ro" output_dir = self.get_auto_remove_tmp_dir() diff --git a/tests/models/bridgetower/test_image_processing_bridgetower.py b/tests/models/bridgetower/test_image_processing_bridgetower.py index 388bb65f69d..12d2d03b775 100644 --- a/tests/models/bridgetower/test_image_processing_bridgetower.py +++ b/tests/models/bridgetower/test_image_processing_bridgetower.py @@ -41,7 +41,7 @@ class BridgeTowerImageProcessingTester: self, parent, do_resize: bool = True, - size: dict[str, int] = None, + size: Optional[dict[str, int]] = None, size_divisor: int = 32, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 
/ 255, diff --git a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py index 5c5ff131533..ad00eab111f 100644 --- a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py +++ b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py @@ -92,12 +92,12 @@ class PatchTSMixerModelTester: head_dropout: float = 0.2, # forecast related prediction_length: int = 16, - out_channels: int = None, + out_channels: Optional[int] = None, # Classification/regression related # num_labels: int = 3, num_targets: int = 3, - output_range: list = None, - head_aggregation: str = None, + output_range: Optional[list] = None, + head_aggregation: Optional[str] = None, # Trainer related batch_size=13, is_training=True, diff --git a/tests/models/tvp/test_image_processing_tvp.py b/tests/models/tvp/test_image_processing_tvp.py index 6183c2e163e..20f4ed73697 100644 --- a/tests/models/tvp/test_image_processing_tvp.py +++ b/tests/models/tvp/test_image_processing_tvp.py @@ -41,12 +41,12 @@ class TvpImageProcessingTester: do_resize: bool = True, size: dict[str, int] = {"longest_edge": 40}, do_center_crop: bool = False, - crop_size: dict[str, int] = None, + crop_size: Optional[dict[str, int]] = None, do_rescale: bool = False, rescale_factor: Union[int, float] = 1 / 255, do_pad: bool = True, pad_size: dict[str, int] = {"height": 80, "width": 80}, - fill: int = None, + fill: Optional[int] = None, pad_mode: PaddingMode = None, do_normalize: bool = True, image_mean: Optional[Union[float, list[float]]] = [0.48145466, 0.4578275, 0.40821073], diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 0e2ab52203a..356473d11ad 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -28,7 +28,7 @@ from collections import OrderedDict from functools import lru_cache from itertools import takewhile from pathlib import Path -from typing import TYPE_CHECKING, Any, Union +from typing import TYPE_CHECKING, Any, Optional, Union from parameterized import parameterized @@ -173,7 +173,7 @@ def _test_subword_regularization_tokenizer(in_queue, out_queue, timeout): def check_subword_sampling( tokenizer: PreTrainedTokenizer, - text: str = None, + text: Optional[str] = None, test_sentencepiece_ignore_case: bool = True, ) -> None: """ @@ -321,9 +321,9 @@ class TokenizerTesterMixin: self, expected_encoding: dict, model_name: str, - revision: str = None, - sequences: list[str] = None, - decode_kwargs: dict[str, Any] = None, + revision: Optional[str] = None, + sequences: Optional[list[str]] = None, + decode_kwargs: Optional[dict[str, Any]] = None, padding: bool = True, ): """ diff --git a/utils/check_copies.py b/utils/check_copies.py index 0dffa79a327..9b392f13673 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -390,7 +390,7 @@ def split_code_into_blocks( def find_code_in_transformers( - object_name: str, base_path: str = None, return_indices: bool = False + object_name: str, base_path: Optional[str] = None, return_indices: bool = False ) -> Union[str, Tuple[List[str], int, int]]: """ Find and return the source code of an object. @@ -491,7 +491,7 @@ def replace_code(code: str, replace_pattern: str) -> str: return code -def find_code_and_splits(object_name: str, base_path: str, buffer: dict = None): +def find_code_and_splits(object_name: str, base_path: str, buffer: Optional[dict] = None): """Find the code of an object (specified by `object_name`) and split it into blocks. 
Args: @@ -638,7 +638,9 @@ def check_codes_match(observed_code: str, theoretical_code: str) -> Optional[int diff_index += 1 -def is_copy_consistent(filename: str, overwrite: bool = False, buffer: dict = None) -> Optional[List[Tuple[str, int]]]: +def is_copy_consistent( + filename: str, overwrite: bool = False, buffer: Optional[dict] = None +) -> Optional[List[Tuple[str, int]]]: """ Check if the code commented as a copy in a file matches the original. @@ -831,7 +833,7 @@ def is_copy_consistent(filename: str, overwrite: bool = False, buffer: dict = No return diffs -def check_copies(overwrite: bool = False, file: str = None): +def check_copies(overwrite: bool = False, file: Optional[str] = None): """ Check every file is copy-consistent with the original. Also check the model list in the main README and other READMEs are consistent. diff --git a/utils/notification_service.py b/utils/notification_service.py index f7f3d16e558..17e3b9d4982 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -107,7 +107,7 @@ class Message: ci_title: str, model_results: Dict, additional_results: Dict, - selected_warnings: List = None, + selected_warnings: Optional[List] = None, prev_ci_artifacts=None, ): self.title = title @@ -856,7 +856,7 @@ def retrieve_available_artifacts(): def __str__(self): return self.name - def add_path(self, path: str, gpu: str = None): + def add_path(self, path: str, gpu: Optional[str] = None): self.paths.append({"name": self.name, "path": path, "gpu": gpu}) _available_artifacts: Dict[str, Artifact] = {} diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index e2a256dfd6f..afdd35b9a8d 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -59,7 +59,7 @@ import re import tempfile from contextlib import contextmanager from pathlib import Path -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union from git import Repo @@ -621,7 +621,7 @@ _re_single_line_direct_imports = re.compile(r"(?:^|\n)\s*from\s+transformers(\S* _re_multi_line_direct_imports = re.compile(r"(?:^|\n)\s*from\s+transformers(\S*)\s+import\s+\(([^\)]+)\)") -def extract_imports(module_fname: str, cache: Dict[str, List[str]] = None) -> List[str]: +def extract_imports(module_fname: str, cache: Optional[Dict[str, List[str]]] = None) -> List[str]: """ Get the imports a given module makes. @@ -703,7 +703,7 @@ def extract_imports(module_fname: str, cache: Dict[str, List[str]] = None) -> Li return result -def get_module_dependencies(module_fname: str, cache: Dict[str, List[str]] = None) -> List[str]: +def get_module_dependencies(module_fname: str, cache: Optional[Dict[str, List[str]]] = None) -> List[str]: """ Refines the result of `extract_imports` to remove subfolders and get a proper list of module filenames: if a file as an import `from utils import Foo, Bar`, with `utils` being a subfolder containing many files, this will traverse @@ -953,7 +953,7 @@ def create_reverse_dependency_map() -> Dict[str, List[str]]: def create_module_to_test_map( - reverse_map: Dict[str, List[str]] = None, filter_models: bool = False + reverse_map: Optional[Dict[str, List[str]]] = None, filter_models: bool = False ) -> Dict[str, List[str]]: """ Extract the tests from the reverse_dependency_map and potentially filters the model tests.