diff --git a/src/transformers/generation/candidate_generator.py b/src/transformers/generation/candidate_generator.py index 94246b30f5a..b4478bcbac0 100644 --- a/src/transformers/generation/candidate_generator.py +++ b/src/transformers/generation/candidate_generator.py @@ -710,8 +710,8 @@ class AssistantToTargetTranslator: assistant_model: Optional["PreTrainedModel"] = None, assistant_prune_lm_head: bool = False, ): - self._target_tokenizer: "PreTrainedTokenizerBase" = target_tokenizer - self._assistant_tokenizer: "PreTrainedTokenizerBase" = assistant_tokenizer + self._target_tokenizer: PreTrainedTokenizerBase = target_tokenizer + self._assistant_tokenizer: PreTrainedTokenizerBase = assistant_tokenizer self._assistant_model_device: str = ( assistant_model_device if assistant_model is None else assistant_model.device ) diff --git a/src/transformers/generation/streamers.py b/src/transformers/generation/streamers.py index 863fd67bddc..399ac2a3f5b 100644 --- a/src/transformers/generation/streamers.py +++ b/src/transformers/generation/streamers.py @@ -72,7 +72,7 @@ class TextStreamer(BaseStreamer): ``` """ - def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, **decode_kwargs): + def __init__(self, tokenizer: AutoTokenizer, skip_prompt: bool = False, **decode_kwargs): self.tokenizer = tokenizer self.skip_prompt = skip_prompt self.decode_kwargs = decode_kwargs @@ -206,7 +206,7 @@ class TextIteratorStreamer(TextStreamer): """ def __init__( - self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs + self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs ): super().__init__(tokenizer, skip_prompt, **decode_kwargs) self.text_queue = Queue() @@ -284,7 +284,7 @@ class AsyncTextIteratorStreamer(TextStreamer): """ def __init__( - self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs + self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs ): super().__init__(tokenizer, skip_prompt, **decode_kwargs) self.text_queue = asyncio.Queue() diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 484b4954cfa..e1dc9cf1248 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -4723,7 +4723,7 @@ class GenerationMixin(ContinuousMixin): ) if return_dict_in_generate and output_scores: - beam_indices = tuple((beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices)))) + beam_indices = tuple(beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices))) # increase cur_len cur_len = cur_len + 1 diff --git a/src/transformers/integrations/integration_utils.py b/src/transformers/integrations/integration_utils.py index bcfebdbd113..11274a60b48 100755 --- a/src/transformers/integrations/integration_utils.py +++ b/src/transformers/integrations/integration_utils.py @@ -1626,8 +1626,8 @@ class NeptuneCallback(TrainerCallback): target_path = consistent_checkpoint_path except OSError as e: logger.warning( - "NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'. " - "Could fail trying to upload.".format(e) + f"NeptuneCallback was unable to make a copy of the checkpoint due to I/O exception: '{e}'. " + "Could fail trying to upload."
) self._metadata_namespace[self._target_checkpoints_namespace].upload_files(target_path) @@ -1976,9 +1976,7 @@ class ClearMLCallback(TrainerCallback): ) except Exception as e: logger.warning( - "Could not remove checkpoint `{}` after going over the `save_total_limit`. Error is: {}".format( - self._checkpoints_saved[0].name, e - ) + f"Could not remove checkpoint `{self._checkpoints_saved[0].name}` after going over the `save_total_limit`. Error is: {e}" ) break self._checkpoints_saved = self._checkpoints_saved[1:] diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 4c758daed87..b64e3569459 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1409,10 +1409,10 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT def prepare_tf_dataset( self, - dataset: "datasets.Dataset", # noqa:F821 + dataset: datasets.Dataset, # noqa:F821 batch_size: int = 8, shuffle: bool = True, - tokenizer: Optional["PreTrainedTokenizerBase"] = None, + tokenizer: Optional[PreTrainedTokenizerBase] = None, collate_fn: Optional[Callable] = None, collate_fn_args: Optional[dict[str, Any]] = None, drop_remainder: Optional[bool] = None, diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 3b235d9aeac..4bde1f4451c 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -4424,10 +4424,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMi raise ValueError("DeepSpeed Zero-3 is not compatible with passing a `device_map`.") if not is_accelerate_available(): raise ValueError( - ( - "Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` " - "requires `accelerate`. You can install it with `pip install accelerate`" - ) + "Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` " + "requires `accelerate`. You can install it with `pip install accelerate`" ) # handling bnb config from kwargs, remove after `load_in_{4/8}bit` deprecation. diff --git a/src/transformers/models/albert/tokenization_albert.py b/src/transformers/models/albert/tokenization_albert.py index 69de95a2dfe..011ad689edb 100644 --- a/src/transformers/models/albert/tokenization_albert.py +++ b/src/transformers/models/albert/tokenization_albert.py @@ -203,7 +203,7 @@ class AlbertTokenizer(PreTrainedTokenizer): pieces = self.sp_model.encode(text, out_type=str) new_pieces = [] for piece in pieces: - if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit(): + if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit(): # Logic to handle special cases see https://github.com/google-research/bert/blob/master/README.md#tokenization # `9,9` -> ['▁9', ',', '9'] instead of [`_9,`, '9'] cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, "")) diff --git a/src/transformers/models/bamba/modeling_bamba.py b/src/transformers/models/bamba/modeling_bamba.py index 95af1f25136..c66dc73a96c 100644 --- a/src/transformers/models/bamba/modeling_bamba.py +++ b/src/transformers/models/bamba/modeling_bamba.py @@ -830,7 +830,7 @@ class BambaMixer(nn.Module): # 2. 
Compute the state for each intra-chunk # (right term of low-rank factorization of off-diagonal blocks; B terms) - decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum)) + decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum) B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None] states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2) diff --git a/src/transformers/models/bamba/modular_bamba.py b/src/transformers/models/bamba/modular_bamba.py index 0d42d6e1432..3fec4ad35cf 100644 --- a/src/transformers/models/bamba/modular_bamba.py +++ b/src/transformers/models/bamba/modular_bamba.py @@ -632,7 +632,7 @@ class BambaMixer(nn.Module): # 2. Compute the state for each intra-chunk # (right term of low-rank factorization of off-diagonal blocks; B terms) - decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum)) + decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum) B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None] states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2) diff --git a/src/transformers/models/bart/tokenization_bart.py b/src/transformers/models/bart/tokenization_bart.py index e5c216346c1..f674afe1a41 100644 --- a/src/transformers/models/bart/tokenization_bart.py +++ b/src/transformers/models/bart/tokenization_bart.py @@ -32,7 +32,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"} # See all BART models at https://huggingface.co/models?filter=bart -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/beit/modeling_beit.py b/src/transformers/models/beit/modeling_beit.py index 70a6c78e065..086e62561fd 100755 --- a/src/transformers/models/beit/modeling_beit.py +++ b/src/transformers/models/beit/modeling_beit.py @@ -110,7 +110,7 @@ class BeitDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" # Based on timm implementation, which can be found here: @@ -513,8 +513,8 @@ class BeitLayer(nn.Module): init_values = config.layer_scale_init_value if init_values > 0: - self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) - self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) + self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) + self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) else: self.lambda_1, self.lambda_2 = None, None diff --git a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py index decf3dfa5c3..002011795c4 100644 --- a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py +++ b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py @@ -934,7 +934,7 @@ class SentencepieceTokenizer: pieces = self.sp_model.encode(text, out_type=str) new_pieces = [] for piece in pieces: - if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit(): + if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit(): cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, "")) if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE: if len(cur_pieces[0]) == 1: diff --git 
a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py index c390d2e39f6..8da189b1b30 100755 --- a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py @@ -115,7 +115,7 @@ class Dictionary: except FileNotFoundError as fnfe: raise fnfe except UnicodeError: - raise Exception("Incorrect encoding detected in {}, please rebuild the dataset".format(f)) + raise Exception(f"Incorrect encoding detected in {f}, please rebuild the dataset") return lines = f.readlines() @@ -133,11 +133,11 @@ class Dictionary: word = line if word in self and not overwrite: raise RuntimeError( - "Duplicate word found when loading Dictionary: '{}'. " + f"Duplicate word found when loading Dictionary: '{word}'. " "Duplicate words can overwrite earlier ones by adding the " "#fairseq:overwrite flag at the end of the corresponding row " "in the dictionary file. If using the Camembert model, please " - "download an updated copy of the model file.".format(word) + "download an updated copy of the model file." ) self.add_symbol(word, n=count, overwrite=overwrite) except ValueError: diff --git a/src/transformers/models/bit/modeling_bit.py b/src/transformers/models/bit/modeling_bit.py index d95e3537819..1a7a016f9cd 100644 --- a/src/transformers/models/bit/modeling_bit.py +++ b/src/transformers/models/bit/modeling_bit.py @@ -310,7 +310,7 @@ class BitDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" def make_div(value, divisor=8): diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot.py b/src/transformers/models/blenderbot/tokenization_blenderbot.py index 02be9fd74ae..76719fa2549 100644 --- a/src/transformers/models/blenderbot/tokenization_blenderbot.py +++ b/src/transformers/models/blenderbot/tokenization_blenderbot.py @@ -35,7 +35,7 @@ VOCAB_FILES_NAMES = { } -@lru_cache() +@lru_cache # Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode def bytes_to_unicode(): """ diff --git a/src/transformers/models/blip/modeling_blip_text.py b/src/transformers/models/blip/modeling_blip_text.py index 151caef0b10..2dac6b3493f 100644 --- a/src/transformers/models/blip/modeling_blip_text.py +++ b/src/transformers/models/blip/modeling_blip_text.py @@ -641,9 +641,7 @@ class BlipTextModel(BlipTextPreTrainedModel): extended_attention_mask = attention_mask[:, None, None, :] else: raise ValueError( - "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format( - input_shape, attention_mask.shape - ) + f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})" ) # Since attention_mask is 1.0 for positions we want to attend and 0.0 for @@ -723,7 +721,7 @@ class BlipTextModel(BlipTextPreTrainedModel): past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 if attention_mask is None: - attention_mask = torch.ones(((batch_size, seq_length + past_key_values_length))).to(device) + attention_mask = torch.ones((batch_size, seq_length + past_key_values_length)).to(device) # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] # ourselves in which case we just need to make it broadcastable to all heads. 
diff --git a/src/transformers/models/blip/modeling_tf_blip_text.py b/src/transformers/models/blip/modeling_tf_blip_text.py index e85d3e627ce..3d0a480fe62 100644 --- a/src/transformers/models/blip/modeling_tf_blip_text.py +++ b/src/transformers/models/blip/modeling_tf_blip_text.py @@ -800,9 +800,7 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel): extended_attention_mask = attention_mask[:, None, None, :] else: raise ValueError( - "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format( - input_shape, attention_mask.shape - ) + f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})" ) # Since attention_mask is 1.0 for positions we want to attend and 0.0 for @@ -881,7 +879,7 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel): past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 if attention_mask is None: - attention_mask = tf.ones(((batch_size, seq_length + past_key_values_length))) + attention_mask = tf.ones((batch_size, seq_length + past_key_values_length)) # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] # ourselves in which case we just need to make it broadcastable to all heads. diff --git a/src/transformers/models/blip_2/modeling_blip_2.py b/src/transformers/models/blip_2/modeling_blip_2.py index 970ee7ad0c4..4382296969e 100644 --- a/src/transformers/models/blip_2/modeling_blip_2.py +++ b/src/transformers/models/blip_2/modeling_blip_2.py @@ -1144,9 +1144,7 @@ class Blip2QFormerModel(Blip2PreTrainedModel): extended_attention_mask = attention_mask[:, None, None, :] else: raise ValueError( - "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format( - input_shape, attention_mask.shape - ) + f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})" ) # Since attention_mask is 1.0 for positions we want to attend and 0.0 for diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py index c4aa6f27c96..b9c9b27bd3c 100644 --- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py @@ -98,7 +98,7 @@ def convert_bloom_checkpoint_to_pytorch( config = BloomConfig() for j, file in enumerate(file_names): - print("Processing file: {}".format(file)) + print(f"Processing file: {file}") tensors = None for i in range(pretraining_tp): @@ -132,7 +132,7 @@ def convert_bloom_checkpoint_to_pytorch( tensors, os.path.join( pytorch_dump_folder_path, - "pytorch_model_{}-of-{}.bin".format(str(j + 1).zfill(5), str(len(file_names)).zfill(5)), + f"pytorch_model_{str(j + 1).zfill(5)}-of-{str(len(file_names)).zfill(5)}.bin", ), ) @@ -140,8 +140,8 @@ def convert_bloom_checkpoint_to_pytorch( value = tensors[key] total_size += value.numel() * get_dtype_size(value.dtype) if key not in index_dict["weight_map"]: - index_dict["weight_map"][key] = "pytorch_model_{}-of-{}.bin".format( - str(j + 1).zfill(5), str(len(file_names)).zfill(5) + index_dict["weight_map"][key] = ( + f"pytorch_model_{str(j + 1).zfill(5)}-of-{str(len(file_names)).zfill(5)}.bin" ) config = BloomConfig() diff --git a/src/transformers/models/clap/modeling_clap.py b/src/transformers/models/clap/modeling_clap.py index 0f8058ad76a..6a44e36ade4 100644 --- a/src/transformers/models/clap/modeling_clap.py +++ 
b/src/transformers/models/clap/modeling_clap.py @@ -610,7 +610,7 @@ class ClapAudioLayer(nn.Module): mask_windows = window_partition(img_mask, self.window_size) mask_windows = mask_windows.view(-1, self.window_size * self.window_size) attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0) else: attn_mask = None return attn_mask diff --git a/src/transformers/models/clip/tokenization_clip.py b/src/transformers/models/clip/tokenization_clip.py index e4a142f0d3b..2db3fd4a9af 100644 --- a/src/transformers/models/clip/tokenization_clip.py +++ b/src/transformers/models/clip/tokenization_clip.py @@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = { } -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control @@ -488,7 +488,7 @@ class CLIPTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ -506,8 +506,8 @@ class CLIPTokenizer(PreTrainedTokenizer): for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: BPE merge indices are not consecutive." - " Please check that the tokenizer is not corrupted!".format(merge_file) + f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive." + " Please check that the tokenizer is not corrupted!" 
) index = token_index writer.write(" ".join(bpe_tokens) + "\n") diff --git a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py index be2cfdee87d..6f4a60737bf 100644 --- a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py +++ b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py @@ -181,7 +181,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_ missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False) if missing_keys != ["clip.text_model.embeddings.position_ids", "clip.vision_model.embeddings.position_ids"]: - raise ValueError("Missing keys that are not expected: {}".format(missing_keys)) + raise ValueError(f"Missing keys that are not expected: {missing_keys}") if unexpected_keys != ["decoder.reduce.weight", "decoder.reduce.bias"]: raise ValueError(f"Unexpected keys: {unexpected_keys}") diff --git a/src/transformers/models/clvp/tokenization_clvp.py b/src/transformers/models/clvp/tokenization_clvp.py index 39e7429dfb2..61bd5964ecd 100644 --- a/src/transformers/models/clvp/tokenization_clvp.py +++ b/src/transformers/models/clvp/tokenization_clvp.py @@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = { } -@lru_cache() +@lru_cache # Copied from transformers.models.gpt2.tokenization_gpt2.bytes_to_unicode def bytes_to_unicode(): """ diff --git a/src/transformers/models/codegen/tokenization_codegen.py b/src/transformers/models/codegen/tokenization_codegen.py index 99f1facb1f8..152b1a84fc3 100644 --- a/src/transformers/models/codegen/tokenization_codegen.py +++ b/src/transformers/models/codegen/tokenization_codegen.py @@ -42,7 +42,7 @@ VOCAB_FILES_NAMES = { } -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. 
We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/convnext/modeling_convnext.py b/src/transformers/models/convnext/modeling_convnext.py index ccb78451031..81e262be041 100755 --- a/src/transformers/models/convnext/modeling_convnext.py +++ b/src/transformers/models/convnext/modeling_convnext.py @@ -70,7 +70,7 @@ class ConvNextDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class ConvNextLayerNorm(nn.Module): @@ -149,7 +149,7 @@ class ConvNextLayer(nn.Module): self.act = ACT2FN[config.hidden_act] self.pwconv2 = nn.Linear(4 * dim, dim) self.layer_scale_parameter = ( - nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True) + nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True) if config.layer_scale_init_value > 0 else None ) diff --git a/src/transformers/models/convnextv2/modeling_convnextv2.py b/src/transformers/models/convnextv2/modeling_convnextv2.py index ef9d282b9ef..c2b3372c97a 100644 --- a/src/transformers/models/convnextv2/modeling_convnextv2.py +++ b/src/transformers/models/convnextv2/modeling_convnextv2.py @@ -70,7 +70,7 @@ class ConvNextV2DropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class ConvNextV2GRN(nn.Module): diff --git a/src/transformers/models/cpm/tokenization_cpm.py b/src/transformers/models/cpm/tokenization_cpm.py index fa367aceb5e..c045c41eb4e 100644 --- a/src/transformers/models/cpm/tokenization_cpm.py +++ b/src/transformers/models/cpm/tokenization_cpm.py @@ -207,7 +207,7 @@ class CpmTokenizer(PreTrainedTokenizer): pieces = self.sp_model.encode(text, out_type=str) new_pieces = [] for piece in pieces: - if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit(): + if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit(): cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, "")) if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE: if len(cur_pieces[0]) == 1: diff --git a/src/transformers/models/cvt/modeling_cvt.py b/src/transformers/models/cvt/modeling_cvt.py index d0cd4b554b1..691a2467535 100644 --- a/src/transformers/models/cvt/modeling_cvt.py +++ b/src/transformers/models/cvt/modeling_cvt.py @@ -86,7 +86,7 @@ class CvtDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class CvtEmbeddings(nn.Module): diff --git a/src/transformers/models/d_fine/modeling_d_fine.py b/src/transformers/models/d_fine/modeling_d_fine.py index b2c27619092..7d8f988430d 100644 --- a/src/transformers/models/d_fine/modeling_d_fine.py +++ b/src/transformers/models/d_fine/modeling_d_fine.py @@ -187,7 +187,7 @@ class DFineMultiscaleDeformableAttention(nn.Module): sampling_locations = reference_points[:, :, None, :, :2] + offset else: raise ValueError( - "Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1]) + f"Last dim of reference_points must be 2 or 4, but get {reference_points.shape[-1]} instead." 
) output = self.ms_deformable_attn_core( diff --git a/src/transformers/models/d_fine/modular_d_fine.py b/src/transformers/models/d_fine/modular_d_fine.py index 03791838c27..83b31cf5659 100644 --- a/src/transformers/models/d_fine/modular_d_fine.py +++ b/src/transformers/models/d_fine/modular_d_fine.py @@ -517,7 +517,7 @@ class DFineMultiscaleDeformableAttention(nn.Module): sampling_locations = reference_points[:, :, None, :, :2] + offset else: raise ValueError( - "Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1]) + f"Last dim of reference_points must be 2 or 4, but get {reference_points.shape[-1]} instead." ) output = self.ms_deformable_attn_core( diff --git a/src/transformers/models/dab_detr/modeling_dab_detr.py b/src/transformers/models/dab_detr/modeling_dab_detr.py index a9369b8700d..c977f4b923b 100644 --- a/src/transformers/models/dab_detr/modeling_dab_detr.py +++ b/src/transformers/models/dab_detr/modeling_dab_detr.py @@ -384,7 +384,7 @@ def gen_sine_position_embeddings(pos_tensor, hidden_size=256): pos = torch.cat((pos_y, pos_x, pos_w, pos_h), dim=2) else: - raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1))) + raise ValueError(f"Unknown pos_tensor shape(-1):{pos_tensor.size(-1)}") return pos @@ -1254,7 +1254,7 @@ class DabDetrModel(DabDetrPreTrainedModel): self.num_patterns = config.num_patterns if not isinstance(self.num_patterns, int): - logger.warning("num_patterns should be int but {}".format(type(self.num_patterns))) + logger.warning(f"num_patterns should be int but {type(self.num_patterns)}") self.num_patterns = 0 if self.num_patterns > 0: self.patterns = nn.Embedding(self.num_patterns, self.hidden_size) diff --git a/src/transformers/models/dac/convert_dac_checkpoint.py b/src/transformers/models/dac/convert_dac_checkpoint.py index b1728a7da11..3608d3b4a9f 100644 --- a/src/transformers/models/dac/convert_dac_checkpoint.py +++ b/src/transformers/models/dac/convert_dac_checkpoint.py @@ -157,24 +157,12 @@ def recursively_load_weights(orig_dict, hf_model, model_name): elif len(mapped_key) == 3: integers = re.findall(r"\b\d+\b", name) if mapped_key[0][0] == "d": - mapped_key = "{}.{}.{}{}.{}".format( - mapped_key[0], - str(int(integers[0]) - 1), - mapped_key[1], - str(int(integers[1]) - 1), - mapped_key[2], - ) + mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}{str(int(integers[1]) - 1)}.{mapped_key[2]}" else: - mapped_key = "{}.{}.{}{}.{}".format( - mapped_key[0], - str(int(integers[0]) - 1), - mapped_key[1], - str(int(integers[1]) + 1), - mapped_key[2], - ) + mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}{str(int(integers[1]) + 1)}.{mapped_key[2]}" elif len(mapped_key) == 2: integers = re.findall(r"\b\d+\b", name) - mapped_key = "{}.{}.{}".format(mapped_key[0], str(int(integers[0]) - 1), mapped_key[1]) + mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}" is_used = True if "weight_g" in name: diff --git a/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py index 3f9d7773516..910e1fc8e24 100755 --- a/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py @@ -185,18 +185,12 @@ def load_beit_model(args, is_finetuned, is_large): 
missing_keys = warn_missing_keys if len(missing_keys) > 0: - print( - "Weights of {} not initialized from pretrained model: {}".format( - model.__class__.__name__, missing_keys - ) - ) + print(f"Weights of {model.__class__.__name__} not initialized from pretrained model: {missing_keys}") if len(unexpected_keys) > 0: - print("Weights from pretrained model not used in {}: {}".format(model.__class__.__name__, unexpected_keys)) + print(f"Weights from pretrained model not used in {model.__class__.__name__}: {unexpected_keys}") if len(ignore_missing_keys) > 0: print( - "Ignored weights of {} not initialized from pretrained model: {}".format( - model.__class__.__name__, ignore_missing_keys - ) + f"Ignored weights of {model.__class__.__name__} not initialized from pretrained model: {ignore_missing_keys}" ) if len(error_msgs) > 0: print("\n".join(error_msgs)) diff --git a/src/transformers/models/data2vec/modeling_data2vec_vision.py b/src/transformers/models/data2vec/modeling_data2vec_vision.py index 8762fedb102..c48782d2477 100644 --- a/src/transformers/models/data2vec/modeling_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_data2vec_vision.py @@ -101,7 +101,7 @@ class Data2VecVisionDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" # Copied from transformers.models.beit.modeling_beit.BeitEmbeddings with Beit->Data2VecVision @@ -515,8 +515,8 @@ class Data2VecVisionLayer(nn.Module): init_values = config.layer_scale_init_value if init_values > 0: - self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) - self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) + self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) + self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) else: self.lambda_1, self.lambda_2 = None, None diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index 399da1d9d83..766c127cd49 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -306,7 +306,7 @@ class TFData2VecVisionSelfAttention(keras.layers.Layer): hidden_states: tf.Tensor, head_mask: tf.Tensor, output_attentions: bool, - relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None, + relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None, training: bool = False, ) -> tuple[tf.Tensor]: batch_size = shape_list(hidden_states)[0] @@ -416,7 +416,7 @@ class TFData2VecVisionAttention(keras.layers.Layer): input_tensor: tf.Tensor, head_mask: tf.Tensor, output_attentions: bool, - relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None, + relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None, training: bool = False, ) -> tuple[tf.Tensor]: self_outputs = self.attention( @@ -538,8 +538,8 @@ class TFData2VecVisionLayer(keras.layers.Layer): trainable=True, name="lambda_2", ) - self.lambda_1.assign(self.init_values * tf.ones((self.config.hidden_size))) - self.lambda_2.assign(self.init_values * tf.ones((self.config.hidden_size))) + self.lambda_1.assign(self.init_values * tf.ones(self.config.hidden_size)) + self.lambda_2.assign(self.init_values * 
tf.ones(self.config.hidden_size)) else: self.lambda_1, self.lambda_2 = None, None @@ -570,7 +570,7 @@ class TFData2VecVisionLayer(keras.layers.Layer): hidden_states: tf.Tensor, head_mask: tf.Tensor, output_attentions: bool, - relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None, + relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None, training: bool = False, ) -> tuple[tf.Tensor]: self_attention_outputs = self.attention( diff --git a/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py b/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py index 9a768bfd9f9..6eb50621891 100644 --- a/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py +++ b/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py @@ -113,7 +113,7 @@ class DeepseekV3TopkRouter(nn.Module): self.norm_topk_prob = config.norm_topk_prob self.weight = nn.Parameter(torch.empty((self.n_routed_experts, config.hidden_size))) - self.register_buffer("e_score_correction_bias", torch.zeros((self.n_routed_experts))) + self.register_buffer("e_score_correction_bias", torch.zeros(self.n_routed_experts)) @torch.no_grad() def get_topk_indices(self, scores): diff --git a/src/transformers/models/deepseek_v3/modular_deepseek_v3.py b/src/transformers/models/deepseek_v3/modular_deepseek_v3.py index b25ae45c06d..73f3acc2047 100644 --- a/src/transformers/models/deepseek_v3/modular_deepseek_v3.py +++ b/src/transformers/models/deepseek_v3/modular_deepseek_v3.py @@ -110,7 +110,7 @@ class DeepseekV3TopkRouter(nn.Module): self.norm_topk_prob = config.norm_topk_prob self.weight = nn.Parameter(torch.empty((self.n_routed_experts, config.hidden_size))) - self.register_buffer("e_score_correction_bias", torch.zeros((self.n_routed_experts))) + self.register_buffer("e_score_correction_bias", torch.zeros(self.n_routed_experts)) @torch.no_grad() def get_topk_indices(self, scores): diff --git a/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py index fa84afc00d3..5e1a0cdf9aa 100644 --- a/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py @@ -270,7 +270,7 @@ class EfficientFormerDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class EfficientFormerFlat(nn.Module): @@ -303,8 +303,8 @@ class EfficientFormerMeta3D(nn.Module): self.drop_path = EfficientFormerDropPath(drop_path) if drop_path > 0.0 else nn.Identity() self.use_layer_scale = config.use_layer_scale if config.use_layer_scale: - self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True) - self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True) + self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True) + self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True) def forward(self, hidden_states: torch.Tensor, output_attentions: bool = False) -> tuple[torch.Tensor]: self_attention_outputs = self.token_mixer(self.layernorm1(hidden_states), output_attentions) @@ -370,8 +370,8 @@ class EfficientFormerMeta4D(nn.Module): self.drop_path = EfficientFormerDropPath(drop_path) if drop_path > 0.0 else nn.Identity() 
self.use_layer_scale = config.use_layer_scale if config.use_layer_scale: - self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True) - self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True) + self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True) + self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True) def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor]: outputs = self.token_mixer(hidden_states) diff --git a/src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py b/src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py index e6f2c212217..4c33abd043e 100644 --- a/src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py +++ b/src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for Ernie-M.""" -import io import os import unicodedata from typing import Any, Optional @@ -172,7 +171,7 @@ class ErnieMTokenizer(PreTrainedTokenizer): def clean_text(self, text): """Performs invalid character removal and whitespace cleanup on text.""" - return "".join((self.SP_CHAR_MAPPING.get(c, c) for c in text)) + return "".join(self.SP_CHAR_MAPPING.get(c, c) for c in text) def _tokenize(self, text, enable_sampling=False, nbest_size=64, alpha=0.1): """Tokenize a string.""" @@ -373,7 +372,7 @@ class ErnieMTokenizer(PreTrainedTokenizer): def load_vocab(self, filepath): token_to_idx = {} - with io.open(filepath, "r", encoding="utf-8") as f: + with open(filepath, "r", encoding="utf-8") as f: for index, line in enumerate(f): token = line.rstrip("\n") token_to_idx[token] = int(index) diff --git a/src/transformers/models/deprecated/mega/modeling_mega.py b/src/transformers/models/deprecated/mega/modeling_mega.py index d6ac15bebf7..c31dc909284 100644 --- a/src/transformers/models/deprecated/mega/modeling_mega.py +++ b/src/transformers/models/deprecated/mega/modeling_mega.py @@ -118,7 +118,7 @@ class MegaSimpleRelativePositionalBias(nn.Module): def forward(self, seq_len): if seq_len > self.max_positions: - raise ValueError("Sequence length {} going beyond max length {}".format(seq_len, self.max_positions)) + raise ValueError(f"Sequence length {seq_len} going beyond max length {self.max_positions}") # seq_len * 2 - 1 bias = self.rel_pos_bias[(self.max_positions - seq_len) : (self.max_positions + seq_len - 1)] @@ -298,7 +298,7 @@ class MegaSequenceNorm(nn.Module): elif norm_type == "syncbatchnorm": self.norm = nn.SyncBatchNorm(embedding_dim, eps=eps, affine=affine) else: - raise ValueError("Unknown norm type: {}".format(norm_type)) + raise ValueError(f"Unknown norm type: {norm_type}") def forward(self, input): if isinstance(self.norm, nn.modules.batchnorm._BatchNorm): @@ -563,7 +563,7 @@ class MegaGatedCrossAttention(nn.Module): elif self.config.relative_positional_bias == "rotary": self.rel_pos_bias = MegaRotaryRelativePositionalBias(config) else: - raise ValueError("unknown relative position bias: {}".format(self.config.relative_positional_bias)) + raise ValueError(f"unknown relative position bias: {self.config.relative_positional_bias}") self.softmax = nn.Softmax(dim=-1) diff --git a/src/transformers/models/deprecated/nat/modeling_nat.py b/src/transformers/models/deprecated/nat/modeling_nat.py index 930c8f29821..15c7dc62b35 100644 --- a/src/transformers/models/deprecated/nat/modeling_nat.py +++ 
b/src/transformers/models/deprecated/nat/modeling_nat.py @@ -287,7 +287,7 @@ class NatDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class NeighborhoodAttention(nn.Module): diff --git a/src/transformers/models/deprecated/tapex/tokenization_tapex.py b/src/transformers/models/deprecated/tapex/tokenization_tapex.py index 654eba14a82..4ad87dd4e7a 100644 --- a/src/transformers/models/deprecated/tapex/tokenization_tapex.py +++ b/src/transformers/models/deprecated/tapex/tokenization_tapex.py @@ -99,7 +99,7 @@ TAPEX_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r""" """ -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/deprecated/van/modeling_van.py b/src/transformers/models/deprecated/van/modeling_van.py index b31b0616ffa..2d9917164d9 100644 --- a/src/transformers/models/deprecated/van/modeling_van.py +++ b/src/transformers/models/deprecated/van/modeling_van.py @@ -79,7 +79,7 @@ class VanDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class VanOverlappingPatchEmbedder(nn.Module): @@ -204,7 +204,7 @@ class VanLayerScaling(nn.Module): def __init__(self, hidden_size: int, initial_value: float = 1e-2): super().__init__() - self.weight = nn.Parameter(initial_value * torch.ones((hidden_size)), requires_grad=True) + self.weight = nn.Parameter(initial_value * torch.ones(hidden_size), requires_grad=True) def forward(self, hidden_state: torch.Tensor) -> torch.Tensor: # unsqueezing for broadcasting diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py index 25a1a3ae5c6..2afe614dc97 100644 --- a/src/transformers/models/dinat/modeling_dinat.py +++ b/src/transformers/models/dinat/modeling_dinat.py @@ -275,7 +275,7 @@ class DinatDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class NeighborhoodAttention(nn.Module): diff --git a/src/transformers/models/dinov2/modeling_dinov2.py b/src/transformers/models/dinov2/modeling_dinov2.py index 8dfd7fc4097..bd35abb941a 100644 --- a/src/transformers/models/dinov2/modeling_dinov2.py +++ b/src/transformers/models/dinov2/modeling_dinov2.py @@ -343,7 +343,7 @@ class Dinov2DropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class Dinov2MLP(nn.Module): diff --git a/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py b/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py index ba3ecf08748..c2eeb197021 100644 --- a/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +++ b/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py @@ -360,7 +360,7 @@ class Dinov2WithRegistersDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class Dinov2WithRegistersMLP(nn.Module): diff --git 
a/src/transformers/models/donut/modeling_donut_swin.py b/src/transformers/models/donut/modeling_donut_swin.py index 222b0986423..a63b0d3f0f5 100644 --- a/src/transformers/models/donut/modeling_donut_swin.py +++ b/src/transformers/models/donut/modeling_donut_swin.py @@ -393,7 +393,7 @@ class DonutSwinDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" # Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->DonutSwin @@ -625,7 +625,7 @@ class DonutSwinLayer(nn.Module): mask_windows = window_partition(img_mask, self.window_size) mask_windows = mask_windows.view(-1, self.window_size * self.window_size) attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0) else: attn_mask = None return attn_mask diff --git a/src/transformers/models/esm/modeling_esmfold.py b/src/transformers/models/esm/modeling_esmfold.py index 9012a2ef814..3338b3d5007 100644 --- a/src/transformers/models/esm/modeling_esmfold.py +++ b/src/transformers/models/esm/modeling_esmfold.py @@ -1414,7 +1414,7 @@ class EsmFoldInvariantPointAttention(nn.Module): self.linear_b = EsmFoldLinear(c_z, config.num_heads_ipa) - self.head_weights = nn.Parameter(torch.zeros((config.num_heads_ipa))) + self.head_weights = nn.Parameter(torch.zeros(config.num_heads_ipa)) concat_out_dim = config.num_heads_ipa * (c_z + config.ipa_dim + config.num_v_points * 4) self.linear_out = EsmFoldLinear(concat_out_dim, c_s, init="final") diff --git a/src/transformers/models/esm/openfold_utils/residue_constants.py b/src/transformers/models/esm/openfold_utils/residue_constants.py index 235e54bfe49..9af36d7db74 100644 --- a/src/transformers/models/esm/openfold_utils/residue_constants.py +++ b/src/transformers/models/esm/openfold_utils/residue_constants.py @@ -398,7 +398,7 @@ def map_structure_with_atom_order(in_list: list, first_call: bool = True) -> lis return in_list -@functools.lru_cache(maxsize=None) +@functools.cache def load_stereo_chemical_props() -> tuple[ Mapping[str, list[Bond]], Mapping[str, list[Bond]], diff --git a/src/transformers/models/esm/openfold_utils/rigid_utils.py b/src/transformers/models/esm/openfold_utils/rigid_utils.py index 53bef9a4437..8d86151c4b0 100644 --- a/src/transformers/models/esm/openfold_utils/rigid_utils.py +++ b/src/transformers/models/esm/openfold_utils/rigid_utils.py @@ -16,7 +16,7 @@ from __future__ import annotations from collections.abc import Sequence -from functools import lru_cache +from functools import cache from typing import Any, Callable, Optional import numpy as np @@ -75,7 +75,7 @@ def rot_vec_mul(r: torch.Tensor, t: torch.Tensor) -> torch.Tensor: ) -@lru_cache(maxsize=None) +@cache def identity_rot_mats( batch_dims: tuple[int, ...], dtype: Optional[torch.dtype] = None, @@ -90,7 +90,7 @@ def identity_rot_mats( return rots -@lru_cache(maxsize=None) +@cache def identity_trans( batch_dims: tuple[int, ...], dtype: Optional[torch.dtype] = None, @@ -101,7 +101,7 @@ def identity_trans( return trans -@lru_cache(maxsize=None) +@cache def identity_quats( batch_dims: tuple[int, ...], dtype: Optional[torch.dtype] = None, @@ -220,7 +220,7 @@ _CACHED_QUATS: dict[str, np.ndarray] = { } -@lru_cache(maxsize=None) +@cache def _get_quat(quat_key: str, dtype: torch.dtype, 
device: torch.device) -> torch.Tensor: return torch.tensor(_CACHED_QUATS[quat_key], dtype=dtype, device=device) @@ -1070,7 +1070,7 @@ class Rigid: e0 = [c / denom for c in e0] dot = sum((c1 * c2 for c1, c2 in zip(e0, e1))) e1 = [c2 - c1 * dot for c1, c2 in zip(e0, e1)] - denom = torch.sqrt(sum((c * c for c in e1)) + eps * torch.ones_like(e1[0])) + denom = torch.sqrt(sum(c * c for c in e1) + eps * torch.ones_like(e1[0])) e1 = [c / denom for c in e1] e2 = [ e0[1] * e1[2] - e0[2] * e1[1], diff --git a/src/transformers/models/falcon_h1/modeling_falcon_h1.py b/src/transformers/models/falcon_h1/modeling_falcon_h1.py index 662d6254c84..eb75d8f2b80 100644 --- a/src/transformers/models/falcon_h1/modeling_falcon_h1.py +++ b/src/transformers/models/falcon_h1/modeling_falcon_h1.py @@ -949,7 +949,7 @@ class FalconH1Mixer(nn.Module): # 2. Compute the state for each intra-chunk # (right term of low-rank factorization of off-diagonal blocks; B terms) - decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum)) + decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum) B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None] states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2) diff --git a/src/transformers/models/falcon_h1/modular_falcon_h1.py b/src/transformers/models/falcon_h1/modular_falcon_h1.py index 85cd4db9116..3572d8eb454 100644 --- a/src/transformers/models/falcon_h1/modular_falcon_h1.py +++ b/src/transformers/models/falcon_h1/modular_falcon_h1.py @@ -748,7 +748,7 @@ class FalconH1Mixer(nn.Module): # 2. Compute the state for each intra-chunk # (right term of low-rank factorization of off-diagonal blocks; B terms) - decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum)) + decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum) B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None] states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2) diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index 70f29affdf6..fc8f47d87ce 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -318,7 +318,7 @@ class FlavaImageProcessor(BaseImageProcessor): image_processor_dict["codebook_crop_size"] = kwargs.pop("codebook_crop_size") return super().from_dict(image_processor_dict, **kwargs) - @lru_cache() + @lru_cache def masking_generator( self, input_size_patches, diff --git a/src/transformers/models/flava/image_processing_flava_fast.py b/src/transformers/models/flava/image_processing_flava_fast.py index 71ea422d90b..cab61081cf4 100644 --- a/src/transformers/models/flava/image_processing_flava_fast.py +++ b/src/transformers/models/flava/image_processing_flava_fast.py @@ -273,7 +273,7 @@ class FlavaImageProcessorFast(BaseImageProcessorFast): image_processor_dict["codebook_crop_size"] = kwargs.pop("codebook_crop_size") return super().from_dict(image_processor_dict, **kwargs) - @lru_cache() + @lru_cache def masking_generator( self, input_size_patches, diff --git a/src/transformers/models/flava/modeling_flava.py b/src/transformers/models/flava/modeling_flava.py index 99ffbaa9f30..3bd7b45d0dc 100644 --- a/src/transformers/models/flava/modeling_flava.py +++ b/src/transformers/models/flava/modeling_flava.py @@ -1446,7 +1446,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel): param.requires_grad = False def get_codebook_indices(self, pixel_values: torch.Tensor) -> torch.Tensor: - """ + f""" Args: pixel_values 
(`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): Pixel values. Codebook pixel values can be obtained using [`AutoImageProcessor`] by passing @@ -1458,8 +1458,8 @@ class FlavaImageCodebook(FlavaPreTrainedModel): >>> import requests >>> from transformers import AutoImageProcessor, FlavaImageCodebook - >>> model = FlavaImageCodebook.from_pretrained("{0}") - >>> image_processor = AutoImageProcessor.from_pretrained("{0}") + >>> model = FlavaImageCodebook.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}") + >>> image_processor = AutoImageProcessor.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}") >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) @@ -1469,7 +1469,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel): >>> outputs = model.get_codebook_indices(**inputs) ``` - """.format(_CHECKPOINT_FOR_CODEBOOK_DOC) + """ z_logits = self.blocks(pixel_values) return torch.argmax(z_logits, axis=1) @@ -1478,7 +1478,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel): return nn.Softmax(dim=1)(z_logits) def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: - """ + f""" Args: pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): Pixel values. Codebook pixel values can be obtained using [`AutoImageProcessor`] by passing @@ -1491,8 +1491,8 @@ class FlavaImageCodebook(FlavaPreTrainedModel): >>> import requests >>> from transformers import AutoImageProcessor, FlavaImageCodebook - >>> model = FlavaImageCodebook.from_pretrained("{0}") - >>> image_processor = AutoImageProcessor.from_pretrained("{0}") + >>> model = FlavaImageCodebook.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}") + >>> image_processor = AutoImageProcessor.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}") >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) @@ -1504,7 +1504,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel): >>> print(outputs.shape) (1, 196) ``` - """.format(_CHECKPOINT_FOR_CODEBOOK_DOC) + """ if len(pixel_values.shape) != 4: raise ValueError(f"input shape {pixel_values.shape} is not 4d") if pixel_values.shape[1] != self.input_channels: diff --git a/src/transformers/models/fnet/tokenization_fnet.py b/src/transformers/models/fnet/tokenization_fnet.py index a03f1b42a98..72aa202612e 100644 --- a/src/transformers/models/fnet/tokenization_fnet.py +++ b/src/transformers/models/fnet/tokenization_fnet.py @@ -177,7 +177,7 @@ class FNetTokenizer(PreTrainedTokenizer): pieces = self.sp_model.encode(text, out_type=str) new_pieces = [] for piece in pieces: - if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit(): + if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit(): cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, "")) if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE: if len(cur_pieces[0]) == 1: diff --git a/src/transformers/models/focalnet/modeling_focalnet.py b/src/transformers/models/focalnet/modeling_focalnet.py index 54abac6625b..232f1e6ed1f 100644 --- a/src/transformers/models/focalnet/modeling_focalnet.py +++ b/src/transformers/models/focalnet/modeling_focalnet.py @@ -293,7 +293,7 @@ class FocalNetDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class FocalNetModulation(nn.Module): @@ -431,8 +431,8 @@ class 
FocalNetLayer(nn.Module): self.gamma_1 = 1.0 self.gamma_2 = 1.0 if config.use_layerscale: - self.gamma_1 = nn.Parameter(config.layerscale_value * torch.ones((dim)), requires_grad=True) - self.gamma_2 = nn.Parameter(config.layerscale_value * torch.ones((dim)), requires_grad=True) + self.gamma_1 = nn.Parameter(config.layerscale_value * torch.ones(dim), requires_grad=True) + self.gamma_2 = nn.Parameter(config.layerscale_value * torch.ones(dim), requires_grad=True) def forward(self, hidden_state, input_dimensions): height, width = input_dimensions diff --git a/src/transformers/models/glpn/modeling_glpn.py b/src/transformers/models/glpn/modeling_glpn.py index 299eef53d1c..8715a09613a 100755 --- a/src/transformers/models/glpn/modeling_glpn.py +++ b/src/transformers/models/glpn/modeling_glpn.py @@ -65,7 +65,7 @@ class GLPNDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" # Copied from transformers.models.segformer.modeling_segformer.SegformerOverlapPatchEmbeddings diff --git a/src/transformers/models/gpt2/tokenization_gpt2.py b/src/transformers/models/gpt2/tokenization_gpt2.py index dc3dda8a0b1..608164ef2d8 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2.py +++ b/src/transformers/models/gpt2/tokenization_gpt2.py @@ -33,7 +33,7 @@ VOCAB_FILES_NAMES = { } -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py b/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py index 51785d48f4c..62af013448c 100644 --- a/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +++ b/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py @@ -757,7 +757,7 @@ class GraniteMoeHybridMambaLayer(nn.Module): # 2. 
Compute the state for each intra-chunk # (right term of low-rank factorization of off-diagonal blocks; B terms) - decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum)) + decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum) B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None] states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2) diff --git a/src/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/transformers/models/grounding_dino/modeling_grounding_dino.py index 9603651fafc..200158d2ccb 100644 --- a/src/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -921,7 +921,7 @@ class GroundingDinoDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class GroundingDinoFusionLayer(nn.Module): @@ -937,8 +937,8 @@ class GroundingDinoFusionLayer(nn.Module): # add layer scale for training stability self.drop_path = GroundingDinoDropPath(drop_path) if drop_path > 0.0 else nn.Identity() init_values = 1e-4 - self.vision_param = nn.Parameter(init_values * torch.ones((config.d_model)), requires_grad=True) - self.text_param = nn.Parameter(init_values * torch.ones((config.d_model)), requires_grad=True) + self.vision_param = nn.Parameter(init_values * torch.ones(config.d_model), requires_grad=True) + self.text_param = nn.Parameter(init_values * torch.ones(config.d_model), requires_grad=True) def forward( self, diff --git a/src/transformers/models/hiera/modeling_hiera.py b/src/transformers/models/hiera/modeling_hiera.py index e542c7b69a5..2fadde33211 100644 --- a/src/transformers/models/hiera/modeling_hiera.py +++ b/src/transformers/models/hiera/modeling_hiera.py @@ -459,7 +459,7 @@ class HieraDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class HieraMlp(nn.Module): diff --git a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py index 4966340493f..8d31e5e5c3b 100644 --- a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py @@ -203,7 +203,7 @@ def convert_hubert_checkpoint( config.vocab_size = len(target_dict.symbols) vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json") if not os.path.isdir(pytorch_dump_folder_path): - logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path)) + logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory") return os.makedirs(pytorch_dump_folder_path, exist_ok=True) with open(vocab_path, "w", encoding="utf-8") as vocab_handle: diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index f4a6715d6cf..ff47c6457b9 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -300,12 +300,7 @@ class IdeficsDecoupledEmbedding(nn.Embedding): return full_vector def extra_repr(self) -> str: - return "num_embeddings={}, num_additional_embeddings={}, embedding_dim={}, partially_freeze={}".format( - self.num_embeddings, - 
self.num_additional_embeddings, - self.embedding_dim, - self.partially_freeze, - ) + return f"num_embeddings={self.num_embeddings}, num_additional_embeddings={self.num_additional_embeddings}, embedding_dim={self.embedding_dim}, partially_freeze={self.partially_freeze}" class IdeficsDecoupledLinear(nn.Linear): @@ -364,13 +359,7 @@ class IdeficsDecoupledLinear(nn.Linear): def extra_repr(self) -> str: """Overwriting `nn.Linear.extra_repr` to include new parameters.""" - return "in_features={}, out_features={}, out_additional_features={}, bias={}, partially_freeze={}".format( - self.in_features, - self.out_features, - self.out_additional_features, - self.bias is not None, - self.partially_freeze, - ) + return f"in_features={self.in_features}, out_features={self.out_features}, out_additional_features={self.out_additional_features}, bias={self.bias is not None}, partially_freeze={self.partially_freeze}" # this was adapted from LlamaRMSNorm diff --git a/src/transformers/models/idefics/modeling_tf_idefics.py b/src/transformers/models/idefics/modeling_tf_idefics.py index 662fe4d88d1..405dee0d8f8 100644 --- a/src/transformers/models/idefics/modeling_tf_idefics.py +++ b/src/transformers/models/idefics/modeling_tf_idefics.py @@ -362,12 +362,7 @@ class TFIdeficsDecoupledEmbedding(tf.keras.layers.Embedding): return full_vector def extra_repr(self) -> str: - return "num_embeddings={}, num_additional_embeddings={}, embedding_dim={}, partially_freeze={}".format( - self.num_embeddings, - self.num_additional_embeddings, - self.output_dim, - self.partially_freeze, - ) + return f"num_embeddings={self.num_embeddings}, num_additional_embeddings={self.num_additional_embeddings}, embedding_dim={self.output_dim}, partially_freeze={self.partially_freeze}" class TFIdeficsDecoupledLinear(tf.keras.layers.Layer): @@ -431,13 +426,7 @@ class TFIdeficsDecoupledLinear(tf.keras.layers.Layer): def extra_repr(self) -> str: """Overwriting `nn.Linear.extra_repr` to include new parameters.""" - return "in_features={}, out_features={}, out_additional_features={}, bias={}, partially_freeze={}".format( - self.in_features, - self.out_features, - self.out_additional_features, - self.bias is not None, - self.partially_freeze, - ) + return f"in_features={self.in_features}, out_features={self.out_features}, out_additional_features={self.out_additional_features}, bias={self.bias is not None}, partially_freeze={self.partially_freeze}" @classmethod def from_config(cls, config): diff --git a/src/transformers/models/imagegpt/modeling_imagegpt.py b/src/transformers/models/imagegpt/modeling_imagegpt.py index ebcec593fe1..db5ae763aad 100755 --- a/src/transformers/models/imagegpt/modeling_imagegpt.py +++ b/src/transformers/models/imagegpt/modeling_imagegpt.py @@ -60,14 +60,14 @@ def load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path): ) raise tf_path = os.path.abspath(imagegpt_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array.squeeze()) @@ -129,7 +129,7 @@ def load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path): e.args += (pointer.shape, array.shape) raise - 
logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") if name[-1] == "q_proj": pointer.data[:, : config.n_embd] = torch.from_numpy(array.reshape(config.n_embd, config.n_embd)).T diff --git a/src/transformers/models/internvl/modeling_internvl.py b/src/transformers/models/internvl/modeling_internvl.py index 3958c780182..485adea8363 100644 --- a/src/transformers/models/internvl/modeling_internvl.py +++ b/src/transformers/models/internvl/modeling_internvl.py @@ -397,8 +397,8 @@ class InternVLVisionLayer(nn.Module): self.layernorm_after = NORM2FN[config.norm_type](config.hidden_size, eps=config.layer_norm_eps) init_values = config.layer_scale_init_value - self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) - self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) + self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) + self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) self.dropout = nn.Dropout(config.hidden_dropout_prob) def forward( diff --git a/src/transformers/models/internvl/modular_internvl.py b/src/transformers/models/internvl/modular_internvl.py index 7ba30c92274..90576676b3c 100644 --- a/src/transformers/models/internvl/modular_internvl.py +++ b/src/transformers/models/internvl/modular_internvl.py @@ -348,8 +348,8 @@ class InternVLVisionLayer(nn.Module): self.layernorm_after = NORM2FN[config.norm_type](config.hidden_size, eps=config.layer_norm_eps) init_values = config.layer_scale_init_value - self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) - self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True) + self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) + self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True) self.dropout = nn.Dropout(config.hidden_dropout_prob) def forward( diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py index 6e2ef36612f..b69fc57b174 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py @@ -140,7 +140,7 @@ LAYOUTLMV3_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r""" """ -@lru_cache() +@lru_cache # Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode def bytes_to_unicode(): """ diff --git a/src/transformers/models/led/tokenization_led.py b/src/transformers/models/led/tokenization_led.py index ee01b3aec3b..d110ac30d96 100644 --- a/src/transformers/models/led/tokenization_led.py +++ b/src/transformers/models/led/tokenization_led.py @@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"} # See all LED models at https://huggingface.co/models?filter=LED -@lru_cache() +@lru_cache # Copied from transformers.models.bart.tokenization_bart.bytes_to_unicode def bytes_to_unicode(): """ diff --git a/src/transformers/models/lightglue/modeling_lightglue.py b/src/transformers/models/lightglue/modeling_lightglue.py index 4df4888621e..5a7b969b94a 100644 --- a/src/transformers/models/lightglue/modeling_lightglue.py +++ b/src/transformers/models/lightglue/modeling_lightglue.py @@ -797,22 +797,18 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel): # 
Remove image pairs that have been early stopped from the forward pass num_points_per_pair = num_points_per_pair[~early_stopped_pairs] descriptors, keypoints_0, keypoint_1, mask, image_indices = tuple( - ( - tensor[~early_stops] - for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices] - ) + tensor[~early_stops] + for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices] ) keypoints = (keypoints_0, keypoint_1) if do_keypoint_pruning: pruned_keypoints_indices, pruned_keypoints_iterations, keypoint_confidences = tuple( - ( - tensor[~early_stops] - for tensor in [ - pruned_keypoints_indices, - pruned_keypoints_iterations, - keypoint_confidences, - ] - ) + tensor[~early_stops] + for tensor in [ + pruned_keypoints_indices, + pruned_keypoints_iterations, + keypoint_confidences, + ] ) # If all pairs of images are early stopped, we stop the forward pass through the transformer # layers for all pairs of images. diff --git a/src/transformers/models/lightglue/modular_lightglue.py b/src/transformers/models/lightglue/modular_lightglue.py index 96a389194b4..bbdc595330b 100644 --- a/src/transformers/models/lightglue/modular_lightglue.py +++ b/src/transformers/models/lightglue/modular_lightglue.py @@ -871,22 +871,18 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel): # Remove image pairs that have been early stopped from the forward pass num_points_per_pair = num_points_per_pair[~early_stopped_pairs] descriptors, keypoints_0, keypoint_1, mask, image_indices = tuple( - ( - tensor[~early_stops] - for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices] - ) + tensor[~early_stops] + for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices] ) keypoints = (keypoints_0, keypoint_1) if do_keypoint_pruning: pruned_keypoints_indices, pruned_keypoints_iterations, keypoint_confidences = tuple( - ( - tensor[~early_stops] - for tensor in [ - pruned_keypoints_indices, - pruned_keypoints_iterations, - keypoint_confidences, - ] - ) + tensor[~early_stops] + for tensor in [ + pruned_keypoints_indices, + pruned_keypoints_iterations, + keypoint_confidences, + ] ) # If all pairs of images are early stopped, we stop the forward pass through the transformer # layers for all pairs of images. 
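The hunks above and below apply the same handful of mechanical cleanups across many files: dropping redundant parentheses around generator expressions, single values, and literals; replacing `str.format()` calls with f-strings; and using `@lru_cache` as a bare decorator. The following short, self-contained sketch is not part of the patch; it only illustrates, with hypothetical values and a hypothetical stand-in function, why each before/after pair in these hunks is behavior-preserving.

from functools import lru_cache

# 1. Parentheses around a generator expression are redundant when it is the
#    sole argument of a call: tuple((x for x in it)) == tuple(x for x in it).
samples = [0.1, 0.2, 0.3]
assert tuple((s * 2 for s in samples)) == tuple(s * 2 for s in samples)

# 2. `(dim)` is just `dim`, not a one-element tuple, so torch.ones((dim)) and
#    torch.ones(dim) are the same call. An actual tuple needs a trailing comma.
dim = 8
assert (dim) == dim and (dim,) == (8,)

# 3. f-strings are equivalent to str.format() for the simple substitutions in
#    these hunks, e.g. DropPath.extra_repr() and the logger/print messages.
drop_prob = 0.1
assert "p={}".format(drop_prob) == f"p={drop_prob}"

# 4. Since Python 3.8, functools.lru_cache can be applied without calling it,
#    so `@lru_cache` is equivalent to `@lru_cache()` with the default maxsize.
@lru_cache
def bytes_to_unicode_stub(n: int) -> str:  # hypothetical stand-in function
    return chr(n)

assert bytes_to_unicode_stub(65) == "A"

# 5. Wrappers around values that are already the right type add nothing:
#    float(-100.0) is just -100.0, and exp((a - b)) is exp(a - b).
assert float(-100.0) == -100.0

Because each rewrite is purely stylistic, the hunks can be reviewed pattern by pattern rather than file by file; none of them change the tensors, strings, or cache behavior produced at runtime.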
diff --git a/src/transformers/models/llava/convert_llava_weights_to_hf.py b/src/transformers/models/llava/convert_llava_weights_to_hf.py index 3631de33af6..4d9609cb63a 100644 --- a/src/transformers/models/llava/convert_llava_weights_to_hf.py +++ b/src/transformers/models/llava/convert_llava_weights_to_hf.py @@ -161,13 +161,11 @@ def convert_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_path, o vocab_size = config.text_config.vocab_size model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape) model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack( - tuple( - (dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])) - ), + tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])), dim=0, ) model.language_model.lm_head.weight.data[vocab_size:] = torch.stack( - tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))), + tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])), dim=0, ) diff --git a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py index 5d6e098d03c..8aee180dd77 100644 --- a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py +++ b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py @@ -175,15 +175,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False): model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape) model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack( tuple( - ( - dist.sample() - for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]) - ) + dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]) ), dim=0, ) model.language_model.lm_head.weight.data[vocab_size:] = torch.stack( - tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))), + tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])), dim=0, ) diff --git a/src/transformers/models/llava_next_video/convert_llava_next_video_weights_to_hf.py b/src/transformers/models/llava_next_video/convert_llava_next_video_weights_to_hf.py index 2877b2e9dd8..265e543cb55 100644 --- a/src/transformers/models/llava_next_video/convert_llava_next_video_weights_to_hf.py +++ b/src/transformers/models/llava_next_video/convert_llava_next_video_weights_to_hf.py @@ -227,13 +227,11 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False): num_tokens = vocab_size + 3 model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape) model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack( - tuple( - (dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])) - ), + tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])), dim=0, ) model.language_model.lm_head.weight.data[vocab_size:] = torch.stack( - tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))), + tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])), dim=0, ) diff --git 
a/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py b/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py index 8b0ebe03bf5..79bcad09ce1 100644 --- a/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py +++ b/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py @@ -176,13 +176,11 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False): num_tokens = vocab_size + 2 model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape) model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack( - tuple( - (dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])) - ), + tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])), dim=0, ) model.language_model.lm_head.weight.data[vocab_size:] = torch.stack( - tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))), + tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])), dim=0, ) diff --git a/src/transformers/models/longformer/tokenization_longformer.py b/src/transformers/models/longformer/tokenization_longformer.py index fb033493c35..104bdd7a9b9 100644 --- a/src/transformers/models/longformer/tokenization_longformer.py +++ b/src/transformers/models/longformer/tokenization_longformer.py @@ -30,7 +30,7 @@ logger = logging.get_logger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"} -@lru_cache() +@lru_cache # Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode def bytes_to_unicode(): """ diff --git a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py index aae550e8d09..5e0e461862a 100644 --- a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py @@ -127,7 +127,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p raise ValueError # Finally, save our PyTorch model and tokenizer - print("Saving PyTorch model to {}".format(pytorch_dump_folder_path)) + print(f"Saving PyTorch model to {pytorch_dump_folder_path}") model.save_pretrained(pytorch_dump_folder_path) diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py index b91f4e8c2ed..8b330558778 100644 --- a/src/transformers/models/luke/tokenization_luke.py +++ b/src/transformers/models/luke/tokenization_luke.py @@ -130,7 +130,7 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r""" """ -@lru_cache() +@lru_cache # Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode def bytes_to_unicode(): """ diff --git a/src/transformers/models/mamba2/modeling_mamba2.py b/src/transformers/models/mamba2/modeling_mamba2.py index 638ae9ccbb4..17925c5acc0 100644 --- a/src/transformers/models/mamba2/modeling_mamba2.py +++ b/src/transformers/models/mamba2/modeling_mamba2.py @@ -606,7 +606,7 @@ class Mamba2Mixer(nn.Module): # 2. 
Compute the state for each intra-chunk # (right term of low-rank factorization of off-diagonal blocks; B terms) - decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum)) + decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum) B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None] states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2) diff --git a/src/transformers/models/markuplm/tokenization_markuplm.py b/src/transformers/models/markuplm/tokenization_markuplm.py index 9ad5bf688c4..a090e11ec36 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm.py +++ b/src/transformers/models/markuplm/tokenization_markuplm.py @@ -89,7 +89,7 @@ MARKUPLM_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r""" """ -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/markuplm/tokenization_markuplm_fast.py b/src/transformers/models/markuplm/tokenization_markuplm_fast.py index d66ac53fd26..f8448936419 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm_fast.py +++ b/src/transformers/models/markuplm/tokenization_markuplm_fast.py @@ -44,7 +44,7 @@ logger = logging.get_logger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"} -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index db942a3ac6e..3eb559dfcdb 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -1247,8 +1247,8 @@ class Mask2FormerPixelDecoder(nn.Module): nn.GroupNorm(32, feature_dim), nn.ReLU(), ) - self.add_module("adapter_{}".format(idx + 1), lateral_conv) - self.add_module("layer_{}".format(idx + 1), output_conv) + self.add_module(f"adapter_{idx + 1}", lateral_conv) + self.add_module(f"layer_{idx + 1}", output_conv) lateral_convs.append(lateral_conv) output_convs.append(output_conv) diff --git a/src/transformers/models/maskformer/modeling_maskformer_swin.py b/src/transformers/models/maskformer/modeling_maskformer_swin.py index f4845424850..b7505aa6748 100644 --- a/src/transformers/models/maskformer/modeling_maskformer_swin.py +++ b/src/transformers/models/maskformer/modeling_maskformer_swin.py @@ -333,7 +333,7 @@ class MaskFormerSwinDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" # Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->MaskFormerSwin @@ -556,7 +556,7 @@ class MaskFormerSwinLayer(nn.Module): mask_windows = window_partition(img_mask, self.window_size) mask_windows = mask_windows.view(-1, self.window_size * self.window_size) attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0) else: attn_mask = None return attn_mask diff --git a/src/transformers/models/megatron_bert/modeling_megatron_bert.py b/src/transformers/models/megatron_bert/modeling_megatron_bert.py 
index abbda408588..d22b1536081 100755 --- a/src/transformers/models/megatron_bert/modeling_megatron_bert.py +++ b/src/transformers/models/megatron_bert/modeling_megatron_bert.py @@ -62,7 +62,7 @@ def load_tf_weights_in_megatron_bert(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] @@ -112,7 +112,7 @@ def load_tf_weights_in_megatron_bert(model, config, tf_checkpoint_path): array = np.transpose(array) if pointer.shape != array.shape: raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched") - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) return model diff --git a/src/transformers/models/mgp_str/modeling_mgp_str.py b/src/transformers/models/mgp_str/modeling_mgp_str.py index 29e4d7981f8..f422ce24ef9 100644 --- a/src/transformers/models/mgp_str/modeling_mgp_str.py +++ b/src/transformers/models/mgp_str/modeling_mgp_str.py @@ -65,7 +65,7 @@ class MgpstrDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" @dataclass diff --git a/src/transformers/models/mgp_str/tokenization_mgp_str.py b/src/transformers/models/mgp_str/tokenization_mgp_str.py index 2df2dab7796..f29b8a8348e 100644 --- a/src/transformers/models/mgp_str/tokenization_mgp_str.py +++ b/src/transformers/models/mgp_str/tokenization_mgp_str.py @@ -89,7 +89,7 @@ class MgpstrTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/mllama/image_processing_mllama.py b/src/transformers/models/mllama/image_processing_mllama.py index 47a0fcad3ee..f918908c3d1 100644 --- a/src/transformers/models/mllama/image_processing_mllama.py +++ b/src/transformers/models/mllama/image_processing_mllama.py @@ -487,7 +487,7 @@ def to_channel_dimension_format( elif target_channel_dim == ChannelDimension.LAST: image = image.transpose((1, 2, 0)) else: - raise ValueError("Unsupported channel dimension format: {}".format(channel_dim)) + raise ValueError(f"Unsupported channel dimension format: {channel_dim}") return image diff --git a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py index 5a74d4114ac..1881e26e155 100644 --- a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py @@ -179,7 +179,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p assert [e for e in multilingual_predicted_entities if e.startswith("en:")][0] == "en:Japan" # Finally, save our PyTorch model and tokenizer - print("Saving PyTorch model to 
{}".format(pytorch_dump_folder_path)) + print(f"Saving PyTorch model to {pytorch_dump_folder_path}") model.save_pretrained(pytorch_dump_folder_path) diff --git a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py index 485cbf5aa09..1fc473e0933 100644 --- a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py +++ b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py @@ -60,7 +60,7 @@ def load_orig_config_file(orig_cfg_file): for k, v in flat_cfg.items(): setattr(config, k, v) except yaml.YAMLError as exc: - logger.error("Error while loading config file: {}. Error message: {}".format(orig_cfg_file, str(exc))) + logger.error(f"Error while loading config file: {orig_cfg_file}. Error message: {str(exc)}") return config diff --git a/src/transformers/models/mvp/tokenization_mvp.py b/src/transformers/models/mvp/tokenization_mvp.py index d8fe24d5408..f6039df2dc0 100644 --- a/src/transformers/models/mvp/tokenization_mvp.py +++ b/src/transformers/models/mvp/tokenization_mvp.py @@ -32,7 +32,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"} # See all MVP models at https://huggingface.co/models?filter=mvp -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py b/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py index 77af9993033..3380118dd9e 100644 --- a/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py @@ -1142,7 +1142,7 @@ def get_class_similarity(class_distance_type, cls_feature, class_proj): elif class_distance_type == "dot": class_logits = torch.bmm(cls_feature, class_proj) else: - raise Exception("Unknown class_distance_type {}".format(class_distance_type)) + raise Exception(f"Unknown class_distance_type {class_distance_type}") return class_logits diff --git a/src/transformers/models/oneformer/modeling_oneformer.py b/src/transformers/models/oneformer/modeling_oneformer.py index bd740204b1b..d400e08cd18 100644 --- a/src/transformers/models/oneformer/modeling_oneformer.py +++ b/src/transformers/models/oneformer/modeling_oneformer.py @@ -1345,8 +1345,8 @@ class OneFormerPixelDecoder(nn.Module): nn.GroupNorm(32, config.conv_dim), nn.ReLU(), ) - self.add_module("adapter_{}".format(idx + 1), lateral_conv) - self.add_module("layer_{}".format(idx + 1), output_conv) + self.add_module(f"adapter_{idx + 1}", lateral_conv) + self.add_module(f"layer_{idx + 1}", output_conv) lateral_convs.append(lateral_conv) output_convs.append(output_conv) diff --git a/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py b/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py index 3334e6f28fc..62619ea1d39 100644 --- a/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py +++ b/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py @@ -346,13 +346,11 @@ def convert_paligemma2_checkpoint( # We add an image token so we resize the model model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape) model.language_model.model.embed_tokens.weight.data[257152:] = torch.stack( - tuple( - (dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0])) - ), + tuple(dist.sample() for _ in 
range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0])), dim=0, ) model.language_model.lm_head.weight.data[257152:] = torch.stack( - tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0]))), + tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0])), dim=0, ) # convert to needed precision diff --git a/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py b/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py index 054872a799a..b46300f1bb2 100644 --- a/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py +++ b/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py @@ -279,11 +279,11 @@ def convert_paligemma_checkpoint( # We add an image token so we resize the model model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape) model.language_model.model.embed_tokens.weight.data[257152:] = torch.stack( - tuple((dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0]))), + tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0])), dim=0, ) model.language_model.lm_head.weight.data[257152:] = torch.stack( - tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0]))), + tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0])), dim=0, ) diff --git a/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py b/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py index 457c2236694..bec4cfa688a 100644 --- a/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py +++ b/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py @@ -139,7 +139,7 @@ def convert_pix2struct_original_pytorch_checkpoint_to_hf( model.save_pretrained(pytorch_dump_folder_path) processor.save_pretrained(pytorch_dump_folder_path) - print("Model saved in {}".format(pytorch_dump_folder_path)) + print(f"Model saved in {pytorch_dump_folder_path}") if __name__ == "__main__": diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index 94e3c68ba42..ea37c22e755 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -185,7 +185,7 @@ class PoolFormerImageProcessor(BaseImageProcessor): else: scale_size = (int(size["height"] / crop_pct), int(size["width"] / crop_pct)) else: - raise ValueError("Invalid size for resize: {}".format(size)) + raise ValueError(f"Invalid size for resize: {size}") output_size = get_resize_output_image_size( image, size=scale_size, default_to_square=False, input_data_format=input_data_format @@ -198,7 +198,7 @@ class PoolFormerImageProcessor(BaseImageProcessor): elif "height" in size and "width" in size: output_size = (size["height"], size["width"]) else: - raise ValueError("Invalid size for resize: {}".format(size)) + raise ValueError(f"Invalid size for resize: {size}") return resize( image, diff --git a/src/transformers/models/poolformer/image_processing_poolformer_fast.py b/src/transformers/models/poolformer/image_processing_poolformer_fast.py index 00b5107f956..4219aa6b576 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer_fast.py +++ 
b/src/transformers/models/poolformer/image_processing_poolformer_fast.py @@ -136,7 +136,7 @@ class PoolFormerImageProcessorFast(BaseImageProcessorFast): else: scale_size = (int(size.height / crop_pct), int(size.width / crop_pct)) else: - raise ValueError("Invalid size for resize: {}".format(size)) + raise ValueError(f"Invalid size for resize: {size}") new_size = get_resize_output_image_size( image, diff --git a/src/transformers/models/poolformer/modeling_poolformer.py b/src/transformers/models/poolformer/modeling_poolformer.py index ac6a27b3dd6..8e01e398720 100755 --- a/src/transformers/models/poolformer/modeling_poolformer.py +++ b/src/transformers/models/poolformer/modeling_poolformer.py @@ -65,7 +65,7 @@ class PoolFormerDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class PoolFormerEmbeddings(nn.Module): @@ -142,10 +142,10 @@ class PoolFormerLayer(nn.Module): self.use_layer_scale = config.use_layer_scale if config.use_layer_scale: self.layer_scale_1 = nn.Parameter( - config.layer_scale_init_value * torch.ones((num_channels)), requires_grad=True + config.layer_scale_init_value * torch.ones(num_channels), requires_grad=True ) self.layer_scale_2 = nn.Parameter( - config.layer_scale_init_value * torch.ones((num_channels)), requires_grad=True + config.layer_scale_init_value * torch.ones(num_channels), requires_grad=True ) def forward(self, hidden_states): diff --git a/src/transformers/models/pop2piano/tokenization_pop2piano.py b/src/transformers/models/pop2piano/tokenization_pop2piano.py index 433edc9186b..bf0c1afdfaa 100644 --- a/src/transformers/models/pop2piano/tokenization_pop2piano.py +++ b/src/transformers/models/pop2piano/tokenization_pop2piano.py @@ -404,7 +404,7 @@ class Pop2PianoTokenizer(PreTrainedTokenizer): notes = np.round(notes).astype(np.int32) max_time_idx = notes[:, :2].max() - times = [[] for i in range((max_time_idx + 1))] + times = [[] for i in range(max_time_idx + 1)] for onset, offset, pitch, velocity in notes: times[onset].append([pitch, velocity]) times[offset].append([pitch, 0]) diff --git a/src/transformers/models/pvt/modeling_pvt.py b/src/transformers/models/pvt/modeling_pvt.py index 2b776e83b87..5fb4f8269e4 100755 --- a/src/transformers/models/pvt/modeling_pvt.py +++ b/src/transformers/models/pvt/modeling_pvt.py @@ -71,7 +71,7 @@ class PvtDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class PvtPatchEmbeddings(nn.Module): diff --git a/src/transformers/models/pvt_v2/modeling_pvt_v2.py b/src/transformers/models/pvt_v2/modeling_pvt_v2.py index e378ce390ab..7c2f48bd580 100644 --- a/src/transformers/models/pvt_v2/modeling_pvt_v2.py +++ b/src/transformers/models/pvt_v2/modeling_pvt_v2.py @@ -69,7 +69,7 @@ class PvtV2DropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class PvtV2OverlapPatchEmbeddings(nn.Module): diff --git a/src/transformers/models/qwen2/tokenization_qwen2.py b/src/transformers/models/qwen2/tokenization_qwen2.py index 1b0725ee642..be121adb544 100644 --- a/src/transformers/models/qwen2/tokenization_qwen2.py +++ b/src/transformers/models/qwen2/tokenization_qwen2.py @@ -39,7 +39,7 @@ MAX_MODEL_INPUT_SIZES = {"qwen/qwen-tokenizer": 32768} PRETOKENIZE_REGEX = 
r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+""" -@lru_cache() +@lru_cache # Copied from transformers.models.gpt2.tokenization_gpt2.bytes_to_unicode def bytes_to_unicode(): """ diff --git a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py index d6f2a0e4b5f..c4e151e9ce1 100644 --- a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py @@ -3866,7 +3866,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation check_torch_load_is_safe() for key, value in torch.load(path, weights_only=True).items(): self.speaker_map[key] = value - logger.info("Speaker {} loaded".format(list(self.speaker_map.keys()))) + logger.info(f"Speaker {list(self.speaker_map.keys())} loaded") def disable_talker(self): if hasattr(self, "talker"): diff --git a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py index 58b3b87ad38..10edb4e6a43 100644 --- a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py @@ -4134,7 +4134,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation check_torch_load_is_safe() for key, value in torch.load(path, weights_only=True).items(): self.speaker_map[key] = value - logger.info("Speaker {} loaded".format(list(self.speaker_map.keys()))) + logger.info(f"Speaker {list(self.speaker_map.keys())} loaded") def disable_talker(self): if hasattr(self, "talker"): diff --git a/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py b/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py index 622d507080e..369388c540f 100755 --- a/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py @@ -28,14 +28,14 @@ logging.set_verbosity_info() def convert_rembert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): # Initialise PyTorch model config = RemBertConfig.from_json_file(bert_config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {str(config)}") model = RemBertModel(config) # Load weights from tf checkpoint load_tf_weights_in_rembert(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index 2ceba35cf19..c45844047f6 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -535,7 +535,7 @@ class TFRemBertEncoder(keras.layers.Layer): kernel_initializer=get_initializer(config.initializer_range), name="embedding_hidden_mapping_in", ) - self.layer = [TFRemBertLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TFRemBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/src/transformers/models/rembert/tokenization_rembert.py 
b/src/transformers/models/rembert/tokenization_rembert.py index 7a424f1481f..cf27a7b3bae 100644 --- a/src/transformers/models/rembert/tokenization_rembert.py +++ b/src/transformers/models/rembert/tokenization_rembert.py @@ -218,7 +218,7 @@ class RemBertTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/rembert/tokenization_rembert_fast.py b/src/transformers/models/rembert/tokenization_rembert_fast.py index b4328543601..fb358746e6d 100644 --- a/src/transformers/models/rembert/tokenization_rembert_fast.py +++ b/src/transformers/models/rembert/tokenization_rembert_fast.py @@ -183,7 +183,7 @@ class RemBertTokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/roberta/tokenization_roberta.py b/src/transformers/models/roberta/tokenization_roberta.py index c78d9831f77..67cdcbbf488 100644 --- a/src/transformers/models/roberta/tokenization_roberta.py +++ b/src/transformers/models/roberta/tokenization_roberta.py @@ -33,7 +33,7 @@ VOCAB_FILES_NAMES = { } -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control diff --git a/src/transformers/models/sam/modeling_sam.py b/src/transformers/models/sam/modeling_sam.py index 28e28f07d2d..a9088958a8f 100644 --- a/src/transformers/models/sam/modeling_sam.py +++ b/src/transformers/models/sam/modeling_sam.py @@ -1442,21 +1442,19 @@ class SamModel(SamPreTrainedModel): if input_points is not None and len(input_points.shape) != 4: raise ValueError( "The input_points must be a 4D tensor. Of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.", - " got {}.".format(input_points.shape), + f" got {input_points.shape}.", ) if input_boxes is not None and len(input_boxes.shape) != 3: raise ValueError( "The input_points must be a 3D tensor. Of shape `batch_size`, `nb_boxes`, `4`.", - " got {}.".format(input_boxes.shape), + f" got {input_boxes.shape}.", ) if input_points is not None and input_boxes is not None: point_batch_size = input_points.shape[1] box_batch_size = input_boxes.shape[1] if point_batch_size != box_batch_size: raise ValueError( - "You should provide as many bounding boxes as input points per box. Got {} and {}.".format( - point_batch_size, box_batch_size - ) + f"You should provide as many bounding boxes as input points per box. Got {point_batch_size} and {box_batch_size}." 
) image_positional_embeddings = self.get_image_wide_positional_embeddings() @@ -1486,7 +1484,7 @@ class SamModel(SamPreTrainedModel): if input_points is not None and image_embeddings.shape[0] != input_points.shape[0]: raise ValueError( "The batch size of the image embeddings and the input points must be the same. ", - "Got {} and {} respectively.".format(image_embeddings.shape[0], input_points.shape[0]), + f"Got {image_embeddings.shape[0]} and {input_points.shape[0]} respectively.", " if you want to pass multiple points for the same image, make sure that you passed ", " input_points of shape (batch_size, point_batch_size, num_points_per_image, 3) and ", " input_labels of shape (batch_size, point_batch_size, num_points_per_image)", diff --git a/src/transformers/models/sam/modeling_tf_sam.py b/src/transformers/models/sam/modeling_tf_sam.py index c44dfdf80ac..69afaf2b89c 100644 --- a/src/transformers/models/sam/modeling_tf_sam.py +++ b/src/transformers/models/sam/modeling_tf_sam.py @@ -1597,21 +1597,19 @@ class TFSamModel(TFSamPreTrainedModel): if input_points is not None and len(input_points.shape) != 4: raise ValueError( "The input_points must be a 4D tensor. Of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.", - " got {}.".format(input_points.shape), + f" got {input_points.shape}.", ) if input_boxes is not None and len(input_boxes.shape) != 3: raise ValueError( "The input_points must be a 3D tensor. Of shape `batch_size`, `nb_boxes`, `4`.", - " got {}.".format(input_boxes.shape), + f" got {input_boxes.shape}.", ) if input_points is not None and input_boxes is not None: point_batch_size = shape_list(input_points)[1] box_batch_size = shape_list(input_boxes)[1] if point_batch_size != box_batch_size: raise ValueError( - "You should provide as many bounding boxes as input points per box. Got {} and {}.".format( - point_batch_size, box_batch_size - ) + f"You should provide as many bounding boxes as input points per box. Got {point_batch_size} and {box_batch_size}." ) if pixel_values is not None: # Ensures that later checks pass even with an all-None shape from the serving signature @@ -1653,7 +1651,7 @@ class TFSamModel(TFSamPreTrainedModel): if input_points is not None and image_embeddings.shape[0] != input_points.shape[0]: raise ValueError( "The batch size of the image embeddings and the input points must be the same. ", - "Got {} and {} respectively.".format(image_embeddings.shape[0], input_points.shape[0]), + f"Got {image_embeddings.shape[0]} and {input_points.shape[0]} respectively.", " if you want to pass multiple points for the same image, make sure that you passed ", " input_points of shape (batch_size, point_batch_size, num_points_per_image, 3) and ", " input_labels of shape (batch_size, point_batch_size, num_points_per_image)", diff --git a/src/transformers/models/sam_hq/modeling_sam_hq.py b/src/transformers/models/sam_hq/modeling_sam_hq.py index 14f30fb4b40..20391169855 100644 --- a/src/transformers/models/sam_hq/modeling_sam_hq.py +++ b/src/transformers/models/sam_hq/modeling_sam_hq.py @@ -1587,9 +1587,7 @@ class SamHQModel(SamHQPreTrainedModel): box_batch_size = input_boxes.shape[1] if point_batch_size != box_batch_size: raise ValueError( - "You should provide as many bounding boxes as input points per box. Got {} and {}.".format( - point_batch_size, box_batch_size - ) + f"You should provide as many bounding boxes as input points per box. Got {point_batch_size} and {box_batch_size}." 
) image_positional_embeddings = self.get_image_wide_positional_embeddings() diff --git a/src/transformers/models/sam_hq/modular_sam_hq.py b/src/transformers/models/sam_hq/modular_sam_hq.py index 45d8067d83a..a78ce712cc0 100644 --- a/src/transformers/models/sam_hq/modular_sam_hq.py +++ b/src/transformers/models/sam_hq/modular_sam_hq.py @@ -615,9 +615,7 @@ class SamHQModel(SamModel): box_batch_size = input_boxes.shape[1] if point_batch_size != box_batch_size: raise ValueError( - "You should provide as many bounding boxes as input points per box. Got {} and {}.".format( - point_batch_size, box_batch_size - ) + f"You should provide as many bounding boxes as input points per box. Got {point_batch_size} and {box_batch_size}." ) image_positional_embeddings = self.get_image_wide_positional_embeddings() diff --git a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py index 03136936e60..24f02c3e6b4 100755 --- a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py @@ -2434,7 +2434,7 @@ class SeamlessM4TCodeHifiGan(PreTrainedModel): lang = self.language_embedding(lang_id).transpose(1, 2) log_dur_pred = self.dur_predictor(hidden_states.transpose(1, 2)) - dur_out = torch.clamp(torch.round((torch.expm1(log_dur_pred))).long(), min=1) + dur_out = torch.clamp(torch.round(torch.expm1(log_dur_pred)).long(), min=1) # B x C x T if hidden_states.size(0) == 1: hidden_states = torch.repeat_interleave(hidden_states, dur_out.view(-1), dim=2) diff --git a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py index b6047a10da6..95c586bfd76 100644 --- a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +++ b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py @@ -2110,7 +2110,7 @@ class SeamlessM4Tv2TextToUnitDecoder(SeamlessM4Tv2PreTrainedModel): # predict duration log_dur_pred = self.duration_predictor(char_hidden_states, padding_mask=char_padding_mask) - dur_out = torch.clamp(torch.round((torch.expm1(log_dur_pred))).long(), min=1) + dur_out = torch.clamp(torch.round(torch.expm1(log_dur_pred)).long(), min=1) dur_out = dur_out.masked_fill(~char_padding_mask.bool(), 0.0) # upsample char hidden states according to predicted duration @@ -2675,7 +2675,7 @@ class SeamlessM4Tv2CodeHifiGan(PreTrainedModel): lang = self.language_embedding(lang_id).transpose(1, 2) log_dur_pred = self.dur_predictor(hidden_states.transpose(1, 2)) - dur_out = torch.clamp(torch.round((torch.expm1(log_dur_pred))).long(), min=1) + dur_out = torch.clamp(torch.round(torch.expm1(log_dur_pred)).long(), min=1) # B x C x T if hidden_states.size(0) == 1: hidden_states = torch.repeat_interleave(hidden_states, dur_out.view(-1), dim=2) diff --git a/src/transformers/models/segformer/modeling_segformer.py b/src/transformers/models/segformer/modeling_segformer.py index 959601aa57a..81c22044610 100755 --- a/src/transformers/models/segformer/modeling_segformer.py +++ b/src/transformers/models/segformer/modeling_segformer.py @@ -93,7 +93,7 @@ class SegformerDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class SegformerOverlapPatchEmbeddings(nn.Module): diff --git a/src/transformers/models/seggpt/modeling_seggpt.py b/src/transformers/models/seggpt/modeling_seggpt.py 
index 9a7d830476b..69c5ce88f7a 100644 --- a/src/transformers/models/seggpt/modeling_seggpt.py +++ b/src/transformers/models/seggpt/modeling_seggpt.py @@ -392,7 +392,7 @@ class SegGptDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class SegGptLayer(nn.Module): diff --git a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py index df0cae2a3b2..be8b507b60b 100644 --- a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py @@ -263,7 +263,7 @@ def convert_sew_checkpoint( config.vocab_size = len(target_dict.symbols) vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json") if not os.path.isdir(pytorch_dump_folder_path): - logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path)) + logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory") return os.makedirs(pytorch_dump_folder_path, exist_ok=True) with open(vocab_path, "w", encoding="utf-8") as vocab_handle: diff --git a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py index 1540efa4be1..b4dfd7bd2ad 100644 --- a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py @@ -275,7 +275,7 @@ def convert_sew_checkpoint( config.vocab_size = len(target_dict.symbols) vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json") if not os.path.isdir(pytorch_dump_folder_path): - logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path)) + logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory") return os.makedirs(pytorch_dump_folder_path, exist_ok=True) with open(vocab_path, "w", encoding="utf-8") as vocab_handle: diff --git a/src/transformers/models/speecht5/modeling_speecht5.py b/src/transformers/models/speecht5/modeling_speecht5.py index 24b83368215..c63426468d2 100644 --- a/src/transformers/models/speecht5/modeling_speecht5.py +++ b/src/transformers/models/speecht5/modeling_speecht5.py @@ -404,7 +404,7 @@ class SpeechT5ScaledPositionalEncoding(nn.Module): def __init__(self, dropout, dim, max_len=5000): pe = torch.zeros(max_len, dim) position = torch.arange(0, max_len).unsqueeze(1) - div_term = torch.exp((torch.arange(0, dim, 2, dtype=torch.int64).float() * -(math.log(10000.0) / dim))) + div_term = torch.exp(torch.arange(0, dim, 2, dtype=torch.int64).float() * -(math.log(10000.0) / dim)) pe[:, 0::2] = torch.sin(position.float() * div_term) pe[:, 1::2] = torch.cos(position.float() * div_term) pe = pe.unsqueeze(0) diff --git a/src/transformers/models/swiftformer/modeling_swiftformer.py b/src/transformers/models/swiftformer/modeling_swiftformer.py index 10e626131a6..5d9707dea82 100644 --- a/src/transformers/models/swiftformer/modeling_swiftformer.py +++ b/src/transformers/models/swiftformer/modeling_swiftformer.py @@ -91,7 +91,7 @@ class SwiftFormerDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return 
"p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class SwiftFormerEmbeddings(nn.Module): diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index 99fa5b45dbc..a8c29e84785 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -433,7 +433,7 @@ class SwinDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class SwinSelfAttention(nn.Module): @@ -659,7 +659,7 @@ class SwinLayer(nn.Module): mask_windows = window_partition(img_mask, self.window_size) mask_windows = mask_windows.view(-1, self.window_size * self.window_size) attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0) else: attn_mask = None return attn_mask diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py index 5b9f9f272ff..b5dd36f1cac 100644 --- a/src/transformers/models/swin/modeling_tf_swin.py +++ b/src/transformers/models/swin/modeling_tf_swin.py @@ -795,8 +795,8 @@ class TFSwinLayer(keras.layers.Layer): mask_windows = window_partition(img_mask, window_size) mask_windows = tf.reshape(mask_windows, (-1, window_size * window_size)) attn_mask = tf.expand_dims(mask_windows, 1) - tf.expand_dims(mask_windows, 2) - attn_mask = tf.where(attn_mask != 0, float(-100.0), attn_mask) - attn_mask = tf.where(attn_mask == 0, float(0.0), attn_mask) + attn_mask = tf.where(attn_mask != 0, -100.0, attn_mask) + attn_mask = tf.where(attn_mask == 0, 0.0, attn_mask) return attn_mask def maybe_pad( diff --git a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py index 29fe2e3e25d..192e58d9db0 100644 --- a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py +++ b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py @@ -170,7 +170,7 @@ def convert_swin2sr_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to missing_keys, unexpected_keys = model.load_state_dict(new_state_dict, strict=False) if len(missing_keys) > 0: - raise ValueError("Missing keys when converting: {}".format(missing_keys)) + raise ValueError(f"Missing keys when converting: {missing_keys}") for key in unexpected_keys: if not ("relative_position_index" in key or "relative_coords_table" in key or "self_mask" in key): raise ValueError(f"Unexpected key {key} in state_dict") diff --git a/src/transformers/models/swin2sr/modeling_swin2sr.py b/src/transformers/models/swin2sr/modeling_swin2sr.py index 9a6030b4a0f..c63579a014f 100644 --- a/src/transformers/models/swin2sr/modeling_swin2sr.py +++ b/src/transformers/models/swin2sr/modeling_swin2sr.py @@ -117,7 +117,7 @@ class Swin2SRDropPath(nn.Module): return drop_path(hidden_states, self.drop_prob, self.training) def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) + return f"p={self.drop_prob}" class Swin2SREmbeddings(nn.Module): @@ -521,7 +521,7 @@ class Swin2SRLayer(nn.Module): mask_windows = window_partition(img_mask, self.window_size) mask_windows = mask_windows.view(-1, self.window_size * self.window_size) attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - 
-            attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+            attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
         else:
             attn_mask = None
         return attn_mask
diff --git a/src/transformers/models/swinv2/modeling_swinv2.py b/src/transformers/models/swinv2/modeling_swinv2.py
index 0ad4821f0c3..050e8d3fd27 100644
--- a/src/transformers/models/swinv2/modeling_swinv2.py
+++ b/src/transformers/models/swinv2/modeling_swinv2.py
@@ -250,7 +250,7 @@ class Swinv2DropPath(nn.Module):
         return drop_path(hidden_states, self.drop_prob, self.training)
 
     def extra_repr(self) -> str:
-        return "p={}".format(self.drop_prob)
+        return f"p={self.drop_prob}"
 
 
 # Copied from transformers.models.swin.modeling_swin.SwinEmbeddings with Swin->Swinv2
@@ -716,7 +716,7 @@ class Swinv2Layer(nn.Module):
             mask_windows = window_partition(img_mask, self.window_size)
             mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
             attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
-            attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+            attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
         else:
             attn_mask = None
         return attn_mask
diff --git a/src/transformers/models/switch_transformers/modeling_switch_transformers.py b/src/transformers/models/switch_transformers/modeling_switch_transformers.py
index 83331231133..60dc8fb7a55 100644
--- a/src/transformers/models/switch_transformers/modeling_switch_transformers.py
+++ b/src/transformers/models/switch_transformers/modeling_switch_transformers.py
@@ -307,7 +307,7 @@ class SwitchTransformersSparseMLP(nn.Module):
             0
         ].tolist()  # length: number of "activated" expert / value: index
         for idx in idx_mask:
-            next_states[router_mask[:, :, idx]] = getattr(self.experts, "expert_{}".format(idx))(
+            next_states[router_mask[:, :, idx]] = getattr(self.experts, f"expert_{idx}")(
                 hidden_states[router_mask[:, :, idx]]
             )
 
diff --git a/src/transformers/models/timesformer/modeling_timesformer.py b/src/transformers/models/timesformer/modeling_timesformer.py
index 19358f0c30e..00592039a92 100644
--- a/src/transformers/models/timesformer/modeling_timesformer.py
+++ b/src/transformers/models/timesformer/modeling_timesformer.py
@@ -179,7 +179,7 @@ class TimeSformerDropPath(nn.Module):
         return drop_path(hidden_states, self.drop_prob, self.training)
 
     def extra_repr(self) -> str:
-        return "p={}".format(self.drop_prob)
+        return f"p={self.drop_prob}"
 
 
 # Adapted from https://github.com/facebookresearch/TimeSformer/blob/a5ef29a7b7264baff199a30b3306ac27de901133/timesformer/models/vit.py#L57
@@ -309,7 +309,7 @@ class TimesformerLayer(nn.Module):
         self.config = config
         self.attention_type = attention_type
         if attention_type not in ["divided_space_time", "space_only", "joint_space_time"]:
-            raise ValueError("Unknown attention type: {}".format(attention_type))
+            raise ValueError(f"Unknown attention type: {attention_type}")
 
         # Temporal Attention Parameters
         if self.attention_type == "divided_space_time":
diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py
index 4cca934bcea..79c5c2ca399 100644
--- a/src/transformers/models/udop/modeling_udop.py
+++ b/src/transformers/models/udop/modeling_udop.py
@@ -1219,7 +1219,7 @@ class UdopStack(UdopPreTrainedModel):
         batch_size, seq_length = input_shape
 
         if use_cache is True:
-            assert self.is_decoder, "`use_cache` can only be set to `True` if {} is used as a decoder".format(self)
+            assert self.is_decoder, f"`use_cache` can only be set to `True` if {self} is used as a decoder"
 
         # initialize past_key_values
         return_legacy_cache = False
diff --git a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
index 4eb8dfa7bbd..b17d304a1e4 100644
--- a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
@@ -210,7 +210,7 @@ def convert_unispeech_checkpoint(
             config.vocab_size = len(target_dict.symbols)
             vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json")
             if not os.path.isdir(pytorch_dump_folder_path):
-                logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path))
+                logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory")
                 return
             os.makedirs(pytorch_dump_folder_path, exist_ok=True)
             vocab_dict = target_dict.indices
diff --git a/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py b/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py
index ecb5cfa4e12..4f0f56f21bf 100644
--- a/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py
+++ b/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py
@@ -116,11 +116,11 @@ def convert_video_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_p
     # We add an image and video token so we resize the model
     model.resize_token_embeddings(config.text_config.vocab_size + 3, pad_shape)
     model.language_model.model.embed_tokens.weight.data[32000:] = torch.stack(
-        tuple((dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[32000:].shape[0]))),
+        tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[32000:].shape[0])),
         dim=0,
     )
     model.language_model.lm_head.weight.data[32000:] = torch.stack(
-        tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[32000:].shape[0]))),
+        tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[32000:].shape[0])),
         dim=0,
     )
 
diff --git a/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py b/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py
index f0fa69ab872..47f58cc6e10 100644
--- a/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py
+++ b/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py
@@ -92,11 +92,11 @@ def convert_vipllava_llama_to_hf(text_model_id, vision_model_id, output_hub_path
     # We add an image token so we resize the model
     model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape)
     model.language_model.model.embed_tokens.weight.data[32000:] = torch.stack(
-        tuple((dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[32000:].shape[0]))),
+        tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[32000:].shape[0])),
         dim=0,
     )
     model.language_model.lm_head.weight.data[32000:] = torch.stack(
-        tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[32000:].shape[0]))),
+        tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[32000:].shape[0])),
         dim=0,
     )
 
diff --git a/src/transformers/models/vitdet/modeling_vitdet.py b/src/transformers/models/vitdet/modeling_vitdet.py
index d327650c065..e13e36d08e2 100644
--- a/src/transformers/models/vitdet/modeling_vitdet.py
+++ b/src/transformers/models/vitdet/modeling_vitdet.py
@@ -294,7 +294,7 @@ class VitDetDropPath(nn.Module):
         return drop_path(hidden_states, self.drop_prob, self.training)
 
     def extra_repr(self) -> str:
-        return "p={}".format(self.drop_prob)
+        return f"p={self.drop_prob}"
 
 
 class VitDetLayerNorm(nn.Module):
diff --git a/src/transformers/models/vjepa2/modeling_vjepa2.py b/src/transformers/models/vjepa2/modeling_vjepa2.py
index bb139d4aeee..107d3819f41 100644
--- a/src/transformers/models/vjepa2/modeling_vjepa2.py
+++ b/src/transformers/models/vjepa2/modeling_vjepa2.py
@@ -402,7 +402,7 @@ class VJEPA2DropPath(nn.Module):
         return drop_path(hidden_states, self.drop_prob, self.training)
 
     def extra_repr(self) -> str:
-        return "p={}".format(self.drop_prob)
+        return f"p={self.drop_prob}"
 
 
 class VJEPA2MLP(nn.Module):
diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
index 5613f83a86b..1e0f00f4775 100644
--- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
@@ -307,7 +307,7 @@ def convert_wav2vec2_checkpoint(
            config.vocab_size = len(target_dict.symbols)
            vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json")
            if not os.path.isdir(pytorch_dump_folder_path):
-                logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path))
+                logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory")
                return
            os.makedirs(pytorch_dump_folder_path, exist_ok=True)
            vocab_dict = target_dict.indices
diff --git a/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
index 8c435c6cd92..ce54a1756ab 100644
--- a/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
@@ -243,7 +243,7 @@ def convert_wav2vec2_conformer_checkpoint(
            config.vocab_size = len(target_dict.symbols)
            vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json")
            if not os.path.isdir(pytorch_dump_folder_path):
-                logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path))
+                logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory")
                return
            os.makedirs(pytorch_dump_folder_path, exist_ok=True)
            vocab_dict = target_dict.indices
diff --git a/src/transformers/models/x_clip/modeling_x_clip.py b/src/transformers/models/x_clip/modeling_x_clip.py
index 6c98381a63b..41db6f5ce85 100644
--- a/src/transformers/models/x_clip/modeling_x_clip.py
+++ b/src/transformers/models/x_clip/modeling_x_clip.py
@@ -421,7 +421,7 @@ class XCLIPDropPath(nn.Module):
         return drop_path(hidden_states, self.drop_prob, self.training)
 
     def extra_repr(self) -> str:
-        return "p={}".format(self.drop_prob)
+        return f"p={self.drop_prob}"
 
 
 class XCLIPVisionEncoderLayer(nn.Module):
diff --git a/src/transformers/models/xlnet/tokenization_xlnet.py b/src/transformers/models/xlnet/tokenization_xlnet.py
index 330eec9d845..9186db33d78 100644
--- a/src/transformers/models/xlnet/tokenization_xlnet.py
+++ b/src/transformers/models/xlnet/tokenization_xlnet.py
@@ -211,7 +211,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
         pieces = self.sp_model.encode(text, out_type=str)
         new_pieces = []
         for piece in pieces:
-            if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
+            if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
                 cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
                 if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
                     if len(cur_pieces[0]) == 1:
diff --git a/src/transformers/models/zamba/modeling_zamba.py b/src/transformers/models/zamba/modeling_zamba.py
index ec6803e2d12..ea832692b7b 100644
--- a/src/transformers/models/zamba/modeling_zamba.py
+++ b/src/transformers/models/zamba/modeling_zamba.py
@@ -332,12 +332,10 @@ class ZambaMambaMixer(nn.Module):
         # weight associated to the selective projection used to make dt, B and C input dependent
         # each mamba head is processed independently
         self.x_proj_weight = nn.Parameter(
-            (
-                torch.zeros(
-                    self.n_mamba_heads,
-                    self.time_step_rank + self.ssm_state_size * 2,
-                    self.mamba_head_dim,
-                )
+            torch.zeros(
+                self.n_mamba_heads,
+                self.time_step_rank + self.ssm_state_size * 2,
+                self.mamba_head_dim,
             )
         )
         # time step projection (discretization)
diff --git a/src/transformers/models/zamba2/modeling_zamba2.py b/src/transformers/models/zamba2/modeling_zamba2.py
index 25f64745537..ecd0abcb026 100644
--- a/src/transformers/models/zamba2/modeling_zamba2.py
+++ b/src/transformers/models/zamba2/modeling_zamba2.py
@@ -881,7 +881,7 @@ class Zamba2MambaMixer(nn.Module):
 
 
             # (right term of low-rank factorization of off-diagonal blocks; B terms)
-            decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
+            decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
             B_decay_contraction = B * decay_states.permute(0, 2, 3, 1)[..., None]
             # permute back B * decay states
             states = (B_decay_contraction.permute(0, 1, 3, 2, 4)[..., None] * hidden_states.permute(0, 1, 3, 2, 4)[..., None, :]).sum(dim=3).permute(0, 1, 2, 4, 3)
diff --git a/src/transformers/models/zamba2/modular_zamba2.py b/src/transformers/models/zamba2/modular_zamba2.py
index 5c5c3a5599b..a89ab2729f2 100644
--- a/src/transformers/models/zamba2/modular_zamba2.py
+++ b/src/transformers/models/zamba2/modular_zamba2.py
@@ -660,7 +660,7 @@ class Zamba2MambaMixer(nn.Module):
 
 
             # (right term of low-rank factorization of off-diagonal blocks; B terms)
-            decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
+            decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
             B_decay_contraction = B * decay_states.permute(0, 2, 3, 1)[..., None]
             # permute back B * decay states
             states = (B_decay_contraction.permute(0, 1, 3, 2, 4)[..., None] * hidden_states.permute(0, 1, 3, 2, 4)[..., None, :]).sum(dim=3).permute(0, 1, 2, 4, 3)
diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index 79decc50d91..760f092f0e2 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -16,7 +16,7 @@ import json
 import os
 import warnings
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 from huggingface_hub import model_info
 
diff --git a/src/transformers/pipelines/zero_shot_classification.py b/src/transformers/pipelines/zero_shot_classification.py
index 42773df2ad1..c23f1c544cb 100644
--- a/src/transformers/pipelines/zero_shot_classification.py
+++ b/src/transformers/pipelines/zero_shot_classification.py
@@ -27,10 +27,8 @@ class ZeroShotClassificationArgumentHandler(ArgumentHandler):
             raise ValueError("You must include at least one label and at least one sequence.")
         if hypothesis_template.format(labels[0]) == hypothesis_template:
             raise ValueError(
-                (
-                    'The provided hypothesis_template "{}" was not able to be formatted with the target labels. '
-                    "Make sure the passed template includes formatting syntax such as {{}} where the label should go."
-                ).format(hypothesis_template)
+                f'The provided hypothesis_template "{hypothesis_template}" was not able to be formatted with the target labels. '
+                "Make sure the passed template includes formatting syntax such as {} where the label should go."
             )
 
         if isinstance(sequences, str):
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index bd0affc9ad3..b3a2933a2ed 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -31,10 +31,10 @@ import sys
 import tempfile
 import time
 import warnings
-from collections.abc import Mapping
+from collections.abc import Iterator, Mapping
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 
 # Integrations must be imported before ML frameworks:
diff --git a/src/transformers/utils/args_doc.py b/src/transformers/utils/args_doc.py
index da8243ff168..cdf39d8a5f9 100644
--- a/src/transformers/utils/args_doc.py
+++ b/src/transformers/utils/args_doc.py
@@ -861,7 +861,7 @@ def format_args_docstring(args, model_name):
     deducted from the model name and the auto modules.
     """
     # first check if there are any placeholders in the args, if not return them as is
-    placeholders = set(re.findall(r"{(.*?)}", "".join((args[arg]["description"] for arg in args))))
+    placeholders = set(re.findall(r"{(.*?)}", "".join(args[arg]["description"] for arg in args)))
     if not placeholders:
         return args
 
diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py
index f4dfdec73e8..6ae1b36d0bb 100644
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -590,7 +590,7 @@ def is_torch_bf16_available():
     return is_torch_bf16_gpu_available()
 
 
-@lru_cache()
+@lru_cache
 def is_torch_fp16_available_on_device(device):
     if not is_torch_available():
         return False
@@ -622,7 +622,7 @@ def is_torch_fp16_available_on_device(device):
     return True
 
 
-@lru_cache()
+@lru_cache
 def is_torch_bf16_available_on_device(device):
     if not is_torch_available():
         return False
@@ -731,14 +731,14 @@ def is_torch_xla_available(check_is_tpu=False, check_is_gpu=False):
     return True
 
 
-@lru_cache()
+@lru_cache
 def is_torch_neuroncore_available(check_device=True):
     if importlib.util.find_spec("torch_neuronx") is not None:
         return is_torch_xla_available()
     return False
 
 
-@lru_cache()
+@lru_cache
 def is_torch_npu_available(check_device=False):
     "Checks if `torch_npu` is installed and potentially if a NPU is in the environment"
     if not _torch_available or importlib.util.find_spec("torch_npu") is None:
@@ -757,7 +757,7 @@ def is_torch_npu_available(check_device=False):
     return hasattr(torch, "npu") and torch.npu.is_available()
 
 
-@lru_cache()
+@lru_cache
 def is_torch_mlu_available(check_device=False):
     """
     Checks if `mlu` is available via an `cndev-based` check which won't trigger the drivers and leave mlu
@@ -782,7 +782,7 @@ def is_torch_mlu_available(check_device=False):
     return available
 
 
-@lru_cache()
+@lru_cache
 def is_torch_musa_available(check_device=False):
     "Checks if `torch_musa` is installed and potentially if a MUSA is in the environment"
     if not _torch_available or importlib.util.find_spec("torch_musa") is None:
@@ -1020,7 +1020,7 @@ def is_torch_xpu_available(check_device=False):
     return hasattr(torch, "xpu") and torch.xpu.is_available()
 
 
-@lru_cache()
+@lru_cache
 def is_bitsandbytes_available(check_library_only=False) -> bool:
     if not _bitsandbytes_available:
         return False
@@ -1075,7 +1075,7 @@ def is_flash_attn_2_available():
     return False
 
 
-@lru_cache()
+@lru_cache
 def is_flash_attn_greater_or_equal_2_10():
     if not _is_package_available("flash_attn"):
         return False
@@ -1083,7 +1083,7 @@ def is_flash_attn_greater_or_equal_2_10():
     return version.parse(importlib.metadata.version("flash_attn")) >= version.parse("2.1.0")
 
 
-@lru_cache()
+@lru_cache
 def is_flash_attn_greater_or_equal(library_version: str):
     if not _is_package_available("flash_attn"):
         return False
@@ -1091,7 +1091,7 @@ def is_flash_attn_greater_or_equal(library_version: str):
     return version.parse(importlib.metadata.version("flash_attn")) >= version.parse(library_version)
 
 
-@lru_cache()
+@lru_cache
 def is_torch_greater_or_equal(library_version: str, accept_dev: bool = False):
     """
    Accepts a library version and returns True if the current version of the library is greater than or equal to the
@@ -1109,7 +1109,7 @@ def is_torch_greater_or_equal(library_version: str, accept_dev: bool = False):
     return version.parse(importlib.metadata.version("torch")) >= version.parse(library_version)
 
 
-@lru_cache()
+@lru_cache
 def is_huggingface_hub_greater_or_equal(library_version: str, accept_dev: bool = False):
     if not _is_package_available("huggingface_hub"):
         return False
@@ -2160,7 +2160,7 @@ class VersionComparison(Enum):
         return string_to_operator[version_string]
 
 
-@lru_cache()
+@lru_cache
 def split_package_version(package_version_str) -> tuple[str, str, str]:
     pattern = r"([a-zA-Z0-9_-]+)([!<>=~]+)([0-9.]+)"
     match = re.match(pattern, package_version_str)
@@ -2275,7 +2275,7 @@ def fetch__all__(file_content):
     return _all
 
 
-@lru_cache()
+@lru_cache
 def create_import_structure_from_path(module_path):
     """
     This method takes the path to a file/a folder and returns the import structure.
@@ -2603,7 +2603,7 @@ def spread_import_structure(nested_import_structure):
     return flattened_import_structure
 
 
-@lru_cache()
+@lru_cache
 def define_import_structure(module_path: str, prefix: Optional[str] = None) -> IMPORT_STRUCTURE_T:
     """
     This method takes a module_path as input and creates an import structure digestible by a _LazyModule.
diff --git a/src/transformers/utils/sentencepiece_model_pb2.py b/src/transformers/utils/sentencepiece_model_pb2.py
index b4b2992a630..8f063575fd7 100644
--- a/src/transformers/utils/sentencepiece_model_pb2.py
+++ b/src/transformers/utils/sentencepiece_model_pb2.py
@@ -342,7 +342,7 @@ _TRAINERSPEC = _descriptor.Descriptor(
             cpp_type=6,
             label=1,
             has_default_value=True,
-            default_value=float(0.9995),
+            default_value=0.9995,
             message_type=None,
             enum_type=None,
             containing_type=None,
@@ -456,7 +456,7 @@ _TRAINERSPEC = _descriptor.Descriptor(
             cpp_type=6,
             label=1,
             has_default_value=True,
-            default_value=float(0.75),
+            default_value=0.75,
             message_type=None,
             enum_type=None,
             containing_type=None,
diff --git a/tests/utils/test_video_utils.py b/tests/utils/test_video_utils.py
index 21a5b44ff8e..74f81cfe362 100644
--- a/tests/utils/test_video_utils.py
+++ b/tests/utils/test_video_utils.py
@@ -45,7 +45,7 @@ if is_vision_available():
 
 def get_random_video(height, width, num_frames=8, return_torch=False):
     random_frame = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
-    video = np.array(([random_frame] * num_frames))
+    video = np.array([random_frame] * num_frames)
     if return_torch:
         # move channel first
         return torch.from_numpy(video).permute(0, 3, 1, 2)
diff --git a/utils/check_copies.py b/utils/check_copies.py
index d50d244ebe2..d53d1fc9ca1 100644
--- a/utils/check_copies.py
+++ b/utils/check_copies.py
@@ -1023,7 +1023,7 @@ def convert_to_localized_md(model_list: str, localized_model_list: str, format_s
 
     sorted_index = sorted(localized_model_index.items(), key=lambda x: x[0].lower())
 
-    return readmes_match, "\n".join((x[1] for x in sorted_index)) + "\n"
+    return readmes_match, "\n".join(x[1] for x in sorted_index) + "\n"
 
 
 # Map a model name with the name it has in the README for the check_readme