diff --git a/examples/research_projects/movement-pruning/Saving_PruneBERT.ipynb b/examples/research_projects/movement-pruning/Saving_PruneBERT.ipynb
index e159549a105..0c27bd02a7d 100644
--- a/examples/research_projects/movement-pruning/Saving_PruneBERT.ipynb
+++ b/examples/research_projects/movement-pruning/Saving_PruneBERT.ipynb
@@ -41,7 +41,7 @@
 "from scipy import sparse\n",
 "from torch import nn\n",
 "\n",
- "from transformers import *\n",
+ "from transformers import BertForQuestionAnswering\n",
 "\n",
 "\n",
 "os.chdir(\"../../\")"
@@ -307,7 +307,7 @@
 " print(f\"Skip {name}\")\n",
 " continue\n",
 "\n",
- " if type(param) == torch.Tensor:\n",
+ " if isinstance(param, torch.Tensor):\n",
 " if param.numel() == 1:\n",
 " # module scale\n",
 " # module zero_point\n",
@@ -319,13 +319,13 @@
 " param = param.detach().numpy()\n",
 " hf.create_dataset(name, data=param, compression=\"gzip\", compression_opts=9)\n",
 "\n",
- " elif type(param) == float or type(param) == int or type(param) == tuple:\n",
+ " elif isinstance(param, (float, int, tuple)):\n",
 " # float - tensor _packed_params.weight.scale\n",
 " # int - tensor _packed_params.weight.zero_point\n",
 " # tuple - tensor _packed_params.weight.shape\n",
 " hf.attrs[name] = param\n",
 "\n",
- " elif type(param) == torch.dtype:\n",
+ " elif isinstance(param, torch.dtype):\n",
 " # dtype - tensor _packed_params.dtype\n",
 " hf.attrs[name] = dtype_2_str[param]\n",
 "\n",
@@ -370,7 +370,7 @@
 " # print(f\"Skip {name}\")\n",
 " # continue\n",
 "\n",
- " if type(param) == torch.Tensor:\n",
+ " if isinstance(param, torch.Tensor):\n",
 " if param.numel() == 1:\n",
 " # module scale\n",
 " # module zero_point\n",
@@ -382,13 +382,13 @@
 " param = param.detach().numpy()\n",
 " hf.create_dataset(name, data=param, compression=\"gzip\", compression_opts=9)\n",
 "\n",
- " elif type(param) == float or type(param) == int or type(param) == tuple:\n",
+ " elif isinstance(param, (float, int, tuple)):\n",
 " # float - tensor _packed_params.weight.scale\n",
 " # int - tensor _packed_params.weight.zero_point\n",
 " # tuple - tensor _packed_params.weight.shape\n",
 " hf.attrs[name] = param\n",
 "\n",
- " elif type(param) == torch.dtype:\n",
+ " elif isinstance(param, torch.dtype):\n",
 " # dtype - tensor _packed_params.dtype\n",
 " hf.attrs[name] = dtype_2_str[param]\n",
 "\n",
@@ -471,10 +471,10 @@
 " assert name in reconstructed_elementary_qtz_st, name\n",
 "\n",
 "for name, param in reconstructed_elementary_qtz_st.items():\n",
- " assert type(param) == type(elementary_qtz_st[name]), name\n",
- " if type(param) == torch.Tensor:\n",
+ " assert isinstance(param, type(elementary_qtz_st[name])), name\n",
+ " if isinstance(param, torch.Tensor):\n",
 " assert torch.all(torch.eq(param, elementary_qtz_st[name])), name\n",
- " elif type(param) == np.ndarray:\n",
+ " elif isinstance(param, np.ndarray):\n",
 " assert (param == elementary_qtz_st[name]).all(), name\n",
 " else:\n",
 " assert param == elementary_qtz_st[name], name"
@@ -532,10 +532,10 @@
 " assert name in reconstructed_qtz_st, name\n",
 "\n",
 "for name, param in reconstructed_qtz_st.items():\n",
- " assert type(param) == type(qtz_st[name]), name\n",
- " if type(param) == torch.Tensor:\n",
+ " assert isinstance(param, type(qtz_st[name])), name\n",
+ " if isinstance(param, torch.Tensor):\n",
 " assert torch.all(torch.eq(param, qtz_st[name])), name\n",
- " elif type(param) == np.ndarray:\n",
+ " elif isinstance(param, np.ndarray):\n",
 " assert (param == qtz_st[name]).all(), name\n",
 " else:\n",
 " assert param == qtz_st[name], name"
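The notebook hunks above replace exact-type comparisons with isinstance(). A minimal illustrative sketch (not taken from the notebook) of the behavioural difference: isinstance() also matches subclasses and accepts a tuple of types, so one check covers the former float/int/tuple chain.

    import torch

    param = torch.nn.Parameter(torch.zeros(3))  # nn.Parameter is a subclass of torch.Tensor

    print(type(param) == torch.Tensor)      # False - the exact-type check misses the subclass
    print(isinstance(param, torch.Tensor))  # True

    scale = 0.25
    print(isinstance(scale, (float, int, tuple)))  # True - one call replaces the three == comparisons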
diff --git a/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py b/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
index a9fe0d75f5c..3d726767742 100644
--- a/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
+++ b/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
@@ -114,7 +114,7 @@ class ASTSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/beit/modeling_beit.py b/src/transformers/models/beit/modeling_beit.py
index 0f642c5e8e8..b4b116bdfb0 100755
--- a/src/transformers/models/beit/modeling_beit.py
+++ b/src/transformers/models/beit/modeling_beit.py
@@ -270,7 +270,7 @@ class BeitSelfAttention(nn.Module):
         self.config = config
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {(config.hidden_size,)} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/beit/modeling_flax_beit.py b/src/transformers/models/beit/modeling_flax_beit.py
index 2d79c182008..d37eedea3f4 100644
--- a/src/transformers/models/beit/modeling_flax_beit.py
+++ b/src/transformers/models/beit/modeling_flax_beit.py
@@ -271,7 +271,7 @@ class FlaxBeitSelfAttention(nn.Module):
             self.config, "embedding_size"
         ):
             raise ValueError(
-                f"The hidden size {self.config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {self.config.hidden_size} is not a multiple of the number of attention "
                 f"heads {self.config.num_attention_heads}."
             )
diff --git a/src/transformers/models/data2vec/modeling_data2vec_vision.py b/src/transformers/models/data2vec/modeling_data2vec_vision.py
index 1b6834a5179..c86495cbbe2 100644
--- a/src/transformers/models/data2vec/modeling_data2vec_vision.py
+++ b/src/transformers/models/data2vec/modeling_data2vec_vision.py
@@ -271,7 +271,7 @@ class Data2VecVisionSelfAttention(nn.Module):
         self.config = config
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {(config.hidden_size,)} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py
index dfb7753d6f9..66a556da818 100644
--- a/src/transformers/models/deit/modeling_deit.py
+++ b/src/transformers/models/deit/modeling_deit.py
@@ -186,7 +186,7 @@ class DeiTSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
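The modeling-file hunks above and below all fix the same f-string slip: a comma left inside the braces makes Python format a one-element tuple, so the error message reads (768,) instead of 768. A quick sketch with an assumed hidden size of 768 (illustration only, not code from the patch):

    hidden_size = 768

    print(f"The hidden size {hidden_size,} ...")    # The hidden size (768,) ...  (the comma builds a tuple)
    print(f"The hidden size {(hidden_size,)} ...")  # same output, the tuple written explicitly as in a few files
    print(f"The hidden size {hidden_size} ...")     # The hidden size 768 ...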
diff --git a/src/transformers/models/deprecated/tvlt/modeling_tvlt.py b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py
index 7f82aacf6e8..aab3d4ff2de 100644
--- a/src/transformers/models/deprecated/tvlt/modeling_tvlt.py
+++ b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py
@@ -345,7 +345,7 @@ class TvltSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py
index dca17adf2b0..922d5fab9be 100644
--- a/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py
+++ b/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py
@@ -204,7 +204,7 @@ class ViTHybridSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/dinov2/modeling_dinov2.py b/src/transformers/models/dinov2/modeling_dinov2.py
index 33ec1c05499..3ba48b7026c 100644
--- a/src/transformers/models/dinov2/modeling_dinov2.py
+++ b/src/transformers/models/dinov2/modeling_dinov2.py
@@ -178,7 +178,7 @@ class Dinov2SelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py b/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py
index bd9d181cdf3..dae5904b78e 100644
--- a/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py
+++ b/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py
@@ -190,7 +190,7 @@ class Dinov2WithRegistersSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py
index a82227b4580..e4d55603e63 100755
--- a/src/transformers/models/dpt/modeling_dpt.py
+++ b/src/transformers/models/dpt/modeling_dpt.py
@@ -301,7 +301,7 @@ class DPTViTSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/flava/modeling_flava.py b/src/transformers/models/flava/modeling_flava.py
index c893938e428..94395bd2711 100644
--- a/src/transformers/models/flava/modeling_flava.py
+++ b/src/transformers/models/flava/modeling_flava.py
@@ -438,7 +438,7 @@ class FlavaSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/ijepa/modeling_ijepa.py b/src/transformers/models/ijepa/modeling_ijepa.py
index e01290b089f..7d4619480c3 100644
--- a/src/transformers/models/ijepa/modeling_ijepa.py
+++ b/src/transformers/models/ijepa/modeling_ijepa.py
@@ -194,7 +194,7 @@ class IJepaSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/luke/modeling_luke.py b/src/transformers/models/luke/modeling_luke.py
index 7a4f03fdf51..4665ff0f0e5 100644
--- a/src/transformers/models/luke/modeling_luke.py
+++ b/src/transformers/models/luke/modeling_luke.py
@@ -501,7 +501,7 @@ class LukeSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/mobilevit/modeling_mobilevit.py b/src/transformers/models/mobilevit/modeling_mobilevit.py
index 7f2a23238e5..f41da2bafaf 100755
--- a/src/transformers/models/mobilevit/modeling_mobilevit.py
+++ b/src/transformers/models/mobilevit/modeling_mobilevit.py
@@ -215,7 +215,7 @@ class MobileViTSelfAttention(nn.Module):
 
         if hidden_size % config.num_attention_heads != 0:
             raise ValueError(
-                f"The hidden size {hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py
index 9939ddcb716..76397f160b5 100644
--- a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py
+++ b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py
@@ -262,7 +262,7 @@ class TFMobileViTSelfAttention(keras.layers.Layer):
 
         if hidden_size % config.num_attention_heads != 0:
             raise ValueError(
-                f"The hidden size {hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/qwen2_audio/processing_qwen2_audio.py b/src/transformers/models/qwen2_audio/processing_qwen2_audio.py
index 5eee95398b3..44f4d9a8a86 100644
--- a/src/transformers/models/qwen2_audio/processing_qwen2_audio.py
+++ b/src/transformers/models/qwen2_audio/processing_qwen2_audio.py
@@ -112,7 +112,7 @@ class Qwen2AudioProcessor(ProcessorMixin):
 
             # ensure we have as much audios as audio tokens
             num_audio_tokens = sum(sample.count(self.audio_token) for sample in text)
-            num_audios = 1 if type(audios) == np.ndarray else len(audios)
+            num_audios = 1 if isinstance(audios, np.ndarray) else len(audios)
             if num_audio_tokens != num_audios:
                 raise ValueError(
                     f"Found {num_audio_tokens} {self.audio_token} token{'s' if num_audio_tokens > 1 else ''} in provided text but received {num_audios} audio{'s' if num_audios > 1 else ''}"
diff --git a/src/transformers/models/videomae/modeling_videomae.py b/src/transformers/models/videomae/modeling_videomae.py
index 6e65ebf06d9..0e51cd98868 100755
--- a/src/transformers/models/videomae/modeling_videomae.py
+++ b/src/transformers/models/videomae/modeling_videomae.py
@@ -201,7 +201,7 @@ class VideoMAESelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/vilt/modeling_vilt.py b/src/transformers/models/vilt/modeling_vilt.py
index 5ffb4b65ffb..07ed544d041 100755
--- a/src/transformers/models/vilt/modeling_vilt.py
+++ b/src/transformers/models/vilt/modeling_vilt.py
@@ -322,7 +322,7 @@ class ViltSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/vit/modeling_vit.py b/src/transformers/models/vit/modeling_vit.py
index b026a31d0a4..2fd430c1019 100644
--- a/src/transformers/models/vit/modeling_vit.py
+++ b/src/transformers/models/vit/modeling_vit.py
@@ -189,7 +189,7 @@ class ViTSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
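In the Qwen2AudioProcessor hunk above, a bare array counts as a single clip. A simplified sketch (the helper count_audios is hypothetical and only mirrors that one line) of why the isinstance() form is the safer check: an ndarray subclass such as np.memmap fails the exact-type comparison and would fall through to len(), i.e. be counted per sample.

    import numpy as np

    def count_audios(audios):
        # One bare array (of any ndarray subclass) is one clip; anything else is a sequence of clips.
        return 1 if isinstance(audios, np.ndarray) else len(audios)

    waveform = np.zeros(16000, dtype=np.float32)   # one 1-second clip at 16 kHz
    print(count_audios(waveform))                  # 1
    print(count_audios([waveform, waveform]))      # 2

    mmapped = np.memmap("clip.raw", dtype=np.float32, mode="w+", shape=(16000,))
    print(type(mmapped) == np.ndarray, isinstance(mmapped, np.ndarray))  # False True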
diff --git a/src/transformers/models/vit_mae/modeling_vit_mae.py b/src/transformers/models/vit_mae/modeling_vit_mae.py
index 1595eb80ca3..86e71155d9c 100755
--- a/src/transformers/models/vit_mae/modeling_vit_mae.py
+++ b/src/transformers/models/vit_mae/modeling_vit_mae.py
@@ -362,7 +362,7 @@ class ViTMAESelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/vit_msn/modeling_vit_msn.py b/src/transformers/models/vit_msn/modeling_vit_msn.py
index d25611a41a6..79021a6b8b6 100644
--- a/src/transformers/models/vit_msn/modeling_vit_msn.py
+++ b/src/transformers/models/vit_msn/modeling_vit_msn.py
@@ -179,7 +179,7 @@ class ViTMSNSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/vitpose_backbone/modeling_vitpose_backbone.py b/src/transformers/models/vitpose_backbone/modeling_vitpose_backbone.py
index d89f95e26b5..b4a1acd3361 100644
--- a/src/transformers/models/vitpose_backbone/modeling_vitpose_backbone.py
+++ b/src/transformers/models/vitpose_backbone/modeling_vitpose_backbone.py
@@ -109,7 +109,7 @@ class VitPoseBackboneSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/vivit/modeling_vivit.py b/src/transformers/models/vivit/modeling_vivit.py
index 22877c842f9..4ef0f29bc84 100755
--- a/src/transformers/models/vivit/modeling_vivit.py
+++ b/src/transformers/models/vivit/modeling_vivit.py
@@ -172,7 +172,7 @@ class VivitSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )
diff --git a/src/transformers/models/yolos/modeling_yolos.py b/src/transformers/models/yolos/modeling_yolos.py
index 729fd1b354b..5801e0bca28 100755
--- a/src/transformers/models/yolos/modeling_yolos.py
+++ b/src/transformers/models/yolos/modeling_yolos.py
@@ -237,7 +237,7 @@ class YolosSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
+                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                 f"heads {config.num_attention_heads}."
             )