diff --git a/src/transformers/data/datasets/glue.py b/src/transformers/data/datasets/glue.py
index 72df3bece21..43a1b75e518 100644
--- a/src/transformers/data/datasets/glue.py
+++ b/src/transformers/data/datasets/glue.py
@@ -122,7 +122,7 @@ class GlueDataset(Dataset):
         with FileLock(lock_path):
             if os.path.exists(cached_features_file) and not args.overwrite_cache:
                 start = time.time()
-                self.features = torch.load(cached_features_file)
+                self.features = torch.load(cached_features_file, weights_only=True)
                 logger.info(
                     f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start
                 )
diff --git a/src/transformers/models/bark/convert_suno_to_hf.py b/src/transformers/models/bark/convert_suno_to_hf.py
index f8c8399cb61..803656b623e 100644
--- a/src/transformers/models/bark/convert_suno_to_hf.py
+++ b/src/transformers/models/bark/convert_suno_to_hf.py
@@ -109,7 +109,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
     if not os.path.exists(ckpt_path):
         logger.info(f"{model_type} model not found, downloading into `{CACHE_DIR}`.")
         _download(model_info["repo_id"], model_info["file_name"])
-    checkpoint = torch.load(ckpt_path, map_location=device)
+    checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
     # this is a hack
     model_args = checkpoint["model_args"]
     if "input_vocab_size" not in model_args:
diff --git a/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py
index e694d96ca0d..84dc415443f 100644
--- a/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py
@@ -71,7 +71,7 @@ def rename_key(dct, old, new):
 
 def load_xsum_checkpoint(checkpoint_path):
     """Checkpoint path should end in model.pt"""
-    sd = torch.load(checkpoint_path, map_location="cpu")
+    sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     hub_interface = torch.hub.load("pytorch/fairseq", "bart.large.cnn").eval()
     hub_interface.model.load_state_dict(sd["model"])
     return hub_interface
diff --git a/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py b/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py
index f7cb149053a..8e1e85d5c04 100644
--- a/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py
+++ b/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py
@@ -101,7 +101,7 @@ def main(raw_args=None):
 
     model = BertModel.from_pretrained(
         pretrained_model_name_or_path=args.model_name,
-        state_dict=torch.load(args.pytorch_model_path),
+        state_dict=torch.load(args.pytorch_model_path, weights_only=True),
         cache_dir=args.cache_dir,
     )
diff --git a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
index c930a850462..c390d2e39f6 100755
--- a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
@@ -168,7 +168,7 @@ def convert_biogpt_checkpoint_to_pytorch(biogpt_checkpoint_path, pytorch_dump_fo
     checkpoint_file = os.path.join(biogpt_checkpoint_path, "checkpoint.pt")
     if not os.path.isfile(checkpoint_file):
         raise ValueError(f"path to the file {checkpoint_file} does not exist!")
-    chkpt = torch.load(checkpoint_file, map_location="cpu")
+    chkpt = torch.load(checkpoint_file, map_location="cpu", weights_only=True)
 
     args = chkpt["cfg"]["model"]
diff --git a/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
index c5919b94d42..d8ce9b056c3 100644
--- a/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
@@ -79,7 +79,7 @@ def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_
     """
     Copy/paste/tweak model's weights to our BERT structure.
     """
-    model = torch.load(checkpoint_path, map_location="cpu")
+    model = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     sd = model["model"]
     cfg = BlenderbotConfig.from_json_file(config_json_path)
     m = BlenderbotForConditionalGeneration(cfg)
diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
index 40ba6240d3e..73d251875dc 100644
--- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
@@ -104,7 +104,7 @@ def convert_bloom_checkpoint_to_pytorch(
             for i in range(pretraining_tp):
                 # load all TP files
                 f_name = file.replace("model_00", f"model_0{i}")
-                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
 
                 # Rename keys in the transformers names
                 keys = list(temp.keys())
@@ -164,7 +164,7 @@ def convert_bloom_checkpoint_to_pytorch(
             for i in range(pretraining_tp):
                 # load all TP files
                 f_name = file.replace("model_00", f"model_0{i}")
-                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
 
                 # Rename keys in the transformers names
                 keys = list(temp.keys())
diff --git a/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py b/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py
index ff45c9b597e..f74607f7b3c 100644
--- a/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py
+++ b/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py
@@ -130,13 +130,15 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
         for possible_name in ["consolidated.pth", "consolidated.00.pth"]:
             possible_path = os.path.join(input_model_path, possible_name)
             if os.path.exists(possible_path):
-                loaded = torch.load(possible_path, map_location="cpu")
+                loaded = torch.load(possible_path, map_location="cpu", weights_only=True)
                 break
         assert loaded is not None
     else:
         # Sharded
         loaded = [
-            torch.load(os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu")
+            torch.load(
+                os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu", weights_only=True
+            )
             for i in range(num_shards)
         ]
 
@@ -314,7 +316,7 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
 
     # Load VQGAN weights
     vqgan_path = os.path.join(input_base_path, "tokenizer/vqgan.ckpt")
-    vqgan_state_dict = torch.load(vqgan_path, map_location="cpu")["state_dict"]
+    vqgan_state_dict = torch.load(vqgan_path, map_location="cpu", weights_only=True)["state_dict"]
     for k, v in vqgan_state_dict.items():
         if "decoder" in k:
             continue  # we dont do image generation yet
diff --git a/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py b/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py
index 02c4b7b754b..adc9300ef51 100644
--- a/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py
+++ b/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py
@@ -104,7 +104,7 @@ def convert_chinese_clip_checkpoint(checkpoint_path, pytorch_dump_folder_path, c
 
     hf_model = ChineseCLIPModel(config).eval()
 
-    pt_weights = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+    pt_weights = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]
     pt_weights = {(name[7:] if name.startswith("module.") else name): value for name, value in pt_weights.items()}
 
     copy_text_model_and_projection(hf_model, pt_weights)
diff --git a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py
index c614d61e5b3..be2cfdee87d 100644
--- a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py
+++ b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py
@@ -169,7 +169,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
     model = CLIPSegForImageSegmentation(config)
     model.eval()
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # remove some keys
     for key in state_dict.copy().keys():
diff --git a/src/transformers/models/clvp/convert_clvp_to_hf.py b/src/transformers/models/clvp/convert_clvp_to_hf.py
index 4ae6fd42549..89babb3c4ca 100644
--- a/src/transformers/models/clvp/convert_clvp_to_hf.py
+++ b/src/transformers/models/clvp/convert_clvp_to_hf.py
@@ -201,9 +201,9 @@ def convert_clvp_weights(checkpoint_path, pytorch_dump_folder_path):
             _download(url=each_model_url, root=each_model_path)
 
         if each_model_name == "clvp":
-            clvp_checkpoint = torch.load(each_model_path, map_location="cpu")
+            clvp_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
         else:
-            decoder_checkpoint = torch.load(each_model_path, map_location="cpu")
+            decoder_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
 
     # Converting the weights
     converted_checkpoint.update(**convert_encoder_weights(clvp_checkpoint))
diff --git a/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py
index 9f76c92887f..d39777680b1 100644
--- a/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py
@@ -309,7 +309,7 @@ def convert_cvt_checkpoint(cvt_model, image_size, cvt_file_name, pytorch_dump_fo
     model = CvtForImageClassification(config)
     image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")
     image_processor.size["shortest_edge"] = image_size
-    original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"))
+    original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"), weights_only=True)
 
     huggingface_weights = OrderedDict()
     list_of_state_dict = []
diff --git a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
index a6e5081b484..ae3a6771015 100644
--- a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
@@ -143,7 +143,7 @@ def write_model(model_name, pretrained_model_weights_path, pytorch_dump_folder_p
     config.id2label = id2label
     config.label2id = {v: k for k, v in id2label.items()}
     # load original model from local path
-    loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"))["model"]
+    loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"), weights_only=True)["model"]
     # Renaming the original model state dictionary to HF compatibile
     all_keys = list(loaded.keys())
     new_keys = convert_old_keys_to_new_keys(all_keys)
diff --git a/src/transformers/models/dac/convert_dac_checkpoint.py b/src/transformers/models/dac/convert_dac_checkpoint.py
index bfeb96fbdd4..b1728a7da11 100644
--- a/src/transformers/models/dac/convert_dac_checkpoint.py
+++ b/src/transformers/models/dac/convert_dac_checkpoint.py
@@ -205,7 +205,7 @@ def convert_checkpoint(
     sample_rate=16000,
     repo_id=None,
 ):
-    model_dict = torch.load(checkpoint_path, "cpu")
+    model_dict = torch.load(checkpoint_path, "cpu", weights_only=True)
 
     config = DacConfig()
diff --git a/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
index 0c6f42f4ba7..3f9d7773516 100755
--- a/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
@@ -224,7 +224,7 @@ def load_beit_model(args, is_finetuned, is_large):
     )
     patch_size = model.patch_embed.patch_size
     args.window_size = (args.input_size // patch_size[0], args.input_size // patch_size[1])
-    checkpoint = torch.load(args.beit_checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.beit_checkpoint, map_location="cpu", weights_only=True)
 
     print(f"Load ckpt from {args.beit_checkpoint}")
     checkpoint_model = None
diff --git a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py
index 781b823e96f..c88582eaccf 100644
--- a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py
+++ b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py
@@ -125,7 +125,7 @@ def convert_deformable_detr_checkpoint(
     logger.info("Converting model...")
 
     # load original state dict
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # rename keys
     for key in state_dict.copy().keys():
         val = state_dict.pop(key)
diff --git a/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
index 60e93efe7c6..6436451190a 100644
--- a/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
+++ b/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
@@ -229,7 +229,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
     else:
         raise ValueError(f"Model name {model_name} not supported")
     checkpoint_path = hf_hub_download(repo_id="nielsr/deta-checkpoints", filename=filename)
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # rename keys
     rename_keys = create_rename_keys(config)
diff --git a/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
index 392750fa67a..c2e1ae6001d 100644
--- a/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
+++ b/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
@@ -230,7 +230,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
     else:
         raise ValueError(f"Model name {model_name} not supported")
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # original state dict
     for name, param in state_dict.items():
diff --git a/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
index 7431cd6136a..8ac9a13f5c5 100644
--- a/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
@@ -123,7 +123,7 @@ def prepare_img():
 def convert_efficientformer_checkpoint(
     checkpoint_path: Path, efficientformer_config_file: Path, pytorch_dump_path: Path, push_to_hub: bool
 ):
-    orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     config = EfficientFormerConfig.from_json_file(efficientformer_config_file)
     model = EfficientFormerForImageClassificationWithTeacher(config)
     model_name = "_".join(checkpoint_path.split("/")[-1].split(".")[0].split("_")[:-1])
diff --git a/src/transformers/models/deprecated/jukebox/convert_jukebox.py b/src/transformers/models/deprecated/jukebox/convert_jukebox.py
index 960c8f6ff57..aac3b2efe73 100644
--- a/src/transformers/models/deprecated/jukebox/convert_jukebox.py
+++ b/src/transformers/models/deprecated/jukebox/convert_jukebox.py
@@ -228,7 +228,7 @@ def convert_openai_checkpoint(model_name=None, pytorch_dump_folder_path=None):
     weight_dict = []
     mapping = {}
     for i, dict_name in enumerate(model_to_convert):
-        old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}")["model"]
+        old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}", weights_only=True)["model"]
 
         new_dic = {}
         for k in old_dic.keys():
diff --git a/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py
index 1f791dab240..c6dbb12890e 100644
--- a/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py
@@ -132,13 +132,17 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
     print(
         "Original Mega encoder:",
         original_mlm.mega.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
     print(
         "Original Mega MLM layer:",
         original_mlm.mlm_head.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
@@ -234,7 +238,9 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
     print(
         "HF Mega MLM layer:",
         hf_mlm.mlm_head.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
diff --git a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py
index 466b14f6bad..cd87217f051 100644
--- a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py
+++ b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py
@@ -129,7 +129,7 @@ def convert_weight_and_push(
     print(f"Downloading weights for {name}...")
     checkpoint_path = cached_download(checkpoint)
     print(f"Converting {name}...")
-    from_state_dict = torch.load(checkpoint_path)["state_dict"]
+    from_state_dict = torch.load(checkpoint_path, weights_only=True)["state_dict"]
     from_model.load_state_dict(from_state_dict)
     from_model.eval()
     with torch.no_grad():
diff --git a/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py b/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py
index 5c6da13ae88..d43ff7f40dd 100644
--- a/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py
+++ b/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py
@@ -229,7 +229,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
         filename=f"{filename}",
     )
 
-    state_dict = torch.load(filepath, map_location="cpu")
+    state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
     # rename keys
     rename_keys = create_rename_keys(config)
     for src, dest in rename_keys:
diff --git a/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
index fbf34012924..03f38084cfb 100644
--- a/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
@@ -27,7 +27,7 @@ NEW_KEY = "lm_head.weight"
 
 def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
-    d = torch.load(checkpoint_path)
+    d = torch.load(checkpoint_path, weights_only=True)
     d[NEW_KEY] = d.pop(OLD_KEY)
     os.makedirs(pytorch_dump_folder_path, exist_ok=True)
     torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))
diff --git a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py
index d24c2f01db4..5151c0972a7 100644
--- a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py
@@ -29,7 +29,9 @@ CheckpointState = collections.namedtuple(
 
 def load_states_from_checkpoint(model_file: str) -> CheckpointState:
     print(f"Reading saved model from {model_file}")
-    state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
+    state_dict = torch.load(
+        model_file, map_location=lambda s, l: default_restore_location(s, "cpu"), weights_only=True
+    )
     return CheckpointState(**state_dict)
diff --git a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py
index d7dc6d104f4..ceae9b84711 100644
--- a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py
+++ b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py
@@ -226,7 +226,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
     config, expected_shape = get_dpt_config(checkpoint_url)
     # load original state_dict from URL
     # state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")
-    state_dict = torch.load(checkpoint_url, map_location="cpu")
+    state_dict = torch.load(checkpoint_url, map_location="cpu", weights_only=True)
     # remove certain keys
     remove_ignore_keys_(state_dict)
     # rename keys
diff --git a/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py b/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py
index 4db97bd6883..f1fb0168705 100644
--- a/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py
+++ b/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py
@@ -325,7 +325,7 @@ def convert_checkpoint(
     )
     feature_extractor.save_pretrained(pytorch_dump_folder_path)
 
-    original_checkpoint = torch.load(checkpoint_path)
+    original_checkpoint = torch.load(checkpoint_path, weights_only=True)
     if "best_state" in original_checkpoint:
         # we might have a training state saved, in which case discard the yaml results and just retain the weights
         original_checkpoint = original_checkpoint["best_state"]
diff --git a/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
index bb9c432f822..3a5bb2d2e2e 100644
--- a/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
@@ -164,7 +164,7 @@ def convert_FastSpeech2ConformerModel_checkpoint(
 
     # Prepare the model
     model = FastSpeech2ConformerModel(config)
-    espnet_checkpoint = torch.load(checkpoint_path)
+    espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
     hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
     model.load_state_dict(hf_compatible_state_dict)
diff --git a/src/transformers/models/fastspeech2_conformer/convert_hifigan.py b/src/transformers/models/fastspeech2_conformer/convert_hifigan.py
index ec9f57ce714..70aada84bd5 100644
--- a/src/transformers/models/fastspeech2_conformer/convert_hifigan.py
+++ b/src/transformers/models/fastspeech2_conformer/convert_hifigan.py
@@ -104,7 +104,7 @@ def convert_hifigan_checkpoint(
 
     model = FastSpeech2ConformerHifiGan(config)
 
-    orig_checkpoint = torch.load(checkpoint_path)
+    orig_checkpoint = torch.load(checkpoint_path, weights_only=True)
     load_weights(orig_checkpoint, model, config)
 
     model.save_pretrained(pytorch_dump_folder_path)
diff --git a/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py b/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py
index 2a780d5cf0b..6f840438dca 100644
--- a/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py
+++ b/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py
@@ -51,7 +51,7 @@ def convert_FastSpeech2ConformerWithHifiGan_checkpoint(
 
     model = FastSpeech2ConformerModel(model_config)
 
-    espnet_checkpoint = torch.load(checkpoint_path)
+    espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
     hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
     model.load_state_dict(hf_compatible_state_dict)
diff --git a/src/transformers/models/flava/convert_dalle_to_flava_codebook.py b/src/transformers/models/flava/convert_dalle_to_flava_codebook.py
index 7b544125114..6408d0e1df0 100644
--- a/src/transformers/models/flava/convert_dalle_to_flava_codebook.py
+++ b/src/transformers/models/flava/convert_dalle_to_flava_codebook.py
@@ -62,7 +62,7 @@ def convert_dalle_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_p
     encoder = Encoder()
 
     if os.path.exists(checkpoint_path):
-        ckpt = torch.load(checkpoint_path)
+        ckpt = torch.load(checkpoint_path, weights_only=True)
     else:
         ckpt = torch.hub.load_state_dict_from_url(checkpoint_path)
diff --git a/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py b/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py
index 95ebb2bfdb2..8b6e536a3ab 100644
--- a/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py
+++ b/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py
@@ -73,7 +73,7 @@ def convert_flava_checkpoint(checkpoint_path, codebook_path, pytorch_dump_folder
     codebook_state_dict = convert_dalle_checkpoint(codebook_path, None, save_checkpoint=False)
 
     if os.path.exists(checkpoint_path):
-        state_dict = torch.load(checkpoint_path, map_location="cpu")
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     else:
         state_dict = torch.hub.load_state_dict_from_url(checkpoint_path, map_location="cpu")
diff --git a/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py b/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py
index 6d029c0d13a..29ef7859c9a 100644
--- a/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py
+++ b/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py
@@ -87,7 +87,7 @@ def rename_state_dict(state_dict):
 
 def convert_fuyu_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_model_path, safe_serialization=False):
     sys.path.insert(0, ada_lib_path)
-    model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+    model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
     state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
     state_dict = rename_state_dict(state_dict)
diff --git a/src/transformers/models/gemma/convert_gemma_weights_to_hf.py b/src/transformers/models/gemma/convert_gemma_weights_to_hf.py
index 9b71be35bfa..fd275c157f3 100644
--- a/src/transformers/models/gemma/convert_gemma_weights_to_hf.py
+++ b/src/transformers/models/gemma/convert_gemma_weights_to_hf.py
@@ -72,7 +72,7 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
     head_dim = config.head_dim
 
     print(f"Fetching all parameters from the checkpoint at '{input_base_path}'")
-    model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+    model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
     model_state_dict.pop("freqs_cis")
 
     state_dict = {}
diff --git a/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py b/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py
index 1ad7d23c3c3..c41f9a2fdbb 100644
--- a/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py
+++ b/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py
@@ -97,11 +97,11 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
         for file in files:
             print(file)
-            loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu")
+            loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
             model_state_dict.update(loaded_state_dict)
     else:
         print("Model does not seem to be sharded")
-        model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+        model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
     model_state_dict.pop("freqs_cis")
 
     state_dict = {}
diff --git a/src/transformers/models/git/convert_git_to_pytorch.py b/src/transformers/models/git/convert_git_to_pytorch.py
index 2f93a6b03a6..4a9d8a01599 100644
--- a/src/transformers/models/git/convert_git_to_pytorch.py
+++ b/src/transformers/models/git/convert_git_to_pytorch.py
@@ -297,7 +297,7 @@ def convert_git_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub=Fal
     if "large" in model_name and not is_video and "large-r" not in model_name:
         # large checkpoints take way too long to download
         checkpoint_path = model_name_to_path[model_name]
-        state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     else:
         checkpoint_url = model_name_to_url[model_name]
         state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu", file_name=model_name)[
diff --git a/src/transformers/models/glm/convert_glm_weights_to_hf.py b/src/transformers/models/glm/convert_glm_weights_to_hf.py
index 1053f984d7f..df1fd7537f4 100644
--- a/src/transformers/models/glm/convert_glm_weights_to_hf.py
+++ b/src/transformers/models/glm/convert_glm_weights_to_hf.py
@@ -53,7 +53,7 @@ def load_weights(input_dir: str):
     elif bin_files:
         bin_files = sorted(bin_files, key=lambda x: int(x.rsplit("-", 3)[1]))
         for file in bin_files:
-            tensors = torch.load(file, map_location="cpu")
+            tensors = torch.load(file, map_location="cpu", weights_only=True)
             all_weights.update(tensors)
         return all_weights
diff --git a/src/transformers/models/glpn/convert_glpn_to_pytorch.py b/src/transformers/models/glpn/convert_glpn_to_pytorch.py
index 5d18c3b73a5..51088fb7244 100644
--- a/src/transformers/models/glpn/convert_glpn_to_pytorch.py
+++ b/src/transformers/models/glpn/convert_glpn_to_pytorch.py
@@ -140,7 +140,7 @@ def convert_glpn_checkpoint(checkpoint_path, pytorch_dump_folder_path, push_to_h
     logger.info("Converting model...")
 
     # load original state dict
-    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 
     # rename keys
     state_dict = rename_keys(state_dict)
diff --git a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py
index 2625701c1a7..c4e2ff67c5c 100644
--- a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py
+++ b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py
@@ -153,7 +153,7 @@ def main(args):
         raise FileNotFoundError(f"ERROR! could not find file {checkpoint_path}")
 
     # Load the model.
-    checkpoint = torch.load(checkpoint_path, map_location="cpu")
+    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # Load the config.
     config_megatron = checkpoint["hyper_parameters"]["cfg"]
diff --git a/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py b/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py
index 059f10f6129..6bc28184985 100644
--- a/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py
+++ b/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py
@@ -163,7 +163,7 @@ def convert_groupvit_checkpoint(
     config = GroupViTConfig()
     model = GroupViTModel(config).eval()
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     new_state_dict = convert_state_dict(state_dict, config)
     missing_keys, unexpected_keys = model.load_state_dict(new_state_dict, strict=False)
     assert missing_keys == ["text_model.embeddings.position_ids"]
diff --git a/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py
index ff15b90088a..c66c41ce36b 100644
--- a/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py
+++ b/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py
@@ -32,7 +32,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
     """
    Copy/paste/tweak model's weights to transformers design.
     """
-    checkpoint = torch.load(checkpoint_path, map_location="cpu")
+    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     if checkpoint["Config"]["downstream_expert"]["modelrc"]["select"] not in SUPPORTED_MODELS:
         raise NotImplementedError(f"The supported s3prl models are {SUPPORTED_MODELS}")
diff --git a/src/transformers/models/llama/convert_llama_weights_to_hf.py b/src/transformers/models/llama/convert_llama_weights_to_hf.py
index eb2862eb203..84b5c53a916 100644
--- a/src/transformers/models/llama/convert_llama_weights_to_hf.py
+++ b/src/transformers/models/llama/convert_llama_weights_to_hf.py
@@ -228,12 +228,17 @@ def write_model(
     if num_shards == 1:
         # Not sharded
         # (The sharded implementation would also work, but this is simpler.)
-        loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu")
+        loaded = torch.load(
+            os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu", weights_only=True
+        )
     else:
         # Sharded
         checkpoint_list = sorted([file for file in os.listdir(input_base_path) if file.endswith(".pth")])
         print("Loading in order:", checkpoint_list)
-        loaded = [torch.load(os.path.join(input_base_path, file), map_location="cpu") for file in checkpoint_list]
+        loaded = [
+            torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
+            for file in checkpoint_list
+        ]
     param_count = 0
     index_dict = {"weight_map": {}}
     for layer_i in range(n_layers):
diff --git a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py
index 06edc5c9b1a..85f21d4a5be 100644
--- a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py
+++ b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py
@@ -219,12 +219,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
 
     # verify inputs
     filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_pixel_values.pt", repo_type="dataset")
-    original_pixel_values = torch.load(filepath, map_location="cpu")
+    original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
     assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
 
     if model_id == "liuhaotian/llava-v1.6-mistral-7b":
         filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_input_ids.pt", repo_type="dataset")
-        original_input_ids = torch.load(filepath, map_location="cpu")
+        original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
         # replace -200 by image_token_index (since we use token ID = 32000 for the image token)
         original_input_ids[original_input_ids == -200] = image_token_index
         assert original_input_ids[0].tolist() == inputs.input_ids[0].tolist()
@@ -233,7 +233,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
         filepath = hf_hub_download(
             repo_id="nielsr/test-image", filename="llava_1_6_34b_input_ids.pt", repo_type="dataset"
         )
-        original_input_ids = torch.load(filepath, map_location="cpu")
+        original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
         # replace -200 by image_token_index
         original_input_ids[original_input_ids == -200] = image_token_index
diff --git a/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py b/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py
index 65c57f624f5..bd8b9e3c4c9 100644
--- a/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py
+++ b/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py
@@ -212,7 +212,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
     filepath = hf_hub_download(
         repo_id="RaushanTurganbay/test-image", filename="llava_onevision_pixel_values.pt", repo_type="dataset"
     )
-    original_pixel_values = torch.load(filepath, map_location="cpu")
+    original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
     assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
 
     image_sizes = torch.tensor([[899, 1024]])
diff --git a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py
index 4ef2131228b..cbd7600e963 100644
--- a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py
+++ b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py
@@ -42,7 +42,7 @@ def convert_longformer_qa_checkpoint_to_pytorch(
     longformer = LongformerModel.from_pretrained(longformer_model)
     lightning_model = LightningModel(longformer)
 
-    ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"))
+    ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"), weights_only=True)
     lightning_model.load_state_dict(ckpt["state_dict"])
 
     # init longformer question answering model
diff --git a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
index c86fa6e3089..aae550e8d09 100644
--- a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
@@ -32,7 +32,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
     config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
 
     # Load in the weights from the checkpoint_path
-    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # Load the entity vocab file
     entity_vocab = load_entity_vocab(entity_vocab_path)
diff --git a/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py b/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
index 97265fbdcf9..02e7ef23a08 100644
--- a/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
 
 def convert_fairseq_m2m100_checkpoint_from_disk(checkpoint_path):
-    m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+    m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     args = m2m_100["args"] or m2m_100["cfg"]["model"]
     state_dict = m2m_100["model"]
     remove_ignore_keys_(state_dict)
diff --git a/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py b/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py
index 0cf7dcc0eda..f55b032207c 100644
--- a/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py
@@ -108,7 +108,7 @@ def convert_mamba_checkpoint_file_to_huggingface_model_file(
     )
     logger.info(f"Loading model from {mamba_checkpoint_path} based on config from {config_json_file}")
     # Load weights and config from paths
-    original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu")
+    original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu", weights_only=True)
 
     with open(config_json_file, "r", encoding="utf-8") as json_file:
         original_ssm_config_dict = json.load(json_file)
diff --git a/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py b/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py
index f68e9bd4904..bd1d413f40a 100644
--- a/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py
@@ -38,7 +38,7 @@ def load_state_dict_from_safetensors(mamba2_checkpoint_path: str, ckpt_name: str
 
 def load_state_dict_from_torch(mamba2_checkpoint_path: str, ckpt_name: str) -> Dict[str, torch.Tensor]:
-    return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu")
+    return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu", weights_only=True)
 
 
 def convert_ssm_config_to_hf_config(config_ssm: Dict, mamba2_model_dict: Dict) -> Mamba2Config:
diff --git a/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py b/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py
index eb7f00bf771..909b7b41284 100644
--- a/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py
@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
 def convert_fairseq_mbart_checkpoint_from_disk(
     checkpoint_path, hf_config_path="facebook/mbart-large-en-ro", finetuned=False, mbart_50=False
 ):
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     remove_ignore_keys_(state_dict)
     vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]
diff --git a/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py b/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py
index 0fc67866301..cd29b7ad78c 100644
--- a/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py
+++ b/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py
@@ -294,9 +294,9 @@ def main():
     if args.path_to_checkpoint.endswith(".zip"):
         with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
             with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-                input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+                input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
     else:
-        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
 
     if args.config_file == "":
         # Default config of megatron-bert 345m
diff --git a/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py b/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py
index 284d8a3d454..548e2d1aeb3 100644
--- a/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py
+++ b/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py
@@ -275,7 +275,7 @@ def merge_transformers_sharded_states(path, num_checkpoints):
     state_dict = {}
     for i in range(1, num_checkpoints + 1):
         checkpoint_path = os.path.join(path, f"pytorch_model-{i:05d}-of-{num_checkpoints:05d}.bin")
-        current_chunk = torch.load(checkpoint_path, map_location="cpu")
+        current_chunk = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
         state_dict.update(current_chunk)
     return state_dict
 
@@ -298,7 +298,7 @@ def get_megatron_sharded_states(args, tp_size, pp_size, pp_rank):
             checkpoint_path = os.path.join(args.load_path, sub_dir_name, checkpoint_name)
             if os.path.isfile(checkpoint_path):
                 break
-        state_dict = torch.load(checkpoint_path, map_location="cpu")
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
         tp_state_dicts.append(state_dict)
     return tp_state_dicts
 
@@ -338,7 +338,7 @@ def convert_checkpoint_from_megatron_to_transformers(args):
             rank0_checkpoint_path = os.path.join(args.load_path, sub_dir, rank0_checkpoint_name)
             break
     print(f"Loading Megatron-LM checkpoint arguments from: {rank0_checkpoint_path}")
-    state_dict = torch.load(rank0_checkpoint_path, map_location="cpu")
+    state_dict = torch.load(rank0_checkpoint_path, map_location="cpu", weights_only=True)
     megatron_args = state_dict.get("args", None)
     if megatron_args is None:
         raise ValueError(
@@ -634,7 +634,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
     sub_dirs = [x for x in os.listdir(args.load_path) if x.startswith("pytorch_model")]
     if len(sub_dirs) == 1:
         checkpoint_name = "pytorch_model.bin"
-        state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu")
+        state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu", weights_only=True)
     else:
         num_checkpoints = len(sub_dirs) - 1
         state_dict = merge_transformers_sharded_states(args.load_path, num_checkpoints)
diff --git a/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py b/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py
index 38060f8af5c..5515b6d6155 100644
--- a/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py
+++ b/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py
@@ -263,9 +263,9 @@ def main():
     if args.path_to_checkpoint.endswith(".zip"):
         with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
             with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-                input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+                input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
     else:
-        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
 
     ds_args = input_state_dict.get("args", None)
diff --git a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py
index e7f9da0015c..0f9cf597d5c 100644
--- a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py
+++ b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py
@@ -208,7 +208,9 @@ def convert_and_write_model(input_dir: str, output_dir: str, max_position_embedd
     else:
         shards = [file for file in os.listdir(input_dir) if re.match(r"consolidated.\d+.pth", file)]
         shards = sorted(shards, key=lambda x: int(x.split(".")[1]))
-        loaded_shards = [torch.load(os.path.join(input_dir, file), map_location="cpu") for file in shards]
+        loaded_shards = [
+            torch.load(os.path.join(input_dir, file), map_location="cpu", weights_only=True) for file in shards
+        ]
         full_state_dict = convert_state_dict_sharded(loaded_shards, config)
 
     # Load weights into model and resave them
diff --git a/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py b/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py
index 3309627b278..7e9f25d37f4 100644
--- a/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py
+++ b/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py
@@ -94,7 +94,8 @@ def write_model(model_path, input_base_path, model_size, safe_serialization=True
     print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
 
     # Load weights
     loaded = [
-        torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu") for i in range(8)
+        torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu", weights_only=True)
+        for i in range(8)
     ]
 
     merged_state_dict = {}
diff --git a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py
index b2c40e27bb2..2d361af61e6 100644
--- a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py
+++ b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py
@@ -342,10 +342,15 @@ def write_model(
             path = os.path.join(input_base_path, "consolidated.00.pth")
         else:
             path = os.path.join(input_base_path, "consolidated.pth")
-        loaded = [torch.load(path, map_location="cpu", mmap=True)]
+        loaded = [torch.load(path, map_location="cpu", mmap=True, weights_only=True)]
     else:
         loaded = [
-            torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu", mmap=True)
+            torch.load(
+                os.path.join(input_base_path, f"consolidated.{i:02d}.pth"),
+                map_location="cpu",
+                mmap=True,
+                weights_only=True,
+            )
             for i in range(num_shards)
         ]
diff --git a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
index f361082fb3c..5a74d4114ac 100644
--- a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
@@ -33,7 +33,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
     config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
 
     # Load in the weights from the checkpoint_path
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["module"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["module"]
 
     # Load the entity vocab file
     entity_vocab = load_original_entity_vocab(entity_vocab_path)
diff --git a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py
index 522d6671d12..7dc6dfa288b 100644
--- a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py
+++ b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py
@@ -199,7 +199,7 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
     config = get_mobilevit_config(mobilevit_name)
 
     # load original state_dict
-    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # load 🤗 model
     if mobilevit_name.startswith("deeplabv3_"):
diff --git a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py
index d08642666cd..485cbf5aa09 100644
--- a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py
+++ b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py
@@ -239,7 +239,7 @@ def convert_mobilevitv2_checkpoint(task_name, checkpoint_path, orig_config_path,
     config = get_mobilevitv2_config(task_name, orig_config_path)
 
     # load original state_dict
-    checkpoint = torch.load(checkpoint_path, map_location="cpu")
+    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # load huggingface model
     if task_name.startswith("ade20k_") or task_name.startswith("voc_"):
diff --git a/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py b/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py
index f558f7c7bce..b35cd7662db 100644
--- a/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py
+++ b/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py
@@ -77,7 +77,7 @@ def convert_checkpoint_helper(max_position_embeddings, orig_state_dict):
 
 def convert_mra_checkpoint(checkpoint_path, mra_config_file, pytorch_dump_path):
-    orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+    orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
 
     config = MraConfig.from_json_file(mra_config_file)
     model = MraForMaskedLM(config)
diff --git a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
index a84138a6246..317c5c713c7 100644
--- a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
@@ -77,7 +77,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
     for expert in range(num_experts):
         expert_path = switch_checkpoint_path + f"-rank-{expert}.pt"
         if os.path.isfile(expert_path):
-            expert_state = torch.load(expert_path)["model"]
+            expert_state = torch.load(expert_path, weights_only=True)["model"]
             remove_ignore_keys_(expert_state)
             expert_state = rename_fairseq_keys(expert_state, expert)
             save_path = os.path.join(
@@ -93,7 +93,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
     save_path = os.path.join(
         dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin")
     )
-    shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"]
+    shared_weights = torch.load(switch_checkpoint_path + "-shared.pt", weights_only=True)["model"]
     remove_ignore_keys_(shared_weights)
     shared_weights = rename_fairseq_keys(shared_weights, None)
     shared_weights["shared.weight"] = shared_weights["decoder.embed_tokens.weight"]
diff --git a/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
index 8d5a52bdbf8..6664a7d8ad0 100644
--- a/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
@@ -78,7 +78,7 @@ def convert_checkpoint_helper(config, orig_state_dict):
 
 def convert_nystromformer_checkpoint(checkpoint_path, nystromformer_config_file, pytorch_dump_path):
-    orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+    orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
 
     config = NystromformerConfig.from_json_file(nystromformer_config_file)
     model = NystromformerForMaskedLM(config)
diff --git a/src/transformers/models/olmo/convert_olmo_weights_to_hf.py b/src/transformers/models/olmo/convert_olmo_weights_to_hf.py
index 0e77bdc69e7..b3a2ad80b01 100644
--- a/src/transformers/models/olmo/convert_olmo_weights_to_hf.py
+++ b/src/transformers/models/olmo/convert_olmo_weights_to_hf.py
@@ -91,7 +91,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
 
     # Not sharded
     # (The sharded implementation would also work, but this is simpler.)
-    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
 
     param_count = 0
     index_dict = {"weight_map": {}}
diff --git a/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py b/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py
index d804e7a159e..1e8fb54ddb6 100644
--- a/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py
+++ b/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py
@@ -107,7 +107,7 @@ def write_model(
 
     # Not sharded
     # (The sharded implementation would also work, but this is simpler.)
-    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
 
     param_count = 0
     index_dict: Dict[str, Any] = {"weight_map": {}}
diff --git a/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py b/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py
index a14cd50a0e7..3fc5a49c7e5 100644
--- a/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py
+++ b/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py
@@ -119,7 +119,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
     print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
 
     # Not sharded
-    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
 
     param_count = 0
     index_dict = {"weight_map": {}}
diff --git a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py
index 486b477f973..9a9b0c306cb 100644
--- a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py
@@ -29,9 +29,9 @@ logger = logging.get_logger(__name__)
 
 def load_checkpoint(checkpoint_path):
     """Checkpoint path should end in model.pt"""
-    sd = torch.load(checkpoint_path, map_location="cpu")
+    sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     if "model" in sd.keys():
-        sd = torch.load(checkpoint_path, map_location="cpu")["model"]
+        sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # pop unnecessary weights
     keys_to_delete = [
diff --git a/src/transformers/models/owlv2/convert_owlv2_to_hf.py b/src/transformers/models/owlv2/convert_owlv2_to_hf.py
index ed563b2c5bd..69665bab1d5 100644
--- a/src/transformers/models/owlv2/convert_owlv2_to_hf.py
+++ b/src/transformers/models/owlv2/convert_owlv2_to_hf.py
@@ -268,10 +268,10 @@ def convert_owlv2_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_pa
 
     # Verify pixel_values and input_ids
     filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlvit_pixel_values_960.pt", repo_type="dataset")
-    original_pixel_values = torch.load(filepath).permute(0, 3, 1, 2)
+    original_pixel_values = torch.load(filepath, weights_only=True).permute(0, 3, 1, 2)
 
     filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlv2_input_ids.pt", repo_type="dataset")
-    original_input_ids = torch.load(filepath).squeeze()
+    original_input_ids = torch.load(filepath, weights_only=True).squeeze()
 
     filepath = hf_hub_download(repo_id="adirik/OWL-ViT", repo_type="space", filename="assets/astronaut.png")
     image = Image.open(filepath)
diff --git a/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py b/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py
index 6cd61b9f71c..c4b410fd3bb 100644
--- a/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py
+++ b/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py
@@ -82,7 +82,7 @@ def convert_persimmon_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_mode
     import sys
 
     sys.path.insert(0, ada_lib_path)
-    model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+    model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
     state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
     state_dict = rename_state_dict(state_dict)
diff --git a/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py b/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py
index eac4a27d11c..0a2bb9553e0 100644
--- a/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py
+++ b/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py
@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
 def convert_fairseq_plbart_checkpoint_from_disk(
     checkpoint_path, hf_config_path="uclanlp/plbart-base", finetuned=False, classification=False
 ):
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     remove_ignore_keys_(state_dict)
     vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]
diff --git a/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py b/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py
index e5fad6da1a3..ddcfb9cd241 100644
--- a/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py
+++ b/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py
@@ -151,7 +151,7 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
     logger.info(f"Converting model {model_name}...")
 
     # load original state dict
-    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 
     # rename keys
     state_dict = rename_keys(state_dict)
diff --git a/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py b/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py
index 54b8bb67e60..84788ac6aec 100644
--- a/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py
+++ b/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py
@@ -26,7 +26,7 @@ from transformers import Pop2PianoConfig, Pop2PianoForConditionalGeneration
 
 # This weights were downloaded from the official pop2piano repository
 # https://huggingface.co/sweetcocoa/pop2piano/blob/main/model-1999-val_0.67311615.ckpt
-official_weights = torch.load("./model-1999-val_0.67311615.ckpt")
+official_weights = torch.load("./model-1999-val_0.67311615.ckpt", weights_only=True)
 
 state_dict = {}
diff --git a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py
index 059a7933775..237be38fff3 100644
---
a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py +++ b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py @@ -173,7 +173,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve filename=f"{filename}", ) - state_dict = torch.load(filepath, map_location="cpu")["state_dict"] + state_dict = torch.load(filepath, map_location="cpu", weights_only=True)["state_dict"] state_dict = {key[9:]: state_dict[key] for key in state_dict} # Convert state dict using mappings diff --git a/src/transformers/models/pvt/convert_pvt_to_pytorch.py b/src/transformers/models/pvt/convert_pvt_to_pytorch.py index 99002e3d67c..633d759123f 100644 --- a/src/transformers/models/pvt/convert_pvt_to_pytorch.py +++ b/src/transformers/models/pvt/convert_pvt_to_pytorch.py @@ -165,7 +165,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path): raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given") config = PvtConfig(name_or_path=config_path) # load original model from https://github.com/whai362/PVT - state_dict = torch.load(pvt_checkpoint, map_location="cpu") + state_dict = torch.load(pvt_checkpoint, map_location="cpu", weights_only=True) rename_keys = create_rename_keys(config) for src, dest in rename_keys: diff --git a/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py b/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py index b5178cc2e99..b315d540dab 100644 --- a/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py +++ b/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py @@ -207,7 +207,7 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde ) config = PvtV2Config.from_pretrained(config_path) # load original model from https://github.com/whai362/PVT - state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu") + state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu", weights_only=True) rename_keys = create_rename_keys(config) for src, dest in rename_keys: diff --git a/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py b/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py index dc6619e217e..ea1cdd58ec9 100644 --- a/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py +++ b/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py @@ -71,7 +71,7 @@ LAYER_NAME_MAPPING = {"embedder.weight": "model.embed_tokens.weight"} def write_model(save_path, input_base_path, config, safe_serialization=True, push_to_hub=False, dtype=torch.float32): print(f"Fetching all parameters from the checkpoint at '{input_base_path}'") - model_state_dict = torch.load(input_base_path, map_location="cpu") + model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True) REPLACEMENT = { "blocks.": "layers.", diff --git a/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py index b8491db08b1..c4a6b03162f 100644 --- a/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py @@ -37,7 +37,9 @@ def 
convert_roberta_prelayernorm_checkpoint_to_pytorch(checkpoint_repo: str, pyt ) # convert state_dict - original_state_dict = torch.load(hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin")) + original_state_dict = torch.load( + hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"), weights_only=True + ) state_dict = {} for tensor_key, tensor_value in original_state_dict.items(): # The transformer implementation gives the model a unique name, rather than overwiriting 'roberta' diff --git a/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py b/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py index a0c97fc4e23..87d35db2236 100644 --- a/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py +++ b/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py @@ -112,7 +112,7 @@ def convert_rmkv_checkpoint_to_hf_format( # 3. Download model file then convert state_dict model_file = hf_hub_download(repo_id, checkpoint_file) - state_dict = torch.load(model_file, map_location="cpu") + state_dict = torch.load(model_file, map_location="cpu", weights_only=True) state_dict = convert_state_dict(state_dict) # 4. Split in shards and save @@ -147,7 +147,7 @@ def convert_rmkv_checkpoint_to_hf_format( gc.collect() for shard_file in shard_files: - state_dict = torch.load(os.path.join(output_dir, shard_file)) + state_dict = torch.load(os.path.join(output_dir, shard_file), weights_only=True) torch.save({k: v.cpu().clone() for k, v in state_dict.items()}, os.path.join(output_dir, shard_file)) del state_dict diff --git a/src/transformers/models/sam/convert_sam_to_hf.py b/src/transformers/models/sam/convert_sam_to_hf.py index dd8818b68cf..76d8884d951 100644 --- a/src/transformers/models/sam/convert_sam_to_hf.py +++ b/src/transformers/models/sam/convert_sam_to_hf.py @@ -137,7 +137,7 @@ def replace_keys(state_dict): def convert_sam_checkpoint(model_name, checkpoint_path, pytorch_dump_folder, push_to_hub): config = get_config(model_name) - state_dict = torch.load(checkpoint_path, map_location="cpu") + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True) state_dict = replace_keys(state_dict) image_processor = SamImageProcessor() diff --git a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py index 3bbc86e433b..c84e006ad64 100644 --- a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py +++ b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py @@ -191,9 +191,9 @@ def convert_segformer_checkpoint(model_name, checkpoint_path, pytorch_dump_folde # load original state dict if encoder_only: - state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu")) + state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True) else: - state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))["state_dict"] + state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)["state_dict"] # rename keys state_dict = rename_keys(state_dict, encoder_only=encoder_only) diff --git a/src/transformers/models/siglip/convert_siglip_to_hf.py b/src/transformers/models/siglip/convert_siglip_to_hf.py index 8b0a8a250dd..b61bd7ffb70 100644 --- a/src/transformers/models/siglip/convert_siglip_to_hf.py +++ b/src/transformers/models/siglip/convert_siglip_to_hf.py @@ -441,9 +441,9 @@ def convert_siglip_checkpoint(model_name, 
pytorch_dump_folder_path, verify_logit raise ValueError("Image size not supported") filepath = hf_hub_download(repo_id="nielsr/test-image", filename=filename, repo_type="dataset") - original_pixel_values = torch.load(filepath) + original_pixel_values = torch.load(filepath, weights_only=True) filepath = hf_hub_download(repo_id="nielsr/test-image", filename="siglip_input_ids.pt", repo_type="dataset") - original_input_ids = torch.load(filepath) + original_input_ids = torch.load(filepath, weights_only=True) if "i18n" not in model_name: assert inputs.input_ids.tolist() == original_input_ids.tolist() diff --git a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py index eb4d8526247..9286fae776f 100644 --- a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py +++ b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py @@ -52,7 +52,7 @@ def make_linear_from_emb(emb): def convert_fairseq_s2t_checkpoint_to_tfms(checkpoint_path, pytorch_dump_folder_path): - m2m_100 = torch.load(checkpoint_path, map_location="cpu") + m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True) args = m2m_100["args"] state_dict = m2m_100["model"] lm_head_weights = state_dict["decoder.output_projection.weight"] diff --git a/src/transformers/models/speecht5/convert_hifigan.py b/src/transformers/models/speecht5/convert_hifigan.py index 4d78bb73af3..b39012f8e25 100644 --- a/src/transformers/models/speecht5/convert_hifigan.py +++ b/src/transformers/models/speecht5/convert_hifigan.py @@ -70,7 +70,7 @@ def convert_hifigan_checkpoint( model = SpeechT5HifiGan(config) - orig_checkpoint = torch.load(checkpoint_path) + orig_checkpoint = torch.load(checkpoint_path, weights_only=True) load_weights(orig_checkpoint["model"]["generator"], model, config) stats = np.load(stats_path) diff --git a/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py index 20dea800d9d..c16e11d2b25 100644 --- a/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py @@ -361,7 +361,7 @@ def convert_speecht5_checkpoint( processor = SpeechT5Processor(tokenizer=tokenizer, feature_extractor=feature_extractor) processor.save_pretrained(pytorch_dump_folder_path) - fairseq_checkpoint = torch.load(checkpoint_path) + fairseq_checkpoint = torch.load(checkpoint_path, weights_only=True) recursively_load_weights(fairseq_checkpoint["model"], model, task) model.save_pretrained(pytorch_dump_folder_path) diff --git a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py index 21ecebebe24..3567bb674e9 100644 --- a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py +++ b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py @@ -125,7 +125,7 @@ def convert_swiftformer_checkpoint(swiftformer_name, pytorch_dump_folder_path, o if original_ckpt.startswith("https"): checkpoint = torch.hub.load_state_dict_from_url(original_ckpt, map_location="cpu", check_hash=True) else: - checkpoint = torch.load(original_ckpt, map_location="cpu") + checkpoint = torch.load(original_ckpt, map_location="cpu", weights_only=True) state_dict = checkpoint rename_keys = 
create_rename_keys(state_dict) diff --git a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py index 6402346289c..9a87ff693af 100644 --- a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py +++ b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py @@ -121,7 +121,7 @@ def convert_state_dict(orig_state_dict, model): def convert_swin_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_path, push_to_hub): - state_dict = torch.load(checkpoint_path, map_location="cpu")["model"] + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"] config = get_swin_config(model_name) model = SwinForMaskedImageModeling(config) diff --git a/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py b/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py index ce4d13421ff..cda9b0c1827 100644 --- a/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py +++ b/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py @@ -143,7 +143,7 @@ def convert_timesformer_checkpoint(checkpoint_url, pytorch_dump_folder_path, mod # download original checkpoint, hosted on Google Drive output = "pytorch_model.bin" gdown.cached_download(checkpoint_url, output, quiet=False) - files = torch.load(output, map_location="cpu") + files = torch.load(output, map_location="cpu", weights_only=True) if "model" in files: state_dict = files["model"] elif "module" in files: diff --git a/src/transformers/models/udop/convert_udop_to_hf.py b/src/transformers/models/udop/convert_udop_to_hf.py index f2d54b8ca54..8ba0de55df7 100644 --- a/src/transformers/models/udop/convert_udop_to_hf.py +++ b/src/transformers/models/udop/convert_udop_to_hf.py @@ -98,7 +98,7 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h # load original state dict checkpoint_path = name_to_checkpoint_path[model_name] - state_dict = torch.load(checkpoint_path, map_location="cpu") + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True) print("Checkpoint path:", checkpoint_path) @@ -177,12 +177,12 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h # autoregressive decoding with original input data print("Testing generation with original inputs...") filepath = hf_hub_download(repo_id="nielsr/test-image", filename="input_ids_udop.pt", repo_type="dataset") - input_ids = torch.load(filepath) + input_ids = torch.load(filepath, weights_only=True) filepath = hf_hub_download(repo_id="nielsr/test-image", filename="bbox_udop.pt", repo_type="dataset") - bbox = torch.load(filepath) + bbox = torch.load(filepath, weights_only=True) pixel_values_filename = "pixel_values_udop_512.pt" if "512" in model_name else "pixel_values_udop_224.pt" filepath = hf_hub_download(repo_id="nielsr/test-image", filename=pixel_values_filename, repo_type="dataset") - pixel_values = torch.load(filepath) + pixel_values = torch.load(filepath, weights_only=True) print("Decoded input ids:", tokenizer.decode(input_ids[0], skip_special_tokens=True)) print("Bbox shape:", bbox.shape) diff --git a/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py index fca35acb634..0f1256e0ca3 100644 --- a/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py +++ 
b/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py @@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode """ Copy/paste/tweak model's weights to transformers design. """ - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) downstream_dict = checkpoint["Downstream"] diff --git a/src/transformers/models/univnet/convert_univnet.py b/src/transformers/models/univnet/convert_univnet.py index 30520b7fa14..f790efab22f 100644 --- a/src/transformers/models/univnet/convert_univnet.py +++ b/src/transformers/models/univnet/convert_univnet.py @@ -106,7 +106,7 @@ def convert_univnet_checkpoint( repo_id=None, safe_serialization=False, ): - model_state_dict_base = torch.load(checkpoint_path, map_location="cpu") + model_state_dict_base = torch.load(checkpoint_path, map_location="cpu", weights_only=True) # Get the generator's state dict state_dict = model_state_dict_base["model_g"] diff --git a/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py b/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py index 4c07ca0a03a..fff886f8a83 100644 --- a/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py +++ b/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py @@ -99,7 +99,7 @@ def convert_video_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_p state_dict_temp = "pytorch_model-0000{i}-of-00002.bin" for shard in range(1, 3): state_dict_path = hf_hub_download(old_state_dict_id, state_dict_temp.format(i=shard)) - state_dict = torch.load(state_dict_path, map_location="cpu") + state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True) state_dict = convert_state_dict_to_hf(state_dict) model.load_state_dict(state_dict, strict=False, assign=True) model_state_dict -= set(state_dict.keys()) diff --git a/src/transformers/models/videomae/convert_videomae_to_pytorch.py b/src/transformers/models/videomae/convert_videomae_to_pytorch.py index c98160a6bb8..011c1862eb6 100644 --- a/src/transformers/models/videomae/convert_videomae_to_pytorch.py +++ b/src/transformers/models/videomae/convert_videomae_to_pytorch.py @@ -187,7 +187,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_ # download original checkpoint, hosted on Google Drive output = "pytorch_model.bin" gdown.cached_download(checkpoint_url, output, quiet=False) - files = torch.load(output, map_location="cpu") + files = torch.load(output, map_location="cpu", weights_only=True) if "model" in files: state_dict = files["model"] else: @@ -204,7 +204,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_ if "finetuned" not in model_name: local_path = hf_hub_download(repo_id="hf-internal-testing/bool-masked-pos", filename="bool_masked_pos.pt") - inputs["bool_masked_pos"] = torch.load(local_path) + inputs["bool_masked_pos"] = torch.load(local_path, weights_only=True) outputs = model(**inputs) logits = outputs.logits diff --git a/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py b/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py index 2914cfdfcd4..f0fa69ab872 100644 --- a/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py +++ b/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py @@ -78,7 +78,7 @@ def convert_vipllava_llama_to_hf(text_model_id, vision_model_id, 
output_hub_path state_dict_path = hf_hub_download(old_state_dict_id, "model_state_dict_7b.bin") - state_dict = torch.load(state_dict_path, map_location="cpu") + state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True) state_dict = convert_state_dict_to_hf(state_dict) model.load_state_dict(state_dict, strict=True, assign=True) diff --git a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py index 59733539415..ae5af9a343d 100644 --- a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py @@ -56,7 +56,7 @@ ACCEPTABLE_CHECKPOINTS = [ def load_state_dict(checkpoint_path): - sd = torch.load(checkpoint_path, map_location="cpu") + sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True) return sd diff --git a/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py b/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py index bcc05563337..5153e1faf52 100644 --- a/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py +++ b/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py @@ -82,7 +82,7 @@ def convert_vitmatte_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu filename = model_name_to_filename[model_name] filepath = hf_hub_download(repo_id="nielsr/vitmatte-checkpoints", filename=filename, repo_type="model") - state_dict = torch.load(filepath, map_location="cpu") + state_dict = torch.load(filepath, map_location="cpu", weights_only=True) # rename keys for key in state_dict.copy().keys(): diff --git a/src/transformers/models/vitpose/convert_vitpose_to_hf.py b/src/transformers/models/vitpose/convert_vitpose_to_hf.py index 0d36e332a4f..e4666751a10 100644 --- a/src/transformers/models/vitpose/convert_vitpose_to_hf.py +++ b/src/transformers/models/vitpose/convert_vitpose_to_hf.py @@ -207,7 +207,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True): ) print("Converting model...") - original_state_dict = torch.load(checkpoint_path, map_location="cpu")["state_dict"] + original_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"] all_keys = list(original_state_dict.keys()) new_keys = convert_old_keys_to_new_keys(all_keys) @@ -264,7 +264,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True): pixel_values = image_processor(images=image, boxes=boxes, return_tensors="pt").pixel_values filepath = hf_hub_download(repo_id="nielsr/test-image", filename="vitpose_batch_data.pt", repo_type="dataset") - original_pixel_values = torch.load(filepath, map_location="cpu")["img"] + original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)["img"] # we allow for a small difference in the pixel values due to the original repository using cv2 assert torch.allclose(pixel_values, original_pixel_values, atol=1e-1) diff --git a/src/transformers/models/vits/convert_original_checkpoint.py b/src/transformers/models/vits/convert_original_checkpoint.py index 267f72ccd08..7f122e86fa5 100644 --- a/src/transformers/models/vits/convert_original_checkpoint.py +++ b/src/transformers/models/vits/convert_original_checkpoint.py @@ -346,7 +346,7 @@ def convert_checkpoint( model.decoder.apply_weight_norm() - orig_checkpoint = torch.load(checkpoint_path, 
map_location=torch.device("cpu")) + orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True) recursively_load_weights(orig_checkpoint["model"], model) model.decoder.remove_weight_norm() diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py index 1702bc5a473..fa33416c8bd 100644 --- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py @@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode """ Copy/paste/tweak model's weights to transformers design. """ - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) downstream_dict = checkpoint["Downstream"] diff --git a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py index e41ae0881d9..91d4853bade 100644 --- a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py @@ -179,7 +179,7 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro @torch.no_grad() def convert_wavlm_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_path=None): # load the pre-trained checkpoints - checkpoint = torch.load(checkpoint_path) + checkpoint = torch.load(checkpoint_path, weights_only=True) cfg = WavLMConfigOrig(checkpoint["cfg"]) model = WavLMOrig(cfg) model.load_state_dict(checkpoint["model"]) diff --git a/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py index 447d4db67fc..b8c4c337679 100644 --- a/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py @@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode """ Copy/paste/tweak model's weights to transformers design. """ - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) downstream_dict = checkpoint["Downstream"] diff --git a/src/transformers/models/whisper/convert_openai_to_hf.py b/src/transformers/models/whisper/convert_openai_to_hf.py index bb63cd24cd6..343fb5513b5 100755 --- a/src/transformers/models/whisper/convert_openai_to_hf.py +++ b/src/transformers/models/whisper/convert_openai_to_hf.py @@ -157,7 +157,7 @@ def _download(url: str, root: str) -> Any: if os.path.isfile(download_target): model_bytes = open(download_target, "rb").read() if insecure_hashlib.sha256(model_bytes).hexdigest() == expected_sha256: - return torch.load(io.BytesIO(model_bytes)) + return torch.load(io.BytesIO(model_bytes), weights_only=True) else: warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") @@ -179,7 +179,7 @@ def _download(url: str, root: str) -> Any: "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model." 
) - return torch.load(io.BytesIO(model_bytes)) + return torch.load(io.BytesIO(model_bytes), weights_only=True) def convert_openai_whisper_to_tfms( @@ -190,7 +190,7 @@ def convert_openai_whisper_to_tfms( original_checkpoint = _download(_MODELS[checkpoint_path], root) openai_version = checkpoint_path else: - original_checkpoint = torch.load(checkpoint_path, map_location="cpu") + original_checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) openai_version = None dimensions = original_checkpoint["dims"] diff --git a/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py b/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py index 8ff878f2cc9..6f36b190558 100644 --- a/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py +++ b/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py @@ -279,7 +279,7 @@ def convert_xclip_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_ if "drive" in checkpoint_url: output = "pytorch_model.bin" gdown.cached_download(checkpoint_url, output, quiet=False) - state_dict = torch.load(output, map_location="cpu")["model"] + state_dict = torch.load(output, map_location="cpu", weights_only=True)["model"] else: state_dict = torch.hub.load_state_dict_from_url(checkpoint_url)["model"] diff --git a/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py b/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py index f8b5dba3c1e..dc898196260 100644 --- a/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py +++ b/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py @@ -26,7 +26,7 @@ def make_linear_from_emb(emb): def convert_fairseq_xglm_checkpoint_from_disk(checkpoint_path): - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) args = Namespace(**checkpoint["cfg"]["model"]) state_dict = checkpoint["model"] remove_ignore_keys_(state_dict) diff --git a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py index 71c3a1f989f..2e5a17921d0 100755 --- a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py @@ -29,7 +29,7 @@ logging.set_verbosity_info() def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_path): # Load checkpoint - chkpt = torch.load(xlm_checkpoint_path, map_location="cpu") + chkpt = torch.load(xlm_checkpoint_path, map_location="cpu", weights_only=True) state_dict = chkpt["model"] diff --git a/src/transformers/models/yolos/convert_yolos_to_pytorch.py b/src/transformers/models/yolos/convert_yolos_to_pytorch.py index 6cddc606614..907a11d067b 100644 --- a/src/transformers/models/yolos/convert_yolos_to_pytorch.py +++ b/src/transformers/models/yolos/convert_yolos_to_pytorch.py @@ -163,7 +163,7 @@ def convert_yolos_checkpoint( config = get_yolos_config(yolos_name) # load original state_dict - state_dict = torch.load(checkpoint_path, map_location="cpu")["model"] + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"] # load 🤗 model model = YolosForObjectDetection(config) diff --git a/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py b/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py index be46a4de81b..950769ae1e0 
100644 --- a/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py +++ b/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py @@ -75,7 +75,7 @@ def convert_checkpoint_helper(max_position_embeddings, orig_state_dict): def convert_yoso_checkpoint(checkpoint_path, yoso_config_file, pytorch_dump_path): - orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"] + orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"] config = YosoConfig.from_json_file(yoso_config_file) model = YosoForMaskedLM(config) diff --git a/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py b/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py index 9a6701c35bc..cbf47a636b7 100644 --- a/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py +++ b/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py @@ -347,7 +347,7 @@ def convert_zoedepth_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu filename="zoedepth_pixel_values.pt", repo_type="dataset", ) - original_pixel_values = torch.load(filepath, map_location="cpu") + original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True) assert torch.allclose(pixel_values, original_pixel_values) # verify logits @@ -358,7 +358,7 @@ def convert_zoedepth_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu repo_type="dataset", revision="1865dbb81984f01c89e83eec10f8d07efd10743d", ) - cats_pixel_values = torch.load(filepath, map_location="cpu") + cats_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True) depth = model(cats_pixel_values).predicted_depth # Verify logits
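For reference, every hunk in this patch makes the same mechanical change: opting in to PyTorch's restricted unpickler at each torch.load call site. A minimal sketch of the before/after behavior follows; it is not part of the diff, the checkpoint path is hypothetical, and it assumes PyTorch 1.13 or newer, where the weights_only argument is available.

    import torch

    # Hypothetical checkpoint path, for illustration only.
    ckpt = "checkpoint.pt"

    # Before: torch.load unpickles arbitrary Python objects, so loading an
    # untrusted checkpoint can execute code embedded in the file.
    state_dict = torch.load(ckpt, map_location="cpu")

    # After: weights_only=True restricts unpickling to tensors and other
    # allowlisted types; anything else raises an UnpicklingError instead of
    # executing. Checkpoints that pickle custom objects (e.g. an
    # argparse.Namespace in a fairseq "args" entry) will fail to load unless
    # those types are allowlisted via torch.serialization.add_safe_globals.
    state_dict = torch.load(ckpt, map_location="cpu", weights_only=True)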