Add weights_only=True to torch.load (#37062)

cyyever 2025-04-12 00:18:41 +08:00 committed by GitHub
parent bf46e44878
commit 28eae8b4bd
106 changed files with 161 additions and 136 deletions
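
The change itself is mechanical: every torch.load call gains weights_only=True, which switches loading to a restricted unpickler that only reconstructs tensors, primitive containers, and explicitly allow-listed types, instead of running arbitrary pickled code from the checkpoint file. Recent PyTorch releases have also been moving toward making this the default, so passing it explicitly keeps these scripts behaving the same across versions. A minimal sketch of the before/after call (the checkpoint path here is hypothetical, for illustration only):

import torch

ckpt_path = "model.pt"  # hypothetical path

# Legacy call: the full pickle machinery runs, so a malicious file can execute code on load.
state_dict = torch.load(ckpt_path, map_location="cpu")

# Hardened call: only tensors and allow-listed types are rebuilt; anything else raises an
# UnpicklingError instead of being executed.
state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=True)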

View File

@@ -122,7 +122,7 @@ class GlueDataset(Dataset):
        with FileLock(lock_path):
            if os.path.exists(cached_features_file) and not args.overwrite_cache:
                start = time.time()
-               self.features = torch.load(cached_features_file)
+               self.features = torch.load(cached_features_file, weights_only=True)
                logger.info(
                    f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start
                )

View File

@@ -109,7 +109,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
    if not os.path.exists(ckpt_path):
        logger.info(f"{model_type} model not found, downloading into `{CACHE_DIR}`.")
        _download(model_info["repo_id"], model_info["file_name"])
-   checkpoint = torch.load(ckpt_path, map_location=device)
+   checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
    # this is a hack
    model_args = checkpoint["model_args"]
    if "input_vocab_size" not in model_args:

View File

@@ -71,7 +71,7 @@ def rename_key(dct, old, new):
def load_xsum_checkpoint(checkpoint_path):
    """Checkpoint path should end in model.pt"""
-   sd = torch.load(checkpoint_path, map_location="cpu")
+   sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    hub_interface = torch.hub.load("pytorch/fairseq", "bart.large.cnn").eval()
    hub_interface.model.load_state_dict(sd["model"])
    return hub_interface

View File

@@ -101,7 +101,7 @@ def main(raw_args=None):
    model = BertModel.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
-       state_dict=torch.load(args.pytorch_model_path),
+       state_dict=torch.load(args.pytorch_model_path, weights_only=True),
        cache_dir=args.cache_dir,
    )

View File

@@ -168,7 +168,7 @@ def convert_biogpt_checkpoint_to_pytorch(biogpt_checkpoint_path, pytorch_dump_fo
    checkpoint_file = os.path.join(biogpt_checkpoint_path, "checkpoint.pt")
    if not os.path.isfile(checkpoint_file):
        raise ValueError(f"path to the file {checkpoint_file} does not exist!")
-   chkpt = torch.load(checkpoint_file, map_location="cpu")
+   chkpt = torch.load(checkpoint_file, map_location="cpu", weights_only=True)
    args = chkpt["cfg"]["model"]
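
Some of the converted checkpoints bundle configuration objects alongside their tensors (the "cfg" entry read just above, for instance). If the restricted unpickler rejects such an object, recent PyTorch releases let callers allow-list the offending type explicitly rather than dropping back to weights_only=False. A hedged sketch with a hypothetical checkpoint that pickled an argparse.Namespace:

import torch
from argparse import Namespace

# Hypothetical case: allow-list argparse.Namespace so the restricted unpickler accepts it.
torch.serialization.add_safe_globals([Namespace])
chkpt = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)  # hypothetical path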

View File

@@ -79,7 +79,7 @@ def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_
    """
    Copy/paste/tweak model's weights to our BERT structure.
    """
-   model = torch.load(checkpoint_path, map_location="cpu")
+   model = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    sd = model["model"]
    cfg = BlenderbotConfig.from_json_file(config_json_path)
    m = BlenderbotForConditionalGeneration(cfg)

View File

@@ -104,7 +104,7 @@ def convert_bloom_checkpoint_to_pytorch(
        for i in range(pretraining_tp):
            # load all TP files
            f_name = file.replace("model_00", f"model_0{i}")
-           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
            # Rename keys in the transformers names
            keys = list(temp.keys())
@@ -164,7 +164,7 @@ def convert_bloom_checkpoint_to_pytorch(
        for i in range(pretraining_tp):
            # load all TP files
            f_name = file.replace("model_00", f"model_0{i}")
-           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
            # Rename keys in the transformers names
            keys = list(temp.keys())

View File

@@ -130,13 +130,15 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
        for possible_name in ["consolidated.pth", "consolidated.00.pth"]:
            possible_path = os.path.join(input_model_path, possible_name)
            if os.path.exists(possible_path):
-               loaded = torch.load(possible_path, map_location="cpu")
+               loaded = torch.load(possible_path, map_location="cpu", weights_only=True)
                break
        assert loaded is not None
    else:
        # Sharded
        loaded = [
-           torch.load(os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu")
+           torch.load(
+               os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu", weights_only=True
+           )
            for i in range(num_shards)
        ]
@@ -314,7 +316,7 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
    # Load VQGAN weights
    vqgan_path = os.path.join(input_base_path, "tokenizer/vqgan.ckpt")
-   vqgan_state_dict = torch.load(vqgan_path, map_location="cpu")["state_dict"]
+   vqgan_state_dict = torch.load(vqgan_path, map_location="cpu", weights_only=True)["state_dict"]
    for k, v in vqgan_state_dict.items():
        if "decoder" in k:
            continue  # we dont do image generation yet

View File

@@ -104,7 +104,7 @@ def convert_chinese_clip_checkpoint(checkpoint_path, pytorch_dump_folder_path, c
    hf_model = ChineseCLIPModel(config).eval()
-   pt_weights = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+   pt_weights = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]
    pt_weights = {(name[7:] if name.startswith("module.") else name): value for name, value in pt_weights.items()}
    copy_text_model_and_projection(hf_model, pt_weights)

View File

@@ -169,7 +169,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
    model = CLIPSegForImageSegmentation(config)
    model.eval()
-   state_dict = torch.load(checkpoint_path, map_location="cpu")
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # remove some keys
    for key in state_dict.copy().keys():

View File

@@ -201,9 +201,9 @@ def convert_clvp_weights(checkpoint_path, pytorch_dump_folder_path):
        _download(url=each_model_url, root=each_model_path)
        if each_model_name == "clvp":
-           clvp_checkpoint = torch.load(each_model_path, map_location="cpu")
+           clvp_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
        else:
-           decoder_checkpoint = torch.load(each_model_path, map_location="cpu")
+           decoder_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
    # Converting the weights
    converted_checkpoint.update(**convert_encoder_weights(clvp_checkpoint))

View File

@@ -309,7 +309,7 @@ def convert_cvt_checkpoint(cvt_model, image_size, cvt_file_name, pytorch_dump_fo
    model = CvtForImageClassification(config)
    image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")
    image_processor.size["shortest_edge"] = image_size
-   original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"))
+   original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"), weights_only=True)
    huggingface_weights = OrderedDict()
    list_of_state_dict = []

View File

@@ -143,7 +143,7 @@ def write_model(model_name, pretrained_model_weights_path, pytorch_dump_folder_p
    config.id2label = id2label
    config.label2id = {v: k for k, v in id2label.items()}
    # load original model from local path
-   loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"))["model"]
+   loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"), weights_only=True)["model"]
    # Renaming the original model state dictionary to HF compatibile
    all_keys = list(loaded.keys())
    new_keys = convert_old_keys_to_new_keys(all_keys)

View File

@@ -205,7 +205,7 @@ def convert_checkpoint(
    sample_rate=16000,
    repo_id=None,
):
-   model_dict = torch.load(checkpoint_path, "cpu")
+   model_dict = torch.load(checkpoint_path, "cpu", weights_only=True)
    config = DacConfig()

View File

@@ -224,7 +224,7 @@ def load_beit_model(args, is_finetuned, is_large):
    )
    patch_size = model.patch_embed.patch_size
    args.window_size = (args.input_size // patch_size[0], args.input_size // patch_size[1])
-   checkpoint = torch.load(args.beit_checkpoint, map_location="cpu")
+   checkpoint = torch.load(args.beit_checkpoint, map_location="cpu", weights_only=True)
    print(f"Load ckpt from {args.beit_checkpoint}")
    checkpoint_model = None

View File

@@ -125,7 +125,7 @@ def convert_deformable_detr_checkpoint(
    logger.info("Converting model...")
    # load original state dict
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # rename keys
    for key in state_dict.copy().keys():
        val = state_dict.pop(key)

View File

@@ -229,7 +229,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
    else:
        raise ValueError(f"Model name {model_name} not supported")
    checkpoint_path = hf_hub_download(repo_id="nielsr/deta-checkpoints", filename=filename)
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # rename keys
    rename_keys = create_rename_keys(config)

View File

@@ -230,7 +230,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
    else:
        raise ValueError(f"Model name {model_name} not supported")
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # original state dict
    for name, param in state_dict.items():

View File

@@ -123,7 +123,7 @@ def prepare_img():
def convert_efficientformer_checkpoint(
    checkpoint_path: Path, efficientformer_config_file: Path, pytorch_dump_path: Path, push_to_hub: bool
):
-   orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    config = EfficientFormerConfig.from_json_file(efficientformer_config_file)
    model = EfficientFormerForImageClassificationWithTeacher(config)
    model_name = "_".join(checkpoint_path.split("/")[-1].split(".")[0].split("_")[:-1])

View File

@@ -228,7 +228,7 @@ def convert_openai_checkpoint(model_name=None, pytorch_dump_folder_path=None):
    weight_dict = []
    mapping = {}
    for i, dict_name in enumerate(model_to_convert):
-       old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}")["model"]
+       old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}", weights_only=True)["model"]
        new_dic = {}
        for k in old_dic.keys():

View File

@@ -132,13 +132,17 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
    print(
        "Original Mega encoder:",
        original_mlm.mega.load_state_dict(
-           torch.load(os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu")
+           torch.load(
+               os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu", weights_only=True
+           )
        ),
    )
    print(
        "Original Mega MLM layer:",
        original_mlm.mlm_head.load_state_dict(
-           torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+           torch.load(
+               os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+           )
        ),
    )
@@ -234,7 +238,9 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
    print(
        "HF Mega MLM layer:",
        hf_mlm.mlm_head.load_state_dict(
-           torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+           torch.load(
+               os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+           )
        ),
    )

View File

@@ -129,7 +129,7 @@ def convert_weight_and_push(
    print(f"Downloading weights for {name}...")
    checkpoint_path = cached_download(checkpoint)
    print(f"Converting {name}...")
-   from_state_dict = torch.load(checkpoint_path)["state_dict"]
+   from_state_dict = torch.load(checkpoint_path, weights_only=True)["state_dict"]
    from_model.load_state_dict(from_state_dict)
    from_model.eval()
    with torch.no_grad():

View File

@@ -229,7 +229,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
        filename=f"{filename}",
    )
-   state_dict = torch.load(filepath, map_location="cpu")
+   state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
    # rename keys
    rename_keys = create_rename_keys(config)
    for src, dest in rename_keys:

View File

@@ -27,7 +27,7 @@ NEW_KEY = "lm_head.weight"
def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
-   d = torch.load(checkpoint_path)
+   d = torch.load(checkpoint_path, weights_only=True)
    d[NEW_KEY] = d.pop(OLD_KEY)
    os.makedirs(pytorch_dump_folder_path, exist_ok=True)
    torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))

View File

@@ -29,7 +29,9 @@ CheckpointState = collections.namedtuple(
def load_states_from_checkpoint(model_file: str) -> CheckpointState:
    print(f"Reading saved model from {model_file}")
-   state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
+   state_dict = torch.load(
+       model_file, map_location=lambda s, l: default_restore_location(s, "cpu"), weights_only=True
+   )
    return CheckpointState(**state_dict)

View File

@@ -226,7 +226,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
    config, expected_shape = get_dpt_config(checkpoint_url)
    # load original state_dict from URL
    # state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")
-   state_dict = torch.load(checkpoint_url, map_location="cpu")
+   state_dict = torch.load(checkpoint_url, map_location="cpu", weights_only=True)
    # remove certain keys
    remove_ignore_keys_(state_dict)
    # rename keys

View File

@@ -325,7 +325,7 @@ def convert_checkpoint(
    )
    feature_extractor.save_pretrained(pytorch_dump_folder_path)
-   original_checkpoint = torch.load(checkpoint_path)
+   original_checkpoint = torch.load(checkpoint_path, weights_only=True)
    if "best_state" in original_checkpoint:
        # we might have a training state saved, in which case discard the yaml results and just retain the weights
        original_checkpoint = original_checkpoint["best_state"]

View File

@@ -164,7 +164,7 @@ def convert_FastSpeech2ConformerModel_checkpoint(
    # Prepare the model
    model = FastSpeech2ConformerModel(config)
-   espnet_checkpoint = torch.load(checkpoint_path)
+   espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
    hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
    model.load_state_dict(hf_compatible_state_dict)

View File

@@ -104,7 +104,7 @@ def convert_hifigan_checkpoint(
    model = FastSpeech2ConformerHifiGan(config)
-   orig_checkpoint = torch.load(checkpoint_path)
+   orig_checkpoint = torch.load(checkpoint_path, weights_only=True)
    load_weights(orig_checkpoint, model, config)
    model.save_pretrained(pytorch_dump_folder_path)

View File

@@ -51,7 +51,7 @@ def convert_FastSpeech2ConformerWithHifiGan_checkpoint(
    model = FastSpeech2ConformerModel(model_config)
-   espnet_checkpoint = torch.load(checkpoint_path)
+   espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
    hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
    model.load_state_dict(hf_compatible_state_dict)

View File

@@ -62,7 +62,7 @@ def convert_dalle_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_p
    encoder = Encoder()
    if os.path.exists(checkpoint_path):
-       ckpt = torch.load(checkpoint_path)
+       ckpt = torch.load(checkpoint_path, weights_only=True)
    else:
        ckpt = torch.hub.load_state_dict_from_url(checkpoint_path)

View File

@@ -73,7 +73,7 @@ def convert_flava_checkpoint(checkpoint_path, codebook_path, pytorch_dump_folder
    codebook_state_dict = convert_dalle_checkpoint(codebook_path, None, save_checkpoint=False)
    if os.path.exists(checkpoint_path):
-       state_dict = torch.load(checkpoint_path, map_location="cpu")
+       state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    else:
        state_dict = torch.hub.load_state_dict_from_url(checkpoint_path, map_location="cpu")

View File

@@ -87,7 +87,7 @@ def rename_state_dict(state_dict):
def convert_fuyu_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_model_path, safe_serialization=False):
    sys.path.insert(0, ada_lib_path)
-   model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+   model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
    state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
    state_dict = rename_state_dict(state_dict)

View File

@@ -72,7 +72,7 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
    head_dim = config.head_dim
    print(f"Fetching all parameters from the checkpoint at '{input_base_path}'")
-   model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+   model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
    model_state_dict.pop("freqs_cis")
    state_dict = {}

View File

@@ -97,11 +97,11 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
        for file in files:
            print(file)
-           loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu")
+           loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
            model_state_dict.update(loaded_state_dict)
    else:
        print("Model does not seem to be sharded")
-       model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+       model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
        model_state_dict.pop("freqs_cis")
    state_dict = {}

View File

@@ -297,7 +297,7 @@ def convert_git_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub=Fal
    if "large" in model_name and not is_video and "large-r" not in model_name:
        # large checkpoints take way too long to download
        checkpoint_path = model_name_to_path[model_name]
-       state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+       state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    else:
        checkpoint_url = model_name_to_url[model_name]
        state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu", file_name=model_name)[

View File

@@ -53,7 +53,7 @@ def load_weights(input_dir: str):
    elif bin_files:
        bin_files = sorted(bin_files, key=lambda x: int(x.rsplit("-", 3)[1]))
        for file in bin_files:
-           tensors = torch.load(file, map_location="cpu")
+           tensors = torch.load(file, map_location="cpu", weights_only=True)
            all_weights.update(tensors)
        return all_weights

View File

@@ -140,7 +140,7 @@ def convert_glpn_checkpoint(checkpoint_path, pytorch_dump_folder_path, push_to_h
    logger.info("Converting model...")
    # load original state dict
-   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
    # rename keys
    state_dict = rename_keys(state_dict)

View File

@@ -153,7 +153,7 @@ def main(args):
        raise FileNotFoundError(f"ERROR! could not find file {checkpoint_path}")
    # Load the model.
-   checkpoint = torch.load(checkpoint_path, map_location="cpu")
+   checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # Load the config.
    config_megatron = checkpoint["hyper_parameters"]["cfg"]

View File

@@ -163,7 +163,7 @@ def convert_groupvit_checkpoint(
    config = GroupViTConfig()
    model = GroupViTModel(config).eval()
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    new_state_dict = convert_state_dict(state_dict, config)
    missing_keys, unexpected_keys = model.load_state_dict(new_state_dict, strict=False)
    assert missing_keys == ["text_model.embeddings.position_ids"]

View File

@@ -32,7 +32,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
    """
    Copy/paste/tweak model's weights to transformers design.
    """
-   checkpoint = torch.load(checkpoint_path, map_location="cpu")
+   checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    if checkpoint["Config"]["downstream_expert"]["modelrc"]["select"] not in SUPPORTED_MODELS:
        raise NotImplementedError(f"The supported s3prl models are {SUPPORTED_MODELS}")

View File

@@ -228,12 +228,17 @@ def write_model(
    if num_shards == 1:
        # Not sharded
        # (The sharded implementation would also work, but this is simpler.)
-       loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu")
+       loaded = torch.load(
+           os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu", weights_only=True
+       )
    else:
        # Sharded
        checkpoint_list = sorted([file for file in os.listdir(input_base_path) if file.endswith(".pth")])
        print("Loading in order:", checkpoint_list)
-       loaded = [torch.load(os.path.join(input_base_path, file), map_location="cpu") for file in checkpoint_list]
+       loaded = [
+           torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
+           for file in checkpoint_list
+       ]
    param_count = 0
    index_dict = {"weight_map": {}}
    for layer_i in range(n_layers):

View File

@@ -219,12 +219,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
    # verify inputs
    filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_pixel_values.pt", repo_type="dataset")
-   original_pixel_values = torch.load(filepath, map_location="cpu")
+   original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
    assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
    if model_id == "liuhaotian/llava-v1.6-mistral-7b":
        filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_input_ids.pt", repo_type="dataset")
-       original_input_ids = torch.load(filepath, map_location="cpu")
+       original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
        # replace -200 by image_token_index (since we use token ID = 32000 for the image token)
        original_input_ids[original_input_ids == -200] = image_token_index
        assert original_input_ids[0].tolist() == inputs.input_ids[0].tolist()
@@ -233,7 +233,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
        filepath = hf_hub_download(
            repo_id="nielsr/test-image", filename="llava_1_6_34b_input_ids.pt", repo_type="dataset"
        )
-       original_input_ids = torch.load(filepath, map_location="cpu")
+       original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
        # replace -200 by image_token_index
        original_input_ids[original_input_ids == -200] = image_token_index

View File

@@ -212,7 +212,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
    filepath = hf_hub_download(
        repo_id="RaushanTurganbay/test-image", filename="llava_onevision_pixel_values.pt", repo_type="dataset"
    )
-   original_pixel_values = torch.load(filepath, map_location="cpu")
+   original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
    assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
    image_sizes = torch.tensor([[899, 1024]])

View File

@@ -42,7 +42,7 @@ def convert_longformer_qa_checkpoint_to_pytorch(
    longformer = LongformerModel.from_pretrained(longformer_model)
    lightning_model = LightningModel(longformer)
-   ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"))
+   ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"), weights_only=True)
    lightning_model.load_state_dict(ckpt["state_dict"])
    # init longformer question answering model

View File

@@ -32,7 +32,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
    config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
    # Load in the weights from the checkpoint_path
-   state_dict = torch.load(checkpoint_path, map_location="cpu")
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # Load the entity vocab file
    entity_vocab = load_entity_vocab(entity_vocab_path)

View File

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
def convert_fairseq_m2m100_checkpoint_from_disk(checkpoint_path):
-   m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+   m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    args = m2m_100["args"] or m2m_100["cfg"]["model"]
    state_dict = m2m_100["model"]
    remove_ignore_keys_(state_dict)

View File

@@ -108,7 +108,7 @@ def convert_mamba_checkpoint_file_to_huggingface_model_file(
    )
    logger.info(f"Loading model from {mamba_checkpoint_path} based on config from {config_json_file}")
    # Load weights and config from paths
-   original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu")
+   original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu", weights_only=True)
    with open(config_json_file, "r", encoding="utf-8") as json_file:
        original_ssm_config_dict = json.load(json_file)

View File

@@ -38,7 +38,7 @@ def load_state_dict_from_safetensors(mamba2_checkpoint_path: str, ckpt_name: str
def load_state_dict_from_torch(mamba2_checkpoint_path: str, ckpt_name: str) -> Dict[str, torch.Tensor]:
-   return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu")
+   return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu", weights_only=True)
def convert_ssm_config_to_hf_config(config_ssm: Dict, mamba2_model_dict: Dict) -> Mamba2Config:

View File

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
def convert_fairseq_mbart_checkpoint_from_disk(
    checkpoint_path, hf_config_path="facebook/mbart-large-en-ro", finetuned=False, mbart_50=False
):
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    remove_ignore_keys_(state_dict)
    vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]

View File

@@ -294,9 +294,9 @@ def main():
    if args.path_to_checkpoint.endswith(".zip"):
        with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
            with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-               input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+               input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
    else:
-       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
    if args.config_file == "":
        # Default config of megatron-bert 345m

View File

@@ -275,7 +275,7 @@ def merge_transformers_sharded_states(path, num_checkpoints):
    state_dict = {}
    for i in range(1, num_checkpoints + 1):
        checkpoint_path = os.path.join(path, f"pytorch_model-{i:05d}-of-{num_checkpoints:05d}.bin")
-       current_chunk = torch.load(checkpoint_path, map_location="cpu")
+       current_chunk = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
        state_dict.update(current_chunk)
    return state_dict
@@ -298,7 +298,7 @@ def get_megatron_sharded_states(args, tp_size, pp_size, pp_rank):
            checkpoint_path = os.path.join(args.load_path, sub_dir_name, checkpoint_name)
            if os.path.isfile(checkpoint_path):
                break
-       state_dict = torch.load(checkpoint_path, map_location="cpu")
+       state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
        tp_state_dicts.append(state_dict)
    return tp_state_dicts
@@ -338,7 +338,7 @@ def convert_checkpoint_from_megatron_to_transformers(args):
            rank0_checkpoint_path = os.path.join(args.load_path, sub_dir, rank0_checkpoint_name)
            break
    print(f"Loading Megatron-LM checkpoint arguments from: {rank0_checkpoint_path}")
-   state_dict = torch.load(rank0_checkpoint_path, map_location="cpu")
+   state_dict = torch.load(rank0_checkpoint_path, map_location="cpu", weights_only=True)
    megatron_args = state_dict.get("args", None)
    if megatron_args is None:
        raise ValueError(
@@ -634,7 +634,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
    sub_dirs = [x for x in os.listdir(args.load_path) if x.startswith("pytorch_model")]
    if len(sub_dirs) == 1:
        checkpoint_name = "pytorch_model.bin"
-       state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu")
+       state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu", weights_only=True)
    else:
        num_checkpoints = len(sub_dirs) - 1
        state_dict = merge_transformers_sharded_states(args.load_path, num_checkpoints)

View File

@@ -263,9 +263,9 @@ def main():
    if args.path_to_checkpoint.endswith(".zip"):
        with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
            with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-               input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+               input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
    else:
-       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
    ds_args = input_state_dict.get("args", None)

View File

@@ -208,7 +208,9 @@ def convert_and_write_model(input_dir: str, output_dir: str, max_position_embedd
    else:
        shards = [file for file in os.listdir(input_dir) if re.match(r"consolidated.\d+.pth", file)]
        shards = sorted(shards, key=lambda x: int(x.split(".")[1]))
-       loaded_shards = [torch.load(os.path.join(input_dir, file), map_location="cpu") for file in shards]
+       loaded_shards = [
+           torch.load(os.path.join(input_dir, file), map_location="cpu", weights_only=True) for file in shards
+       ]
        full_state_dict = convert_state_dict_sharded(loaded_shards, config)
    # Load weights into model and resave them

View File

@@ -94,7 +94,8 @@ def write_model(model_path, input_base_path, model_size, safe_serialization=True
    print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
    # Load weights
    loaded = [
-       torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu") for i in range(8)
+       torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu", weights_only=True)
+       for i in range(8)
    ]
    merged_state_dict = {}

View File

@@ -342,10 +342,15 @@ def write_model(
            path = os.path.join(input_base_path, "consolidated.00.pth")
        else:
            path = os.path.join(input_base_path, "consolidated.pth")
-       loaded = [torch.load(path, map_location="cpu", mmap=True)]
+       loaded = [torch.load(path, map_location="cpu", mmap=True, weights_only=True)]
    else:
        loaded = [
-           torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu", mmap=True)
+           torch.load(
+               os.path.join(input_base_path, f"consolidated.{i:02d}.pth"),
+               map_location="cpu",
+               mmap=True,
+               weights_only=True,
+           )
            for i in range(num_shards)
        ]
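
This hunk also shows that weights_only=True composes with the other torch.load options used throughout these scripts, such as map_location and mmap=True for lazily mapping large shards from disk rather than reading them eagerly. A small sketch with a hypothetical shard path:

import torch

# Hypothetical shard path. mmap=True memory-maps tensor storage from the file on demand,
# while weights_only=True keeps the restricted unpickler in effect for the metadata.
shard = torch.load("consolidated.00.pth", map_location="cpu", mmap=True, weights_only=True)
print(sum(t.numel() for t in shard.values()))  # inspect parameter count without eager copies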

View File

@@ -33,7 +33,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
    config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
    # Load in the weights from the checkpoint_path
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["module"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["module"]
    # Load the entity vocab file
    entity_vocab = load_original_entity_vocab(entity_vocab_path)

View File

@@ -199,7 +199,7 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
    config = get_mobilevit_config(mobilevit_name)
    # load original state_dict
-   state_dict = torch.load(checkpoint_path, map_location="cpu")
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # load 🤗 model
    if mobilevit_name.startswith("deeplabv3_"):

View File

@@ -239,7 +239,7 @@ def convert_mobilevitv2_checkpoint(task_name, checkpoint_path, orig_config_path,
    config = get_mobilevitv2_config(task_name, orig_config_path)
    # load original state_dict
-   checkpoint = torch.load(checkpoint_path, map_location="cpu")
+   checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # load huggingface model
    if task_name.startswith("ade20k_") or task_name.startswith("voc_"):

View File

@@ -77,7 +77,7 @@ def convert_checkpoint_helper(max_position_embeddings, orig_state_dict):
def convert_mra_checkpoint(checkpoint_path, mra_config_file, pytorch_dump_path):
-   orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+   orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
    config = MraConfig.from_json_file(mra_config_file)
    model = MraForMaskedLM(config)

View File

@@ -77,7 +77,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
    for expert in range(num_experts):
        expert_path = switch_checkpoint_path + f"-rank-{expert}.pt"
        if os.path.isfile(expert_path):
-           expert_state = torch.load(expert_path)["model"]
+           expert_state = torch.load(expert_path, weights_only=True)["model"]
            remove_ignore_keys_(expert_state)
            expert_state = rename_fairseq_keys(expert_state, expert)
            save_path = os.path.join(
@@ -93,7 +93,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
    save_path = os.path.join(
        dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin")
    )
-   shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"]
+   shared_weights = torch.load(switch_checkpoint_path + "-shared.pt", weights_only=True)["model"]
    remove_ignore_keys_(shared_weights)
    shared_weights = rename_fairseq_keys(shared_weights, None)
    shared_weights["shared.weight"] = shared_weights["decoder.embed_tokens.weight"]

View File

@@ -78,7 +78,7 @@ def convert_checkpoint_helper(config, orig_state_dict):
def convert_nystromformer_checkpoint(checkpoint_path, nystromformer_config_file, pytorch_dump_path):
-   orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+   orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
    config = NystromformerConfig.from_json_file(nystromformer_config_file)
    model = NystromformerForMaskedLM(config)

View File

@@ -91,7 +91,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
    # Not sharded
    # (The sharded implementation would also work, but this is simpler.)
-   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
    param_count = 0
    index_dict = {"weight_map": {}}

View File

@@ -107,7 +107,7 @@ def write_model(
    # Not sharded
    # (The sharded implementation would also work, but this is simpler.)
-   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
    param_count = 0
    index_dict: Dict[str, Any] = {"weight_map": {}}

View File

@@ -119,7 +119,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
    print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
    # Not sharded
-   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
    param_count = 0
    index_dict = {"weight_map": {}}

View File

@@ -29,9 +29,9 @@ logger = logging.get_logger(__name__)
def load_checkpoint(checkpoint_path):
    """Checkpoint path should end in model.pt"""
-   sd = torch.load(checkpoint_path, map_location="cpu")
+   sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    if "model" in sd.keys():
-       sd = torch.load(checkpoint_path, map_location="cpu")["model"]
+       sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # pop unnecessary weights
    keys_to_delete = [

View File

@@ -268,10 +268,10 @@ def convert_owlv2_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_pa
    # Verify pixel_values and input_ids
    filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlvit_pixel_values_960.pt", repo_type="dataset")
-   original_pixel_values = torch.load(filepath).permute(0, 3, 1, 2)
+   original_pixel_values = torch.load(filepath, weights_only=True).permute(0, 3, 1, 2)
    filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlv2_input_ids.pt", repo_type="dataset")
-   original_input_ids = torch.load(filepath).squeeze()
+   original_input_ids = torch.load(filepath, weights_only=True).squeeze()
    filepath = hf_hub_download(repo_id="adirik/OWL-ViT", repo_type="space", filename="assets/astronaut.png")
    image = Image.open(filepath)

View File

@@ -82,7 +82,7 @@ def convert_persimmon_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_mode
    import sys
    sys.path.insert(0, ada_lib_path)
-   model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+   model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
    state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
    state_dict = rename_state_dict(state_dict)

View File

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
def convert_fairseq_plbart_checkpoint_from_disk(
    checkpoint_path, hf_config_path="uclanlp/plbart-base", finetuned=False, classification=False
):
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    remove_ignore_keys_(state_dict)
    vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]

View File

@@ -151,7 +151,7 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
    logger.info(f"Converting model {model_name}...")
    # load original state dict
-   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
    # rename keys
    state_dict = rename_keys(state_dict)

View File

@@ -26,7 +26,7 @@ from transformers import Pop2PianoConfig, Pop2PianoForConditionalGeneration
# This weights were downloaded from the official pop2piano repository
# https://huggingface.co/sweetcocoa/pop2piano/blob/main/model-1999-val_0.67311615.ckpt
-official_weights = torch.load("./model-1999-val_0.67311615.ckpt")
+official_weights = torch.load("./model-1999-val_0.67311615.ckpt", weights_only=True)
state_dict = {}

View File

@@ -173,7 +173,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
        filename=f"{filename}",
    )
-   state_dict = torch.load(filepath, map_location="cpu")["state_dict"]
+   state_dict = torch.load(filepath, map_location="cpu", weights_only=True)["state_dict"]
    state_dict = {key[9:]: state_dict[key] for key in state_dict}
    # Convert state dict using mappings

View File

@@ -165,7 +165,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path):
        raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given")
    config = PvtConfig(name_or_path=config_path)
    # load original model from https://github.com/whai362/PVT
-   state_dict = torch.load(pvt_checkpoint, map_location="cpu")
+   state_dict = torch.load(pvt_checkpoint, map_location="cpu", weights_only=True)
    rename_keys = create_rename_keys(config)
    for src, dest in rename_keys:

View File

@@ -207,7 +207,7 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde
    )
    config = PvtV2Config.from_pretrained(config_path)
    # load original model from https://github.com/whai362/PVT
-   state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu")
+   state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu", weights_only=True)
    rename_keys = create_rename_keys(config)
    for src, dest in rename_keys:

View File

@@ -71,7 +71,7 @@ LAYER_NAME_MAPPING = {"embedder.weight": "model.embed_tokens.weight"}
def write_model(save_path, input_base_path, config, safe_serialization=True, push_to_hub=False, dtype=torch.float32):
    print(f"Fetching all parameters from the checkpoint at '{input_base_path}'")
-   model_state_dict = torch.load(input_base_path, map_location="cpu")
+   model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)
    REPLACEMENT = {
        "blocks.": "layers.",

View File

@@ -37,7 +37,9 @@ def convert_roberta_prelayernorm_checkpoint_to_pytorch(checkpoint_repo: str, pyt
    )
    # convert state_dict
-   original_state_dict = torch.load(hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"))
+   original_state_dict = torch.load(
+       hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"), weights_only=True
+   )
    state_dict = {}
    for tensor_key, tensor_value in original_state_dict.items():
        # The transformer implementation gives the model a unique name, rather than overwiriting 'roberta'

View File

@@ -112,7 +112,7 @@ def convert_rmkv_checkpoint_to_hf_format(
 # 3. Download model file then convert state_dict
 model_file = hf_hub_download(repo_id, checkpoint_file)
-state_dict = torch.load(model_file, map_location="cpu")
+state_dict = torch.load(model_file, map_location="cpu", weights_only=True)
 state_dict = convert_state_dict(state_dict)
 # 4. Split in shards and save
@@ -147,7 +147,7 @@ def convert_rmkv_checkpoint_to_hf_format(
 gc.collect()
 for shard_file in shard_files:
-state_dict = torch.load(os.path.join(output_dir, shard_file))
+state_dict = torch.load(os.path.join(output_dir, shard_file), weights_only=True)
 torch.save({k: v.cpu().clone() for k, v in state_dict.items()}, os.path.join(output_dir, shard_file))
 del state_dict

View File

@@ -137,7 +137,7 @@ def replace_keys(state_dict):
 def convert_sam_checkpoint(model_name, checkpoint_path, pytorch_dump_folder, push_to_hub):
 config = get_config(model_name)
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 state_dict = replace_keys(state_dict)
 image_processor = SamImageProcessor()

View File

@@ -191,9 +191,9 @@ def convert_segformer_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
 # load original state dict
 if encoder_only:
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 else:
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))["state_dict"]
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)["state_dict"]
 # rename keys
 state_dict = rename_keys(state_dict, encoder_only=encoder_only)

View File

@@ -441,9 +441,9 @@ def convert_siglip_checkpoint(model_name, pytorch_dump_folder_path, verify_logit
 raise ValueError("Image size not supported")
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename=filename, repo_type="dataset")
-original_pixel_values = torch.load(filepath)
+original_pixel_values = torch.load(filepath, weights_only=True)
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="siglip_input_ids.pt", repo_type="dataset")
-original_input_ids = torch.load(filepath)
+original_input_ids = torch.load(filepath, weights_only=True)
 if "i18n" not in model_name:
 assert inputs.input_ids.tolist() == original_input_ids.tolist()

View File

@@ -52,7 +52,7 @@ def make_linear_from_emb(emb):
 def convert_fairseq_s2t_checkpoint_to_tfms(checkpoint_path, pytorch_dump_folder_path):
-m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 args = m2m_100["args"]
 state_dict = m2m_100["model"]
 lm_head_weights = state_dict["decoder.output_projection.weight"]
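Fairseq-style checkpoints such as the one loaded above keep training metadata ("args", "model") next to the weights. If such an entry is backed by a class outside torch.load's built-in safelist, weights_only=True refuses to reconstruct it and raises an UnpicklingError naming the offending global; classes you trust can then be allowlisted explicitly. A sketch under the assumption of a recent PyTorch (roughly 2.5+, where both helpers exist); "checkpoint.pt" is a placeholder:

import argparse
import torch
from torch.serialization import add_safe_globals, safe_globals

# Option 1: allowlist a trusted class for the whole process.
add_safe_globals([argparse.Namespace])
ckpt = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)

# Option 2: scope the allowlist to a single load.
with safe_globals([argparse.Namespace]):
    ckpt = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)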

View File

@@ -70,7 +70,7 @@ def convert_hifigan_checkpoint(
 model = SpeechT5HifiGan(config)
-orig_checkpoint = torch.load(checkpoint_path)
+orig_checkpoint = torch.load(checkpoint_path, weights_only=True)
 load_weights(orig_checkpoint["model"]["generator"], model, config)
 stats = np.load(stats_path)

View File

@@ -361,7 +361,7 @@ def convert_speecht5_checkpoint(
 processor = SpeechT5Processor(tokenizer=tokenizer, feature_extractor=feature_extractor)
 processor.save_pretrained(pytorch_dump_folder_path)
-fairseq_checkpoint = torch.load(checkpoint_path)
+fairseq_checkpoint = torch.load(checkpoint_path, weights_only=True)
 recursively_load_weights(fairseq_checkpoint["model"], model, task)
 model.save_pretrained(pytorch_dump_folder_path)

View File

@@ -125,7 +125,7 @@ def convert_swiftformer_checkpoint(swiftformer_name, pytorch_dump_folder_path, o
 if original_ckpt.startswith("https"):
 checkpoint = torch.hub.load_state_dict_from_url(original_ckpt, map_location="cpu", check_hash=True)
 else:
-checkpoint = torch.load(original_ckpt, map_location="cpu")
+checkpoint = torch.load(original_ckpt, map_location="cpu", weights_only=True)
 state_dict = checkpoint
 rename_keys = create_rename_keys(state_dict)

View File

@@ -121,7 +121,7 @@ def convert_state_dict(orig_state_dict, model):
 def convert_swin_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_path, push_to_hub):
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 config = get_swin_config(model_name)
 model = SwinForMaskedImageModeling(config)

View File

@@ -143,7 +143,7 @@ def convert_timesformer_checkpoint(checkpoint_url, pytorch_dump_folder_path, mod
 # download original checkpoint, hosted on Google Drive
 output = "pytorch_model.bin"
 gdown.cached_download(checkpoint_url, output, quiet=False)
-files = torch.load(output, map_location="cpu")
+files = torch.load(output, map_location="cpu", weights_only=True)
 if "model" in files:
 state_dict = files["model"]
 elif "module" in files:

View File

@@ -98,7 +98,7 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h
 # load original state dict
 checkpoint_path = name_to_checkpoint_path[model_name]
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 print("Checkpoint path:", checkpoint_path)
@@ -177,12 +177,12 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h
 # autoregressive decoding with original input data
 print("Testing generation with original inputs...")
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="input_ids_udop.pt", repo_type="dataset")
-input_ids = torch.load(filepath)
+input_ids = torch.load(filepath, weights_only=True)
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="bbox_udop.pt", repo_type="dataset")
-bbox = torch.load(filepath)
+bbox = torch.load(filepath, weights_only=True)
 pixel_values_filename = "pixel_values_udop_512.pt" if "512" in model_name else "pixel_values_udop_224.pt"
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename=pixel_values_filename, repo_type="dataset")
-pixel_values = torch.load(filepath)
+pixel_values = torch.load(filepath, weights_only=True)
 print("Decoded input ids:", tokenizer.decode(input_ids[0], skip_special_tokens=True))
 print("Bbox shape:", bbox.shape)

View File

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
 """
 Copy/paste/tweak model's weights to transformers design.
 """
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 downstream_dict = checkpoint["Downstream"]

View File

@@ -106,7 +106,7 @@ def convert_univnet_checkpoint(
 repo_id=None,
 safe_serialization=False,
 ):
-model_state_dict_base = torch.load(checkpoint_path, map_location="cpu")
+model_state_dict_base = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 # Get the generator's state dict
 state_dict = model_state_dict_base["model_g"]

View File

@@ -99,7 +99,7 @@ def convert_video_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_p
 state_dict_temp = "pytorch_model-0000{i}-of-00002.bin"
 for shard in range(1, 3):
 state_dict_path = hf_hub_download(old_state_dict_id, state_dict_temp.format(i=shard))
-state_dict = torch.load(state_dict_path, map_location="cpu")
+state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True)
 state_dict = convert_state_dict_to_hf(state_dict)
 model.load_state_dict(state_dict, strict=False, assign=True)
 model_state_dict -= set(state_dict.keys())

View File

@@ -187,7 +187,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
 # download original checkpoint, hosted on Google Drive
 output = "pytorch_model.bin"
 gdown.cached_download(checkpoint_url, output, quiet=False)
-files = torch.load(output, map_location="cpu")
+files = torch.load(output, map_location="cpu", weights_only=True)
 if "model" in files:
 state_dict = files["model"]
 else:
@@ -204,7 +204,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
 if "finetuned" not in model_name:
 local_path = hf_hub_download(repo_id="hf-internal-testing/bool-masked-pos", filename="bool_masked_pos.pt")
-inputs["bool_masked_pos"] = torch.load(local_path)
+inputs["bool_masked_pos"] = torch.load(local_path, weights_only=True)
 outputs = model(**inputs)
 logits = outputs.logits

View File

@@ -78,7 +78,7 @@ def convert_vipllava_llama_to_hf(text_model_id, vision_model_id, output_hub_path
 state_dict_path = hf_hub_download(old_state_dict_id, "model_state_dict_7b.bin")
-state_dict = torch.load(state_dict_path, map_location="cpu")
+state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True)
 state_dict = convert_state_dict_to_hf(state_dict)
 model.load_state_dict(state_dict, strict=True, assign=True)

View File

@@ -56,7 +56,7 @@ ACCEPTABLE_CHECKPOINTS = [
 def load_state_dict(checkpoint_path):
-sd = torch.load(checkpoint_path, map_location="cpu")
+sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 return sd

View File

@@ -82,7 +82,7 @@ def convert_vitmatte_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu
 filename = model_name_to_filename[model_name]
 filepath = hf_hub_download(repo_id="nielsr/vitmatte-checkpoints", filename=filename, repo_type="model")
-state_dict = torch.load(filepath, map_location="cpu")
+state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
 # rename keys
 for key in state_dict.copy().keys():

View File

@@ -207,7 +207,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True):
 )
 print("Converting model...")
-original_state_dict = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+original_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]
 all_keys = list(original_state_dict.keys())
 new_keys = convert_old_keys_to_new_keys(all_keys)
@@ -264,7 +264,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True):
 pixel_values = image_processor(images=image, boxes=boxes, return_tensors="pt").pixel_values
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="vitpose_batch_data.pt", repo_type="dataset")
-original_pixel_values = torch.load(filepath, map_location="cpu")["img"]
+original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)["img"]
 # we allow for a small difference in the pixel values due to the original repository using cv2
 assert torch.allclose(pixel_values, original_pixel_values, atol=1e-1)

View File

@@ -346,7 +346,7 @@ def convert_checkpoint(
 model.decoder.apply_weight_norm()
-orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 recursively_load_weights(orig_checkpoint["model"], model)
 model.decoder.remove_weight_norm()

View File

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
 """
 Copy/paste/tweak model's weights to transformers design.
 """
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 downstream_dict = checkpoint["Downstream"]

View File

@@ -179,7 +179,7 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro
 @torch.no_grad()
 def convert_wavlm_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_path=None):
 # load the pre-trained checkpoints
-checkpoint = torch.load(checkpoint_path)
+checkpoint = torch.load(checkpoint_path, weights_only=True)
 cfg = WavLMConfigOrig(checkpoint["cfg"])
 model = WavLMOrig(cfg)
 model.load_state_dict(checkpoint["model"])

View File

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
 """
 Copy/paste/tweak model's weights to transformers design.
 """
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 downstream_dict = checkpoint["Downstream"]

View File

@@ -157,7 +157,7 @@ def _download(url: str, root: str) -> Any:
 if os.path.isfile(download_target):
 model_bytes = open(download_target, "rb").read()
 if insecure_hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
-return torch.load(io.BytesIO(model_bytes))
+return torch.load(io.BytesIO(model_bytes), weights_only=True)
 else:
 warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
@@ -179,7 +179,7 @@ def _download(url: str, root: str) -> Any:
 "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
 )
-return torch.load(io.BytesIO(model_bytes))
+return torch.load(io.BytesIO(model_bytes), weights_only=True)
 def convert_openai_whisper_to_tfms(
@@ -190,7 +190,7 @@ def convert_openai_whisper_to_tfms(
 original_checkpoint = _download(_MODELS[checkpoint_path], root)
 openai_version = checkpoint_path
 else:
-original_checkpoint = torch.load(checkpoint_path, map_location="cpu")
+original_checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 openai_version = None
 dimensions = original_checkpoint["dims"]
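torch.load accepts any file-like object, so the in-memory io.BytesIO path in the Whisper downloader keeps working unchanged with weights_only=True. A condensed, self-contained sketch of the same verify-then-load idea; the URL and digest below are placeholders, not values from the commit:

import hashlib
import io
import urllib.request

import torch

url = "https://example.com/model.pt"  # placeholder URL
expected_sha256 = "0" * 64            # placeholder digest

# Download, verify the checksum, and only then unpickle the tensors.
model_bytes = urllib.request.urlopen(url).read()
if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
    raise RuntimeError("SHA256 mismatch; refusing to load the checkpoint")
checkpoint = torch.load(io.BytesIO(model_bytes), map_location="cpu", weights_only=True)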

Some files were not shown because too many files have changed in this diff.