diff --git a/src/transformers/data/datasets/glue.py b/src/transformers/data/datasets/glue.py
index 72df3bece21..43a1b75e518 100644
--- a/src/transformers/data/datasets/glue.py
+++ b/src/transformers/data/datasets/glue.py
@@ -122,7 +122,7 @@ class GlueDataset(Dataset):
         with FileLock(lock_path):
             if os.path.exists(cached_features_file) and not args.overwrite_cache:
                 start = time.time()
-                self.features = torch.load(cached_features_file)
+                self.features = torch.load(cached_features_file, weights_only=True)
                 logger.info(
                     f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start
                 )
diff --git a/src/transformers/models/bark/convert_suno_to_hf.py b/src/transformers/models/bark/convert_suno_to_hf.py
index f8c8399cb61..803656b623e 100644
--- a/src/transformers/models/bark/convert_suno_to_hf.py
+++ b/src/transformers/models/bark/convert_suno_to_hf.py
@@ -109,7 +109,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
     if not os.path.exists(ckpt_path):
         logger.info(f"{model_type} model not found, downloading into `{CACHE_DIR}`.")
         _download(model_info["repo_id"], model_info["file_name"])
-    checkpoint = torch.load(ckpt_path, map_location=device)
+    checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
     # this is a hack
     model_args = checkpoint["model_args"]
     if "input_vocab_size" not in model_args:
diff --git a/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py
index e694d96ca0d..84dc415443f 100644
--- a/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py
@@ -71,7 +71,7 @@ def rename_key(dct, old, new):
 
 def load_xsum_checkpoint(checkpoint_path):
     """Checkpoint path should end in model.pt"""
-    sd = torch.load(checkpoint_path, map_location="cpu")
+    sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     hub_interface = torch.hub.load("pytorch/fairseq", "bart.large.cnn").eval()
     hub_interface.model.load_state_dict(sd["model"])
     return hub_interface
diff --git a/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py b/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py
index f7cb149053a..8e1e85d5c04 100644
--- a/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py
+++ b/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py
@@ -101,7 +101,7 @@ def main(raw_args=None):
 
     model = BertModel.from_pretrained(
         pretrained_model_name_or_path=args.model_name,
-        state_dict=torch.load(args.pytorch_model_path),
+        state_dict=torch.load(args.pytorch_model_path, weights_only=True),
         cache_dir=args.cache_dir,
     )
diff --git a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
index c930a850462..c390d2e39f6 100755
--- a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
@@ -168,7 +168,7 @@ def convert_biogpt_checkpoint_to_pytorch(biogpt_checkpoint_path, pytorch_dump_fo
     checkpoint_file = os.path.join(biogpt_checkpoint_path, "checkpoint.pt")
     if not os.path.isfile(checkpoint_file):
         raise ValueError(f"path to the file {checkpoint_file} does not exist!")
-    chkpt = torch.load(checkpoint_file, map_location="cpu")
+    chkpt = torch.load(checkpoint_file, map_location="cpu", weights_only=True)
 
     args = chkpt["cfg"]["model"]
diff --git a/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
index c5919b94d42..d8ce9b056c3 100644
--- a/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
@@ -79,7 +79,7 @@ def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_
     """
     Copy/paste/tweak model's weights to our BERT structure.
     """
-    model = torch.load(checkpoint_path, map_location="cpu")
+    model = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     sd = model["model"]
     cfg = BlenderbotConfig.from_json_file(config_json_path)
     m = BlenderbotForConditionalGeneration(cfg)
diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
index 40ba6240d3e..73d251875dc 100644
--- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
@@ -104,7 +104,7 @@ def convert_bloom_checkpoint_to_pytorch(
             for i in range(pretraining_tp):
                 # load all TP files
                 f_name = file.replace("model_00", f"model_0{i}")
-                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
 
                 # Rename keys in the transformers names
                 keys = list(temp.keys())
@@ -164,7 +164,7 @@ def convert_bloom_checkpoint_to_pytorch(
             for i in range(pretraining_tp):
                 # load all TP files
                 f_name = file.replace("model_00", f"model_0{i}")
-                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+                temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
 
                 # Rename keys in the transformers names
                 keys = list(temp.keys())
diff --git a/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py b/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py
index ff45c9b597e..f74607f7b3c 100644
--- a/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py
+++ b/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py
@@ -130,13 +130,15 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
         for possible_name in ["consolidated.pth", "consolidated.00.pth"]:
             possible_path = os.path.join(input_model_path, possible_name)
             if os.path.exists(possible_path):
-                loaded = torch.load(possible_path, map_location="cpu")
+                loaded = torch.load(possible_path, map_location="cpu", weights_only=True)
                 break
         assert loaded is not None
     else:
         # Sharded
         loaded = [
-            torch.load(os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu")
+            torch.load(
+                os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu", weights_only=True
+            )
             for i in range(num_shards)
         ]
 
@@ -314,7 +316,7 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
 
     # Load VQGAN weights
     vqgan_path = os.path.join(input_base_path, "tokenizer/vqgan.ckpt")
-    vqgan_state_dict = torch.load(vqgan_path, map_location="cpu")["state_dict"]
+    vqgan_state_dict = torch.load(vqgan_path, map_location="cpu", weights_only=True)["state_dict"]
     for k, v in vqgan_state_dict.items():
         if "decoder" in k:
             continue  # we dont do image generation yet
diff --git a/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py b/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py
index 02c4b7b754b..adc9300ef51 100644
--- a/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py
+++ b/src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py
@@ -104,7 +104,7 @@ def convert_chinese_clip_checkpoint(checkpoint_path, pytorch_dump_folder_path, c
 
     hf_model = ChineseCLIPModel(config).eval()
 
-    pt_weights = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+    pt_weights = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]
     pt_weights = {(name[7:] if name.startswith("module.") else name): value for name, value in pt_weights.items()}
 
     copy_text_model_and_projection(hf_model, pt_weights)
diff --git a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py
index c614d61e5b3..be2cfdee87d 100644
--- a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py
+++ b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py
@@ -169,7 +169,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
     model = CLIPSegForImageSegmentation(config)
     model.eval()
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # remove some keys
     for key in state_dict.copy().keys():
diff --git a/src/transformers/models/clvp/convert_clvp_to_hf.py b/src/transformers/models/clvp/convert_clvp_to_hf.py
index 4ae6fd42549..89babb3c4ca 100644
--- a/src/transformers/models/clvp/convert_clvp_to_hf.py
+++ b/src/transformers/models/clvp/convert_clvp_to_hf.py
@@ -201,9 +201,9 @@ def convert_clvp_weights(checkpoint_path, pytorch_dump_folder_path):
             _download(url=each_model_url, root=each_model_path)
 
         if each_model_name == "clvp":
-            clvp_checkpoint = torch.load(each_model_path, map_location="cpu")
+            clvp_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
         else:
-            decoder_checkpoint = torch.load(each_model_path, map_location="cpu")
+            decoder_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
 
     # Converting the weights
     converted_checkpoint.update(**convert_encoder_weights(clvp_checkpoint))
diff --git a/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py
index 9f76c92887f..d39777680b1 100644
--- a/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py
@@ -309,7 +309,7 @@ def convert_cvt_checkpoint(cvt_model, image_size, cvt_file_name, pytorch_dump_fo
     model = CvtForImageClassification(config)
     image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")
     image_processor.size["shortest_edge"] = image_size
-    original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"))
+    original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"), weights_only=True)
 
     huggingface_weights = OrderedDict()
     list_of_state_dict = []
diff --git a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
index a6e5081b484..ae3a6771015 100644
--- a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
@@ -143,7 +143,7 @@ def write_model(model_name, pretrained_model_weights_path, pytorch_dump_folder_p
     config.id2label = id2label
     config.label2id = {v: k for k, v in id2label.items()}
     # load original model from local path
-    loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"))["model"]
+    loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"), weights_only=True)["model"]
     # Renaming the original model state dictionary to HF compatibile
     all_keys = list(loaded.keys())
     new_keys = convert_old_keys_to_new_keys(all_keys)
diff --git a/src/transformers/models/dac/convert_dac_checkpoint.py b/src/transformers/models/dac/convert_dac_checkpoint.py
index bfeb96fbdd4..b1728a7da11 100644
--- a/src/transformers/models/dac/convert_dac_checkpoint.py
+++ b/src/transformers/models/dac/convert_dac_checkpoint.py
@@ -205,7 +205,7 @@ def convert_checkpoint(
     sample_rate=16000,
     repo_id=None,
 ):
-    model_dict = torch.load(checkpoint_path, "cpu")
+    model_dict = torch.load(checkpoint_path, "cpu", weights_only=True)
 
     config = DacConfig()
diff --git a/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
index 0c6f42f4ba7..3f9d7773516 100755
--- a/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
@@ -224,7 +224,7 @@ def load_beit_model(args, is_finetuned, is_large):
     )
     patch_size = model.patch_embed.patch_size
     args.window_size = (args.input_size // patch_size[0], args.input_size // patch_size[1])
-    checkpoint = torch.load(args.beit_checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.beit_checkpoint, map_location="cpu", weights_only=True)
 
     print(f"Load ckpt from {args.beit_checkpoint}")
     checkpoint_model = None
diff --git a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py
index 781b823e96f..c88582eaccf 100644
--- a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py
+++ b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py
@@ -125,7 +125,7 @@ def convert_deformable_detr_checkpoint(
     logger.info("Converting model...")
 
     # load original state dict
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # rename keys
     for key in state_dict.copy().keys():
         val = state_dict.pop(key)
diff --git a/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
index 60e93efe7c6..6436451190a 100644
--- a/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
+++ b/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
@@ -229,7 +229,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
     else:
         raise ValueError(f"Model name {model_name} not supported")
     checkpoint_path = hf_hub_download(repo_id="nielsr/deta-checkpoints", filename=filename)
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # rename keys
     rename_keys = create_rename_keys(config)
diff --git a/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
index 392750fa67a..c2e1ae6001d 100644
--- a/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
+++ b/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
@@ -230,7 +230,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
     else:
         raise ValueError(f"Model name {model_name} not supported")
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # original state dict
     for name, param in state_dict.items():
diff --git a/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
index 7431cd6136a..8ac9a13f5c5 100644
--- a/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
@@ -123,7 +123,7 @@ def prepare_img():
 def convert_efficientformer_checkpoint(
     checkpoint_path: Path, efficientformer_config_file: Path, pytorch_dump_path: Path, push_to_hub: bool
 ):
-    orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     config = EfficientFormerConfig.from_json_file(efficientformer_config_file)
     model = EfficientFormerForImageClassificationWithTeacher(config)
     model_name = "_".join(checkpoint_path.split("/")[-1].split(".")[0].split("_")[:-1])
diff --git a/src/transformers/models/deprecated/jukebox/convert_jukebox.py b/src/transformers/models/deprecated/jukebox/convert_jukebox.py
index 960c8f6ff57..aac3b2efe73 100644
--- a/src/transformers/models/deprecated/jukebox/convert_jukebox.py
+++ b/src/transformers/models/deprecated/jukebox/convert_jukebox.py
@@ -228,7 +228,7 @@ def convert_openai_checkpoint(model_name=None, pytorch_dump_folder_path=None):
     weight_dict = []
     mapping = {}
     for i, dict_name in enumerate(model_to_convert):
-        old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}")["model"]
+        old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}", weights_only=True)["model"]
 
         new_dic = {}
         for k in old_dic.keys():
diff --git a/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py
index 1f791dab240..c6dbb12890e 100644
--- a/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py
@@ -132,13 +132,17 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
     print(
         "Original Mega encoder:",
         original_mlm.mega.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
     print(
         "Original Mega MLM layer:",
         original_mlm.mlm_head.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
@@ -234,7 +238,9 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
     print(
         "HF Mega MLM layer:",
         hf_mlm.mlm_head.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
diff --git a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py
index 466b14f6bad..cd87217f051 100644
--- a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py
+++ b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py
@@ -129,7 +129,7 @@ def convert_weight_and_push(
     print(f"Downloading weights for {name}...")
     checkpoint_path = cached_download(checkpoint)
     print(f"Converting {name}...")
-    from_state_dict = torch.load(checkpoint_path)["state_dict"]
+    from_state_dict = torch.load(checkpoint_path, weights_only=True)["state_dict"]
     from_model.load_state_dict(from_state_dict)
     from_model.eval()
     with torch.no_grad():
diff --git a/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py b/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py
index 5c6da13ae88..d43ff7f40dd 100644
--- a/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py
+++ b/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py
@@ -229,7 +229,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
         filename=f"{filename}",
     )
 
-    state_dict = torch.load(filepath, map_location="cpu")
+    state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
     # rename keys
     rename_keys = create_rename_keys(config)
     for src, dest in rename_keys:
diff --git a/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
index fbf34012924..03f38084cfb 100644
--- a/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
@@ -27,7 +27,7 @@ NEW_KEY = "lm_head.weight"
 
 def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
-    d = torch.load(checkpoint_path)
+    d = torch.load(checkpoint_path, weights_only=True)
     d[NEW_KEY] = d.pop(OLD_KEY)
     os.makedirs(pytorch_dump_folder_path, exist_ok=True)
     torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))
diff --git a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py
index d24c2f01db4..5151c0972a7 100644
--- a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py
@@ -29,7 +29,9 @@ CheckpointState = collections.namedtuple(
 
 def load_states_from_checkpoint(model_file: str) -> CheckpointState:
     print(f"Reading saved model from {model_file}")
-    state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
+    state_dict = torch.load(
+        model_file, map_location=lambda s, l: default_restore_location(s, "cpu"), weights_only=True
+    )
     return CheckpointState(**state_dict)
diff --git a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py
index d7dc6d104f4..ceae9b84711 100644
--- a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py
+++ b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py
@@ -226,7 +226,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
     config, expected_shape = get_dpt_config(checkpoint_url)
     # load original state_dict from URL
     # state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")
-    state_dict = torch.load(checkpoint_url, map_location="cpu")
+    state_dict = torch.load(checkpoint_url, map_location="cpu", weights_only=True)
     # remove certain keys
     remove_ignore_keys_(state_dict)
     # rename keys
diff --git a/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py b/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py
index 4db97bd6883..f1fb0168705 100644
--- a/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py
+++ b/src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py
@@ -325,7 +325,7 @@ def convert_checkpoint(
     )
     feature_extractor.save_pretrained(pytorch_dump_folder_path)
 
-    original_checkpoint = torch.load(checkpoint_path)
+    original_checkpoint = torch.load(checkpoint_path, weights_only=True)
     if "best_state" in original_checkpoint:
         # we might have a training state saved, in which case discard the yaml results and just retain the weights
         original_checkpoint = original_checkpoint["best_state"]
diff --git a/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
index bb9c432f822..3a5bb2d2e2e 100644
--- a/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
@@ -164,7 +164,7 @@ def convert_FastSpeech2ConformerModel_checkpoint(
 
     # Prepare the model
     model = FastSpeech2ConformerModel(config)
-    espnet_checkpoint = torch.load(checkpoint_path)
+    espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
     hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
     model.load_state_dict(hf_compatible_state_dict)
diff --git a/src/transformers/models/fastspeech2_conformer/convert_hifigan.py b/src/transformers/models/fastspeech2_conformer/convert_hifigan.py
index ec9f57ce714..70aada84bd5 100644
--- a/src/transformers/models/fastspeech2_conformer/convert_hifigan.py
+++ b/src/transformers/models/fastspeech2_conformer/convert_hifigan.py
@@ -104,7 +104,7 @@ def convert_hifigan_checkpoint(
 
     model = FastSpeech2ConformerHifiGan(config)
 
-    orig_checkpoint = torch.load(checkpoint_path)
+    orig_checkpoint = torch.load(checkpoint_path, weights_only=True)
     load_weights(orig_checkpoint, model, config)
 
     model.save_pretrained(pytorch_dump_folder_path)
diff --git a/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py b/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py
index 2a780d5cf0b..6f840438dca 100644
--- a/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py
+++ b/src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py
@@ -51,7 +51,7 @@ def convert_FastSpeech2ConformerWithHifiGan_checkpoint(
 
     model = FastSpeech2ConformerModel(model_config)
 
-    espnet_checkpoint = torch.load(checkpoint_path)
+    espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
     hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
     model.load_state_dict(hf_compatible_state_dict)
diff --git a/src/transformers/models/flava/convert_dalle_to_flava_codebook.py b/src/transformers/models/flava/convert_dalle_to_flava_codebook.py
index 7b544125114..6408d0e1df0 100644
--- a/src/transformers/models/flava/convert_dalle_to_flava_codebook.py
+++ b/src/transformers/models/flava/convert_dalle_to_flava_codebook.py
@@ -62,7 +62,7 @@ def convert_dalle_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_p
     encoder = Encoder()
 
     if os.path.exists(checkpoint_path):
-        ckpt = torch.load(checkpoint_path)
+        ckpt = torch.load(checkpoint_path, weights_only=True)
     else:
         ckpt = torch.hub.load_state_dict_from_url(checkpoint_path)
diff --git a/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py b/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py
index 95ebb2bfdb2..8b6e536a3ab 100644
--- a/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py
+++ b/src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py
@@ -73,7 +73,7 @@ def convert_flava_checkpoint(checkpoint_path, codebook_path, pytorch_dump_folder
     codebook_state_dict = convert_dalle_checkpoint(codebook_path, None, save_checkpoint=False)
 
     if os.path.exists(checkpoint_path):
-        state_dict = torch.load(checkpoint_path, map_location="cpu")
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     else:
         state_dict = torch.hub.load_state_dict_from_url(checkpoint_path, map_location="cpu")
diff --git a/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py b/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py
index 6d029c0d13a..29ef7859c9a 100644
--- a/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py
+++ b/src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py
@@ -87,7 +87,7 @@ def rename_state_dict(state_dict):
 
 def convert_fuyu_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_model_path, safe_serialization=False):
     sys.path.insert(0, ada_lib_path)
-    model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+    model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
     state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
     state_dict = rename_state_dict(state_dict)
diff --git a/src/transformers/models/gemma/convert_gemma_weights_to_hf.py b/src/transformers/models/gemma/convert_gemma_weights_to_hf.py
index 9b71be35bfa..fd275c157f3 100644
--- a/src/transformers/models/gemma/convert_gemma_weights_to_hf.py
+++ b/src/transformers/models/gemma/convert_gemma_weights_to_hf.py
@@ -72,7 +72,7 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
     head_dim = config.head_dim
 
     print(f"Fetching all parameters from the checkpoint at '{input_base_path}'")
-    model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+    model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
     model_state_dict.pop("freqs_cis")
 
     state_dict = {}
diff --git a/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py b/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py
index 1ad7d23c3c3..c41f9a2fdbb 100644
--- a/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py
+++ b/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py
@@ -97,11 +97,11 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
         for file in files:
             print(file)
-            loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu")
+            loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
             model_state_dict.update(loaded_state_dict)
     else:
         print("Model does not seem to be sharded")
-        model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+        model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
     model_state_dict.pop("freqs_cis")
 
     state_dict = {}
diff --git a/src/transformers/models/git/convert_git_to_pytorch.py b/src/transformers/models/git/convert_git_to_pytorch.py
index 2f93a6b03a6..4a9d8a01599 100644
--- a/src/transformers/models/git/convert_git_to_pytorch.py
+++ b/src/transformers/models/git/convert_git_to_pytorch.py
@@ -297,7 +297,7 @@ def convert_git_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub=Fal
     if "large" in model_name and not is_video and "large-r" not in model_name:
         # large checkpoints take way too long to download
         checkpoint_path = model_name_to_path[model_name]
-        state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     else:
         checkpoint_url = model_name_to_url[model_name]
         state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu", file_name=model_name)[
diff --git a/src/transformers/models/glm/convert_glm_weights_to_hf.py b/src/transformers/models/glm/convert_glm_weights_to_hf.py
index 1053f984d7f..df1fd7537f4 100644
--- a/src/transformers/models/glm/convert_glm_weights_to_hf.py
+++ b/src/transformers/models/glm/convert_glm_weights_to_hf.py
@@ -53,7 +53,7 @@ def load_weights(input_dir: str):
     elif bin_files:
         bin_files = sorted(bin_files, key=lambda x: int(x.rsplit("-", 3)[1]))
         for file in bin_files:
-            tensors = torch.load(file, map_location="cpu")
+            tensors = torch.load(file, map_location="cpu", weights_only=True)
             all_weights.update(tensors)
         return all_weights
diff --git a/src/transformers/models/glpn/convert_glpn_to_pytorch.py b/src/transformers/models/glpn/convert_glpn_to_pytorch.py
index 5d18c3b73a5..51088fb7244 100644
--- a/src/transformers/models/glpn/convert_glpn_to_pytorch.py
+++ b/src/transformers/models/glpn/convert_glpn_to_pytorch.py
@@ -140,7 +140,7 @@ def convert_glpn_checkpoint(checkpoint_path, pytorch_dump_folder_path, push_to_h
     logger.info("Converting model...")
 
     # load original state dict
-    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 
     # rename keys
     state_dict = rename_keys(state_dict)
diff --git a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py
index 2625701c1a7..c4e2ff67c5c 100644
--- a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py
+++ b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py
@@ -153,7 +153,7 @@ def main(args):
         raise FileNotFoundError(f"ERROR! could not find file {checkpoint_path}")
 
     # Load the model.
-    checkpoint = torch.load(checkpoint_path, map_location="cpu")
+    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # Load the config.
     config_megatron = checkpoint["hyper_parameters"]["cfg"]
diff --git a/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py b/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py
index 059f10f6129..6bc28184985 100644
--- a/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py
+++ b/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py
@@ -163,7 +163,7 @@ def convert_groupvit_checkpoint(
     config = GroupViTConfig()
     model = GroupViTModel(config).eval()
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     new_state_dict = convert_state_dict(state_dict, config)
     missing_keys, unexpected_keys = model.load_state_dict(new_state_dict, strict=False)
     assert missing_keys == ["text_model.embeddings.position_ids"]
diff --git a/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py
index ff15b90088a..c66c41ce36b 100644
--- a/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py
+++ b/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py
@@ -32,7 +32,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
     """
    Copy/paste/tweak model's weights to transformers design.
     """
-    checkpoint = torch.load(checkpoint_path, map_location="cpu")
+    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     if checkpoint["Config"]["downstream_expert"]["modelrc"]["select"] not in SUPPORTED_MODELS:
         raise NotImplementedError(f"The supported s3prl models are {SUPPORTED_MODELS}")
diff --git a/src/transformers/models/llama/convert_llama_weights_to_hf.py b/src/transformers/models/llama/convert_llama_weights_to_hf.py
index eb2862eb203..84b5c53a916 100644
--- a/src/transformers/models/llama/convert_llama_weights_to_hf.py
+++ b/src/transformers/models/llama/convert_llama_weights_to_hf.py
@@ -228,12 +228,17 @@ def write_model(
     if num_shards == 1:
         # Not sharded
         # (The sharded implementation would also work, but this is simpler.)
-        loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu")
+        loaded = torch.load(
+            os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu", weights_only=True
+        )
     else:
         # Sharded
         checkpoint_list = sorted([file for file in os.listdir(input_base_path) if file.endswith(".pth")])
         print("Loading in order:", checkpoint_list)
-        loaded = [torch.load(os.path.join(input_base_path, file), map_location="cpu") for file in checkpoint_list]
+        loaded = [
+            torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
+            for file in checkpoint_list
+        ]
     param_count = 0
     index_dict = {"weight_map": {}}
     for layer_i in range(n_layers):
diff --git a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py
index 06edc5c9b1a..85f21d4a5be 100644
--- a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py
+++ b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py
@@ -219,12 +219,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
 
     # verify inputs
     filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_pixel_values.pt", repo_type="dataset")
-    original_pixel_values = torch.load(filepath, map_location="cpu")
+    original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
     assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
 
     if model_id == "liuhaotian/llava-v1.6-mistral-7b":
         filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_input_ids.pt", repo_type="dataset")
-        original_input_ids = torch.load(filepath, map_location="cpu")
+        original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
         # replace -200 by image_token_index (since we use token ID = 32000 for the image token)
         original_input_ids[original_input_ids == -200] = image_token_index
         assert original_input_ids[0].tolist() == inputs.input_ids[0].tolist()
@@ -233,7 +233,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
         filepath = hf_hub_download(
             repo_id="nielsr/test-image", filename="llava_1_6_34b_input_ids.pt", repo_type="dataset"
         )
-        original_input_ids = torch.load(filepath, map_location="cpu")
+        original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
         # replace -200 by image_token_index
         original_input_ids[original_input_ids == -200] = image_token_index
diff --git a/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py b/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py
index 65c57f624f5..bd8b9e3c4c9 100644
--- a/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py
+++ b/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py
@@ -212,7 +212,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
     filepath = hf_hub_download(
         repo_id="RaushanTurganbay/test-image", filename="llava_onevision_pixel_values.pt", repo_type="dataset"
     )
-    original_pixel_values = torch.load(filepath, map_location="cpu")
+    original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
     assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
 
     image_sizes = torch.tensor([[899, 1024]])
diff --git a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py
index 4ef2131228b..cbd7600e963 100644
--- a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py
+++ b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py
@@ -42,7 +42,7 @@ def convert_longformer_qa_checkpoint_to_pytorch(
     longformer = LongformerModel.from_pretrained(longformer_model)
     lightning_model = LightningModel(longformer)
 
-    ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"))
+    ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"), weights_only=True)
     lightning_model.load_state_dict(ckpt["state_dict"])
 
     # init longformer question answering model
diff --git a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
index c86fa6e3089..aae550e8d09 100644
--- a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
@@ -32,7 +32,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
     config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
 
     # Load in the weights from the checkpoint_path
-    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # Load the entity vocab file
     entity_vocab = load_entity_vocab(entity_vocab_path)
diff --git a/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py b/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
index 97265fbdcf9..02e7ef23a08 100644
--- a/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
 
 def convert_fairseq_m2m100_checkpoint_from_disk(checkpoint_path):
-    m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+    m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     args = m2m_100["args"] or m2m_100["cfg"]["model"]
     state_dict = m2m_100["model"]
     remove_ignore_keys_(state_dict)
diff --git a/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py b/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py
index 0cf7dcc0eda..f55b032207c 100644
--- a/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py
@@ -108,7 +108,7 @@ def convert_mamba_checkpoint_file_to_huggingface_model_file(
     )
     logger.info(f"Loading model from {mamba_checkpoint_path} based on config from {config_json_file}")
     # Load weights and config from paths
-    original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu")
+    original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu", weights_only=True)
 
     with open(config_json_file, "r", encoding="utf-8") as json_file:
         original_ssm_config_dict = json.load(json_file)
diff --git a/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py b/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py
index f68e9bd4904..bd1d413f40a 100644
--- a/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mamba2/convert_mamba2_ssm_checkpoint_to_pytorch.py
@@ -38,7 +38,7 @@ def load_state_dict_from_safetensors(mamba2_checkpoint_path: str, ckpt_name: str
 
 def load_state_dict_from_torch(mamba2_checkpoint_path: str, ckpt_name: str) -> Dict[str, torch.Tensor]:
-    return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu")
+    return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu", weights_only=True)
 
 
 def convert_ssm_config_to_hf_config(config_ssm: Dict, mamba2_model_dict: Dict) -> Mamba2Config:
diff --git a/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py b/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py
index eb7f00bf771..909b7b41284 100644
--- a/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py
@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
 def convert_fairseq_mbart_checkpoint_from_disk(
     checkpoint_path, hf_config_path="facebook/mbart-large-en-ro", finetuned=False, mbart_50=False
 ):
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     remove_ignore_keys_(state_dict)
     vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]
diff --git a/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py b/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py
index 0fc67866301..cd29b7ad78c 100644
--- a/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py
+++ b/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py
@@ -294,9 +294,9 @@ def main():
     if args.path_to_checkpoint.endswith(".zip"):
         with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
             with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-                input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+                input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
     else:
-        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
 
     if args.config_file == "":
         # Default config of megatron-bert 345m
diff --git a/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py b/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py
index 284d8a3d454..548e2d1aeb3 100644
--- a/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py
+++ b/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py
@@ -275,7 +275,7 @@ def merge_transformers_sharded_states(path, num_checkpoints):
     state_dict = {}
     for i in range(1, num_checkpoints + 1):
         checkpoint_path = os.path.join(path, f"pytorch_model-{i:05d}-of-{num_checkpoints:05d}.bin")
-        current_chunk = torch.load(checkpoint_path, map_location="cpu")
+        current_chunk = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
         state_dict.update(current_chunk)
     return state_dict
 
@@ -298,7 +298,7 @@ def get_megatron_sharded_states(args, tp_size, pp_size, pp_rank):
             checkpoint_path = os.path.join(args.load_path, sub_dir_name, checkpoint_name)
             if os.path.isfile(checkpoint_path):
                 break
-        state_dict = torch.load(checkpoint_path, map_location="cpu")
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
         tp_state_dicts.append(state_dict)
     return tp_state_dicts
 
@@ -338,7 +338,7 @@ def convert_checkpoint_from_megatron_to_transformers(args):
             rank0_checkpoint_path = os.path.join(args.load_path, sub_dir, rank0_checkpoint_name)
             break
     print(f"Loading Megatron-LM checkpoint arguments from: {rank0_checkpoint_path}")
-    state_dict = torch.load(rank0_checkpoint_path, map_location="cpu")
+    state_dict = torch.load(rank0_checkpoint_path, map_location="cpu", weights_only=True)
     megatron_args = state_dict.get("args", None)
     if megatron_args is None:
         raise ValueError(
@@ -634,7 +634,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
     sub_dirs = [x for x in os.listdir(args.load_path) if x.startswith("pytorch_model")]
     if len(sub_dirs) == 1:
         checkpoint_name = "pytorch_model.bin"
-        state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu")
+        state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu", weights_only=True)
     else:
         num_checkpoints = len(sub_dirs) - 1
         state_dict = merge_transformers_sharded_states(args.load_path, num_checkpoints)
diff --git a/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py b/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py
index 38060f8af5c..5515b6d6155 100644
--- a/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py
+++ b/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py
@@ -263,9 +263,9 @@ def main():
     if args.path_to_checkpoint.endswith(".zip"):
         with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
             with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-                input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+                input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
     else:
-        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+        input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
 
     ds_args = input_state_dict.get("args", None)
diff --git a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py
index e7f9da0015c..0f9cf597d5c 100644
--- a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py
+++ b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py
@@ -208,7 +208,9 @@ def convert_and_write_model(input_dir: str, output_dir: str, max_position_embedd
     else:
         shards = [file for file in os.listdir(input_dir) if re.match(r"consolidated.\d+.pth", file)]
         shards = sorted(shards, key=lambda x: int(x.split(".")[1]))
-        loaded_shards = [torch.load(os.path.join(input_dir, file), map_location="cpu") for file in shards]
+        loaded_shards = [
+            torch.load(os.path.join(input_dir, file), map_location="cpu", weights_only=True) for file in shards
+        ]
         full_state_dict = convert_state_dict_sharded(loaded_shards, config)
 
     # Load weights into model and resave them
diff --git a/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py b/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py
index 3309627b278..7e9f25d37f4 100644
--- a/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py
+++ b/src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py
@@ -94,7 +94,8 @@ def write_model(model_path, input_base_path, model_size, safe_serialization=True
     print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
 
     # Load weights
     loaded = [
-        torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu") for i in range(8)
+        torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu", weights_only=True)
+        for i in range(8)
     ]
 
     merged_state_dict = {}
diff --git a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py
index b2c40e27bb2..2d361af61e6 100644
--- a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py
+++ b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py
@@ -342,10 +342,15 @@ def write_model(
             path = os.path.join(input_base_path, "consolidated.00.pth")
         else:
             path = os.path.join(input_base_path, "consolidated.pth")
-        loaded = [torch.load(path, map_location="cpu", mmap=True)]
+        loaded = [torch.load(path, map_location="cpu", mmap=True, weights_only=True)]
     else:
         loaded = [
-            torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu", mmap=True)
+            torch.load(
+                os.path.join(input_base_path, f"consolidated.{i:02d}.pth"),
+                map_location="cpu",
+                mmap=True,
+                weights_only=True,
+            )
             for i in range(num_shards)
         ]
diff --git a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
index f361082fb3c..5a74d4114ac 100644
--- a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
@@ -33,7 +33,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
     config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
 
     # Load in the weights from the checkpoint_path
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["module"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["module"]
 
     # Load the entity vocab file
     entity_vocab = load_original_entity_vocab(entity_vocab_path)
diff --git a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py
index 522d6671d12..7dc6dfa288b 100644
--- a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py
+++ b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py
@@ -199,7 +199,7 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
     config = get_mobilevit_config(mobilevit_name)
 
     # load original state_dict
-    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # load 🤗 model
     if mobilevit_name.startswith("deeplabv3_"):
diff --git a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py
index d08642666cd..485cbf5aa09 100644
--- a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py
+++ b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py
@@ -239,7 +239,7 @@ def convert_mobilevitv2_checkpoint(task_name, checkpoint_path, orig_config_path,
     config = get_mobilevitv2_config(task_name, orig_config_path)
 
     # load original state_dict
-    checkpoint = torch.load(checkpoint_path, map_location="cpu")
+    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # load huggingface model
     if task_name.startswith("ade20k_") or task_name.startswith("voc_"):
diff --git a/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py b/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py
index f558f7c7bce..b35cd7662db 100644
--- a/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py
+++ b/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py
@@ -77,7 +77,7 @@ def convert_checkpoint_helper(max_position_embeddings, orig_state_dict):
 
 def convert_mra_checkpoint(checkpoint_path, mra_config_file, pytorch_dump_path):
-    orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+    orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
 
     config = MraConfig.from_json_file(mra_config_file)
     model = MraForMaskedLM(config)
diff --git a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
index a84138a6246..317c5c713c7 100644
--- a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
@@ -77,7 +77,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
     for expert in range(num_experts):
         expert_path = switch_checkpoint_path + f"-rank-{expert}.pt"
         if os.path.isfile(expert_path):
-            expert_state = torch.load(expert_path)["model"]
+            expert_state = torch.load(expert_path, weights_only=True)["model"]
             remove_ignore_keys_(expert_state)
             expert_state = rename_fairseq_keys(expert_state, expert)
             save_path = os.path.join(
@@ -93,7 +93,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
     save_path = os.path.join(
         dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin")
     )
-    shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"]
+    shared_weights = torch.load(switch_checkpoint_path + "-shared.pt", weights_only=True)["model"]
     remove_ignore_keys_(shared_weights)
     shared_weights = rename_fairseq_keys(shared_weights, None)
     shared_weights["shared.weight"] = shared_weights["decoder.embed_tokens.weight"]
diff --git a/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
index 8d5a52bdbf8..6664a7d8ad0 100644
--- a/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
@@ -78,7 +78,7 @@ def convert_checkpoint_helper(config, orig_state_dict):
 
 def convert_nystromformer_checkpoint(checkpoint_path, nystromformer_config_file, pytorch_dump_path):
-    orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+    orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
 
     config = NystromformerConfig.from_json_file(nystromformer_config_file)
     model = NystromformerForMaskedLM(config)
diff --git a/src/transformers/models/olmo/convert_olmo_weights_to_hf.py b/src/transformers/models/olmo/convert_olmo_weights_to_hf.py
index 0e77bdc69e7..b3a2ad80b01 100644
--- a/src/transformers/models/olmo/convert_olmo_weights_to_hf.py
+++ b/src/transformers/models/olmo/convert_olmo_weights_to_hf.py
@@ -91,7 +91,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
 
     # Not sharded
     # (The sharded implementation would also work, but this is simpler.)
-    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
 
     param_count = 0
     index_dict = {"weight_map": {}}
diff --git a/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py b/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py
index d804e7a159e..1e8fb54ddb6 100644
--- a/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py
+++ b/src/transformers/models/olmo2/convert_olmo2_weights_to_hf.py
@@ -107,7 +107,7 @@ def write_model(
 
     # Not sharded
     # (The sharded implementation would also work, but this is simpler.)
-    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
 
     param_count = 0
     index_dict: Dict[str, Any] = {"weight_map": {}}
diff --git a/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py b/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py
index a14cd50a0e7..3fc5a49c7e5 100644
--- a/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py
+++ b/src/transformers/models/olmoe/convert_olmoe_weights_to_hf.py
@@ -119,7 +119,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
     print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
 
     # Not sharded
-    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+    loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
 
     param_count = 0
     index_dict = {"weight_map": {}}
diff --git a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py
index 486b477f973..9a9b0c306cb 100644
--- a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py
@@ -29,9 +29,9 @@ logger = logging.get_logger(__name__)
 
 def load_checkpoint(checkpoint_path):
     """Checkpoint path should end in model.pt"""
-    sd = torch.load(checkpoint_path, map_location="cpu")
+    sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     if "model" in sd.keys():
-        sd = torch.load(checkpoint_path, map_location="cpu")["model"]
+        sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # pop unnecessary weights
     keys_to_delete = [
diff --git a/src/transformers/models/owlv2/convert_owlv2_to_hf.py b/src/transformers/models/owlv2/convert_owlv2_to_hf.py
index ed563b2c5bd..69665bab1d5 100644
--- a/src/transformers/models/owlv2/convert_owlv2_to_hf.py
+++ b/src/transformers/models/owlv2/convert_owlv2_to_hf.py
@@ -268,10 +268,10 @@ def convert_owlv2_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_pa
 
     # Verify pixel_values and input_ids
     filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlvit_pixel_values_960.pt", repo_type="dataset")
-    original_pixel_values = torch.load(filepath).permute(0, 3, 1, 2)
+    original_pixel_values = torch.load(filepath, weights_only=True).permute(0, 3, 1, 2)
 
     filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlv2_input_ids.pt", repo_type="dataset")
-    original_input_ids = torch.load(filepath).squeeze()
+    original_input_ids = torch.load(filepath, weights_only=True).squeeze()
 
     filepath = hf_hub_download(repo_id="adirik/OWL-ViT", repo_type="space", filename="assets/astronaut.png")
     image = Image.open(filepath)
diff --git a/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py b/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py
index 6cd61b9f71c..c4b410fd3bb 100644
--- a/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py
+++ b/src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py
@@ -82,7 +82,7 @@ def convert_persimmon_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_mode
     import sys
 
     sys.path.insert(0, ada_lib_path)
-    model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+    model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
     state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
     state_dict = rename_state_dict(state_dict)
diff --git a/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py b/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py
index eac4a27d11c..0a2bb9553e0 100644
--- a/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py
+++ b/src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py
@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
 def convert_fairseq_plbart_checkpoint_from_disk(
     checkpoint_path, hf_config_path="uclanlp/plbart-base", finetuned=False, classification=False
 ):
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     remove_ignore_keys_(state_dict)
     vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]
diff --git a/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py b/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py
index e5fad6da1a3..ddcfb9cd241 100644
--- a/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py
+++ b/src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py
@@ -151,7 +151,7 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
     logger.info(f"Converting model {model_name}...")
 
     # load original state dict
-    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 
     # rename keys
     state_dict = rename_keys(state_dict)
diff --git a/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py b/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py
index 54b8bb67e60..84788ac6aec 100644
--- a/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py
+++ b/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py
@@ -26,7 +26,7 @@ from transformers import Pop2PianoConfig, Pop2PianoForConditionalGeneration
 
 # This weights were downloaded from the official pop2piano repository
 # https://huggingface.co/sweetcocoa/pop2piano/blob/main/model-1999-val_0.67311615.ckpt
-official_weights = torch.load("./model-1999-val_0.67311615.ckpt")
+official_weights = torch.load("./model-1999-val_0.67311615.ckpt", weights_only=True)
 
 state_dict = {}
diff --git a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py
index 059a7933775..237be38fff3 100644
---
a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py +++ b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py @@ -173,7 +173,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve filename=f"{filename}", ) - state_dict = torch.load(filepath, map_location="cpu")["state_dict"] + state_dict = torch.load(filepath, map_location="cpu", weights_only=True)["state_dict"] state_dict = {key[9:]: state_dict[key] for key in state_dict} # Convert state dict using mappings diff --git a/src/transformers/models/pvt/convert_pvt_to_pytorch.py b/src/transformers/models/pvt/convert_pvt_to_pytorch.py index 99002e3d67c..633d759123f 100644 --- a/src/transformers/models/pvt/convert_pvt_to_pytorch.py +++ b/src/transformers/models/pvt/convert_pvt_to_pytorch.py @@ -165,7 +165,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path): raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given") config = PvtConfig(name_or_path=config_path) # load original model from https://github.com/whai362/PVT - state_dict = torch.load(pvt_checkpoint, map_location="cpu") + state_dict = torch.load(pvt_checkpoint, map_location="cpu", weights_only=True) rename_keys = create_rename_keys(config) for src, dest in rename_keys: diff --git a/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py b/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py index b5178cc2e99..b315d540dab 100644 --- a/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py +++ b/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py @@ -207,7 +207,7 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde ) config = PvtV2Config.from_pretrained(config_path) # load original model from https://github.com/whai362/PVT - state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu") + state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu", weights_only=True) rename_keys = create_rename_keys(config) for src, dest in rename_keys: diff --git a/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py b/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py index dc6619e217e..ea1cdd58ec9 100644 --- a/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py +++ b/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py @@ -71,7 +71,7 @@ LAYER_NAME_MAPPING = {"embedder.weight": "model.embed_tokens.weight"} def write_model(save_path, input_base_path, config, safe_serialization=True, push_to_hub=False, dtype=torch.float32): print(f"Fetching all parameters from the checkpoint at '{input_base_path}'") - model_state_dict = torch.load(input_base_path, map_location="cpu") + model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True) REPLACEMENT = { "blocks.": "layers.", diff --git a/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py index b8491db08b1..c4a6b03162f 100644 --- a/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py @@ -37,7 +37,9 @@ def 
convert_roberta_prelayernorm_checkpoint_to_pytorch(checkpoint_repo: str, pyt ) # convert state_dict - original_state_dict = torch.load(hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin")) + original_state_dict = torch.load( + hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"), weights_only=True + ) state_dict = {} for tensor_key, tensor_value in original_state_dict.items(): # The transformer implementation gives the model a unique name, rather than overwiriting 'roberta' diff --git a/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py b/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py index a0c97fc4e23..87d35db2236 100644 --- a/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py +++ b/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py @@ -112,7 +112,7 @@ def convert_rmkv_checkpoint_to_hf_format( # 3. Download model file then convert state_dict model_file = hf_hub_download(repo_id, checkpoint_file) - state_dict = torch.load(model_file, map_location="cpu") + state_dict = torch.load(model_file, map_location="cpu", weights_only=True) state_dict = convert_state_dict(state_dict) # 4. Split in shards and save @@ -147,7 +147,7 @@ def convert_rmkv_checkpoint_to_hf_format( gc.collect() for shard_file in shard_files: - state_dict = torch.load(os.path.join(output_dir, shard_file)) + state_dict = torch.load(os.path.join(output_dir, shard_file), weights_only=True) torch.save({k: v.cpu().clone() for k, v in state_dict.items()}, os.path.join(output_dir, shard_file)) del state_dict diff --git a/src/transformers/models/sam/convert_sam_to_hf.py b/src/transformers/models/sam/convert_sam_to_hf.py index dd8818b68cf..76d8884d951 100644 --- a/src/transformers/models/sam/convert_sam_to_hf.py +++ b/src/transformers/models/sam/convert_sam_to_hf.py @@ -137,7 +137,7 @@ def replace_keys(state_dict): def convert_sam_checkpoint(model_name, checkpoint_path, pytorch_dump_folder, push_to_hub): config = get_config(model_name) - state_dict = torch.load(checkpoint_path, map_location="cpu") + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True) state_dict = replace_keys(state_dict) image_processor = SamImageProcessor() diff --git a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py index 3bbc86e433b..c84e006ad64 100644 --- a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py +++ b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py @@ -191,9 +191,9 @@ def convert_segformer_checkpoint(model_name, checkpoint_path, pytorch_dump_folde # load original state dict if encoder_only: - state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu")) + state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True) else: - state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))["state_dict"] + state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)["state_dict"] # rename keys state_dict = rename_keys(state_dict, encoder_only=encoder_only) diff --git a/src/transformers/models/siglip/convert_siglip_to_hf.py b/src/transformers/models/siglip/convert_siglip_to_hf.py index 8b0a8a250dd..b61bd7ffb70 100644 --- a/src/transformers/models/siglip/convert_siglip_to_hf.py +++ b/src/transformers/models/siglip/convert_siglip_to_hf.py @@ -441,9 +441,9 @@ def convert_siglip_checkpoint(model_name, 
pytorch_dump_folder_path, verify_logit raise ValueError("Image size not supported") filepath = hf_hub_download(repo_id="nielsr/test-image", filename=filename, repo_type="dataset") - original_pixel_values = torch.load(filepath) + original_pixel_values = torch.load(filepath, weights_only=True) filepath = hf_hub_download(repo_id="nielsr/test-image", filename="siglip_input_ids.pt", repo_type="dataset") - original_input_ids = torch.load(filepath) + original_input_ids = torch.load(filepath, weights_only=True) if "i18n" not in model_name: assert inputs.input_ids.tolist() == original_input_ids.tolist() diff --git a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py index eb4d8526247..9286fae776f 100644 --- a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py +++ b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py @@ -52,7 +52,7 @@ def make_linear_from_emb(emb): def convert_fairseq_s2t_checkpoint_to_tfms(checkpoint_path, pytorch_dump_folder_path): - m2m_100 = torch.load(checkpoint_path, map_location="cpu") + m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True) args = m2m_100["args"] state_dict = m2m_100["model"] lm_head_weights = state_dict["decoder.output_projection.weight"] diff --git a/src/transformers/models/speecht5/convert_hifigan.py b/src/transformers/models/speecht5/convert_hifigan.py index 4d78bb73af3..b39012f8e25 100644 --- a/src/transformers/models/speecht5/convert_hifigan.py +++ b/src/transformers/models/speecht5/convert_hifigan.py @@ -70,7 +70,7 @@ def convert_hifigan_checkpoint( model = SpeechT5HifiGan(config) - orig_checkpoint = torch.load(checkpoint_path) + orig_checkpoint = torch.load(checkpoint_path, weights_only=True) load_weights(orig_checkpoint["model"]["generator"], model, config) stats = np.load(stats_path) diff --git a/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py index 20dea800d9d..c16e11d2b25 100644 --- a/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py @@ -361,7 +361,7 @@ def convert_speecht5_checkpoint( processor = SpeechT5Processor(tokenizer=tokenizer, feature_extractor=feature_extractor) processor.save_pretrained(pytorch_dump_folder_path) - fairseq_checkpoint = torch.load(checkpoint_path) + fairseq_checkpoint = torch.load(checkpoint_path, weights_only=True) recursively_load_weights(fairseq_checkpoint["model"], model, task) model.save_pretrained(pytorch_dump_folder_path) diff --git a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py index 21ecebebe24..3567bb674e9 100644 --- a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py +++ b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py @@ -125,7 +125,7 @@ def convert_swiftformer_checkpoint(swiftformer_name, pytorch_dump_folder_path, o if original_ckpt.startswith("https"): checkpoint = torch.hub.load_state_dict_from_url(original_ckpt, map_location="cpu", check_hash=True) else: - checkpoint = torch.load(original_ckpt, map_location="cpu") + checkpoint = torch.load(original_ckpt, map_location="cpu", weights_only=True) state_dict = checkpoint rename_keys = 
create_rename_keys(state_dict) diff --git a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py index 6402346289c..9a87ff693af 100644 --- a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py +++ b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py @@ -121,7 +121,7 @@ def convert_state_dict(orig_state_dict, model): def convert_swin_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_path, push_to_hub): - state_dict = torch.load(checkpoint_path, map_location="cpu")["model"] + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"] config = get_swin_config(model_name) model = SwinForMaskedImageModeling(config) diff --git a/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py b/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py index ce4d13421ff..cda9b0c1827 100644 --- a/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py +++ b/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py @@ -143,7 +143,7 @@ def convert_timesformer_checkpoint(checkpoint_url, pytorch_dump_folder_path, mod # download original checkpoint, hosted on Google Drive output = "pytorch_model.bin" gdown.cached_download(checkpoint_url, output, quiet=False) - files = torch.load(output, map_location="cpu") + files = torch.load(output, map_location="cpu", weights_only=True) if "model" in files: state_dict = files["model"] elif "module" in files: diff --git a/src/transformers/models/udop/convert_udop_to_hf.py b/src/transformers/models/udop/convert_udop_to_hf.py index f2d54b8ca54..8ba0de55df7 100644 --- a/src/transformers/models/udop/convert_udop_to_hf.py +++ b/src/transformers/models/udop/convert_udop_to_hf.py @@ -98,7 +98,7 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h # load original state dict checkpoint_path = name_to_checkpoint_path[model_name] - state_dict = torch.load(checkpoint_path, map_location="cpu") + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True) print("Checkpoint path:", checkpoint_path) @@ -177,12 +177,12 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h # autoregressive decoding with original input data print("Testing generation with original inputs...") filepath = hf_hub_download(repo_id="nielsr/test-image", filename="input_ids_udop.pt", repo_type="dataset") - input_ids = torch.load(filepath) + input_ids = torch.load(filepath, weights_only=True) filepath = hf_hub_download(repo_id="nielsr/test-image", filename="bbox_udop.pt", repo_type="dataset") - bbox = torch.load(filepath) + bbox = torch.load(filepath, weights_only=True) pixel_values_filename = "pixel_values_udop_512.pt" if "512" in model_name else "pixel_values_udop_224.pt" filepath = hf_hub_download(repo_id="nielsr/test-image", filename=pixel_values_filename, repo_type="dataset") - pixel_values = torch.load(filepath) + pixel_values = torch.load(filepath, weights_only=True) print("Decoded input ids:", tokenizer.decode(input_ids[0], skip_special_tokens=True)) print("Bbox shape:", bbox.shape) diff --git a/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py index fca35acb634..0f1256e0ca3 100644 --- a/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py +++ 
b/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py @@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode """ Copy/paste/tweak model's weights to transformers design. """ - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) downstream_dict = checkpoint["Downstream"] diff --git a/src/transformers/models/univnet/convert_univnet.py b/src/transformers/models/univnet/convert_univnet.py index 30520b7fa14..f790efab22f 100644 --- a/src/transformers/models/univnet/convert_univnet.py +++ b/src/transformers/models/univnet/convert_univnet.py @@ -106,7 +106,7 @@ def convert_univnet_checkpoint( repo_id=None, safe_serialization=False, ): - model_state_dict_base = torch.load(checkpoint_path, map_location="cpu") + model_state_dict_base = torch.load(checkpoint_path, map_location="cpu", weights_only=True) # Get the generator's state dict state_dict = model_state_dict_base["model_g"] diff --git a/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py b/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py index 4c07ca0a03a..fff886f8a83 100644 --- a/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py +++ b/src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py @@ -99,7 +99,7 @@ def convert_video_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_p state_dict_temp = "pytorch_model-0000{i}-of-00002.bin" for shard in range(1, 3): state_dict_path = hf_hub_download(old_state_dict_id, state_dict_temp.format(i=shard)) - state_dict = torch.load(state_dict_path, map_location="cpu") + state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True) state_dict = convert_state_dict_to_hf(state_dict) model.load_state_dict(state_dict, strict=False, assign=True) model_state_dict -= set(state_dict.keys()) diff --git a/src/transformers/models/videomae/convert_videomae_to_pytorch.py b/src/transformers/models/videomae/convert_videomae_to_pytorch.py index c98160a6bb8..011c1862eb6 100644 --- a/src/transformers/models/videomae/convert_videomae_to_pytorch.py +++ b/src/transformers/models/videomae/convert_videomae_to_pytorch.py @@ -187,7 +187,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_ # download original checkpoint, hosted on Google Drive output = "pytorch_model.bin" gdown.cached_download(checkpoint_url, output, quiet=False) - files = torch.load(output, map_location="cpu") + files = torch.load(output, map_location="cpu", weights_only=True) if "model" in files: state_dict = files["model"] else: @@ -204,7 +204,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_ if "finetuned" not in model_name: local_path = hf_hub_download(repo_id="hf-internal-testing/bool-masked-pos", filename="bool_masked_pos.pt") - inputs["bool_masked_pos"] = torch.load(local_path) + inputs["bool_masked_pos"] = torch.load(local_path, weights_only=True) outputs = model(**inputs) logits = outputs.logits diff --git a/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py b/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py index 2914cfdfcd4..f0fa69ab872 100644 --- a/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py +++ b/src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py @@ -78,7 +78,7 @@ def convert_vipllava_llama_to_hf(text_model_id, vision_model_id, 
output_hub_path state_dict_path = hf_hub_download(old_state_dict_id, "model_state_dict_7b.bin") - state_dict = torch.load(state_dict_path, map_location="cpu") + state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True) state_dict = convert_state_dict_to_hf(state_dict) model.load_state_dict(state_dict, strict=True, assign=True) diff --git a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py index 59733539415..ae5af9a343d 100644 --- a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py @@ -56,7 +56,7 @@ ACCEPTABLE_CHECKPOINTS = [ def load_state_dict(checkpoint_path): - sd = torch.load(checkpoint_path, map_location="cpu") + sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True) return sd diff --git a/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py b/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py index bcc05563337..5153e1faf52 100644 --- a/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py +++ b/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py @@ -82,7 +82,7 @@ def convert_vitmatte_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu filename = model_name_to_filename[model_name] filepath = hf_hub_download(repo_id="nielsr/vitmatte-checkpoints", filename=filename, repo_type="model") - state_dict = torch.load(filepath, map_location="cpu") + state_dict = torch.load(filepath, map_location="cpu", weights_only=True) # rename keys for key in state_dict.copy().keys(): diff --git a/src/transformers/models/vitpose/convert_vitpose_to_hf.py b/src/transformers/models/vitpose/convert_vitpose_to_hf.py index 0d36e332a4f..e4666751a10 100644 --- a/src/transformers/models/vitpose/convert_vitpose_to_hf.py +++ b/src/transformers/models/vitpose/convert_vitpose_to_hf.py @@ -207,7 +207,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True): ) print("Converting model...") - original_state_dict = torch.load(checkpoint_path, map_location="cpu")["state_dict"] + original_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"] all_keys = list(original_state_dict.keys()) new_keys = convert_old_keys_to_new_keys(all_keys) @@ -264,7 +264,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True): pixel_values = image_processor(images=image, boxes=boxes, return_tensors="pt").pixel_values filepath = hf_hub_download(repo_id="nielsr/test-image", filename="vitpose_batch_data.pt", repo_type="dataset") - original_pixel_values = torch.load(filepath, map_location="cpu")["img"] + original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)["img"] # we allow for a small difference in the pixel values due to the original repository using cv2 assert torch.allclose(pixel_values, original_pixel_values, atol=1e-1) diff --git a/src/transformers/models/vits/convert_original_checkpoint.py b/src/transformers/models/vits/convert_original_checkpoint.py index 267f72ccd08..7f122e86fa5 100644 --- a/src/transformers/models/vits/convert_original_checkpoint.py +++ b/src/transformers/models/vits/convert_original_checkpoint.py @@ -346,7 +346,7 @@ def convert_checkpoint( model.decoder.apply_weight_norm() - orig_checkpoint = torch.load(checkpoint_path, 
map_location=torch.device("cpu")) + orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True) recursively_load_weights(orig_checkpoint["model"], model) model.decoder.remove_weight_norm() diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py index 1702bc5a473..fa33416c8bd 100644 --- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py @@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode """ Copy/paste/tweak model's weights to transformers design. """ - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) downstream_dict = checkpoint["Downstream"] diff --git a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py index e41ae0881d9..91d4853bade 100644 --- a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py @@ -179,7 +179,7 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro @torch.no_grad() def convert_wavlm_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_path=None): # load the pre-trained checkpoints - checkpoint = torch.load(checkpoint_path) + checkpoint = torch.load(checkpoint_path, weights_only=True) cfg = WavLMConfigOrig(checkpoint["cfg"]) model = WavLMOrig(cfg) model.load_state_dict(checkpoint["model"]) diff --git a/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py index 447d4db67fc..b8c4c337679 100644 --- a/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py @@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode """ Copy/paste/tweak model's weights to transformers design. """ - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) downstream_dict = checkpoint["Downstream"] diff --git a/src/transformers/models/whisper/convert_openai_to_hf.py b/src/transformers/models/whisper/convert_openai_to_hf.py index bb63cd24cd6..343fb5513b5 100755 --- a/src/transformers/models/whisper/convert_openai_to_hf.py +++ b/src/transformers/models/whisper/convert_openai_to_hf.py @@ -157,7 +157,7 @@ def _download(url: str, root: str) -> Any: if os.path.isfile(download_target): model_bytes = open(download_target, "rb").read() if insecure_hashlib.sha256(model_bytes).hexdigest() == expected_sha256: - return torch.load(io.BytesIO(model_bytes)) + return torch.load(io.BytesIO(model_bytes), weights_only=True) else: warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") @@ -179,7 +179,7 @@ def _download(url: str, root: str) -> Any: "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model." 
) - return torch.load(io.BytesIO(model_bytes)) + return torch.load(io.BytesIO(model_bytes), weights_only=True) def convert_openai_whisper_to_tfms( @@ -190,7 +190,7 @@ def convert_openai_whisper_to_tfms( original_checkpoint = _download(_MODELS[checkpoint_path], root) openai_version = checkpoint_path else: - original_checkpoint = torch.load(checkpoint_path, map_location="cpu") + original_checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) openai_version = None dimensions = original_checkpoint["dims"] diff --git a/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py b/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py index 8ff878f2cc9..6f36b190558 100644 --- a/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py +++ b/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py @@ -279,7 +279,7 @@ def convert_xclip_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_ if "drive" in checkpoint_url: output = "pytorch_model.bin" gdown.cached_download(checkpoint_url, output, quiet=False) - state_dict = torch.load(output, map_location="cpu")["model"] + state_dict = torch.load(output, map_location="cpu", weights_only=True)["model"] else: state_dict = torch.hub.load_state_dict_from_url(checkpoint_url)["model"] diff --git a/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py b/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py index f8b5dba3c1e..dc898196260 100644 --- a/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py +++ b/src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py @@ -26,7 +26,7 @@ def make_linear_from_emb(emb): def convert_fairseq_xglm_checkpoint_from_disk(checkpoint_path): - checkpoint = torch.load(checkpoint_path, map_location="cpu") + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) args = Namespace(**checkpoint["cfg"]["model"]) state_dict = checkpoint["model"] remove_ignore_keys_(state_dict) diff --git a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py index 71c3a1f989f..2e5a17921d0 100755 --- a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py @@ -29,7 +29,7 @@ logging.set_verbosity_info() def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_path): # Load checkpoint - chkpt = torch.load(xlm_checkpoint_path, map_location="cpu") + chkpt = torch.load(xlm_checkpoint_path, map_location="cpu", weights_only=True) state_dict = chkpt["model"] diff --git a/src/transformers/models/yolos/convert_yolos_to_pytorch.py b/src/transformers/models/yolos/convert_yolos_to_pytorch.py index 6cddc606614..907a11d067b 100644 --- a/src/transformers/models/yolos/convert_yolos_to_pytorch.py +++ b/src/transformers/models/yolos/convert_yolos_to_pytorch.py @@ -163,7 +163,7 @@ def convert_yolos_checkpoint( config = get_yolos_config(yolos_name) # load original state_dict - state_dict = torch.load(checkpoint_path, map_location="cpu")["model"] + state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"] # load 🤗 model model = YolosForObjectDetection(config) diff --git a/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py b/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py index be46a4de81b..950769ae1e0 
100644 --- a/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py +++ b/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py @@ -75,7 +75,7 @@ def convert_checkpoint_helper(max_position_embeddings, orig_state_dict): def convert_yoso_checkpoint(checkpoint_path, yoso_config_file, pytorch_dump_path): - orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"] + orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"] config = YosoConfig.from_json_file(yoso_config_file) model = YosoForMaskedLM(config) diff --git a/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py b/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py index 9a6701c35bc..cbf47a636b7 100644 --- a/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py +++ b/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py @@ -347,7 +347,7 @@ def convert_zoedepth_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu filename="zoedepth_pixel_values.pt", repo_type="dataset", ) - original_pixel_values = torch.load(filepath, map_location="cpu") + original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True) assert torch.allclose(pixel_values, original_pixel_values) # verify logits @@ -358,7 +358,7 @@ def convert_zoedepth_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu repo_type="dataset", revision="1865dbb81984f01c89e83eec10f8d07efd10743d", ) - cats_pixel_values = torch.load(filepath, map_location="cpu") + cats_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True) depth = model(cats_pixel_values).predicted_depth # Verify logits
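For reference, every hunk in this patch makes the same mechanical change: opting in to PyTorch's restricted unpickler at each torch.load call site. A minimal sketch of the before/after behavior follows; it is not part of the diff, the checkpoint path is hypothetical, and it assumes PyTorch 1.13 or newer, where the weights_only argument is available.

    import torch

    # Hypothetical checkpoint path, for illustration only.
    ckpt = "checkpoint.pt"

    # Before: torch.load unpickles arbitrary Python objects, so loading an
    # untrusted checkpoint can execute code embedded in the file.
    state_dict = torch.load(ckpt, map_location="cpu")

    # After: weights_only=True restricts unpickling to tensors and other
    # allowlisted types; anything else raises an UnpicklingError instead of
    # executing. Checkpoints that pickle custom objects (e.g. an
    # argparse.Namespace in a fairseq "args" entry) will fail to load unless
    # those types are allowlisted via torch.serialization.add_safe_globals.
    state_dict = torch.load(ckpt, map_location="cpu", weights_only=True)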