Add weights_only=True to torch.load (#37062)

cyyever 2025-04-12 00:18:41 +08:00 committed by GitHub
parent bf46e44878
commit 28eae8b4bd
106 changed files with 161 additions and 136 deletions
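
The change itself is mechanical: every torch.load call gains weights_only=True, which switches loading to a restricted unpickler that only reconstructs tensors, primitive containers, and explicitly allow-listed types, instead of running arbitrary pickled code from the checkpoint file. Recent PyTorch releases have also been moving toward making this the default, so passing it explicitly keeps these scripts behaving the same across versions. A minimal sketch of the before/after call (the checkpoint path here is hypothetical, for illustration only):

import torch

ckpt_path = "model.pt"  # hypothetical path

# Legacy call: the full pickle machinery runs, so a malicious file can execute code on load.
state_dict = torch.load(ckpt_path, map_location="cpu")

# Hardened call: only tensors and allow-listed types are rebuilt; anything else raises an
# UnpicklingError instead of being executed.
state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=True)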

View File

@@ -122,7 +122,7 @@ class GlueDataset(Dataset):
        with FileLock(lock_path):
            if os.path.exists(cached_features_file) and not args.overwrite_cache:
                start = time.time()
-               self.features = torch.load(cached_features_file)
+               self.features = torch.load(cached_features_file, weights_only=True)
                logger.info(
                    f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start
                )

View File

@@ -109,7 +109,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
    if not os.path.exists(ckpt_path):
        logger.info(f"{model_type} model not found, downloading into `{CACHE_DIR}`.")
        _download(model_info["repo_id"], model_info["file_name"])
-   checkpoint = torch.load(ckpt_path, map_location=device)
+   checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
    # this is a hack
    model_args = checkpoint["model_args"]
    if "input_vocab_size" not in model_args:

View File

@@ -71,7 +71,7 @@ def rename_key(dct, old, new):
def load_xsum_checkpoint(checkpoint_path):
    """Checkpoint path should end in model.pt"""
-   sd = torch.load(checkpoint_path, map_location="cpu")
+   sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    hub_interface = torch.hub.load("pytorch/fairseq", "bart.large.cnn").eval()
    hub_interface.model.load_state_dict(sd["model"])
    return hub_interface

View File

@@ -101,7 +101,7 @@ def main(raw_args=None):
    model = BertModel.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
-       state_dict=torch.load(args.pytorch_model_path),
+       state_dict=torch.load(args.pytorch_model_path, weights_only=True),
        cache_dir=args.cache_dir,
    )

View File

@@ -168,7 +168,7 @@ def convert_biogpt_checkpoint_to_pytorch(biogpt_checkpoint_path, pytorch_dump_fo
    checkpoint_file = os.path.join(biogpt_checkpoint_path, "checkpoint.pt")
    if not os.path.isfile(checkpoint_file):
        raise ValueError(f"path to the file {checkpoint_file} does not exist!")
-   chkpt = torch.load(checkpoint_file, map_location="cpu")
+   chkpt = torch.load(checkpoint_file, map_location="cpu", weights_only=True)
    args = chkpt["cfg"]["model"]
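
Some of the converted checkpoints bundle configuration objects alongside their tensors (the "cfg" entry read just above, for instance). If the restricted unpickler rejects such an object, recent PyTorch releases let callers allow-list the offending type explicitly rather than dropping back to weights_only=False. A hedged sketch with a hypothetical checkpoint that pickled an argparse.Namespace:

import torch
from argparse import Namespace

# Hypothetical case: allow-list argparse.Namespace so the restricted unpickler accepts it.
torch.serialization.add_safe_globals([Namespace])
chkpt = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)  # hypothetical path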

View File

@@ -79,7 +79,7 @@ def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_
    """
    Copy/paste/tweak model's weights to our BERT structure.
    """
-   model = torch.load(checkpoint_path, map_location="cpu")
+   model = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    sd = model["model"]
    cfg = BlenderbotConfig.from_json_file(config_json_path)
    m = BlenderbotForConditionalGeneration(cfg)

View File

@@ -104,7 +104,7 @@ def convert_bloom_checkpoint_to_pytorch(
        for i in range(pretraining_tp):
            # load all TP files
            f_name = file.replace("model_00", f"model_0{i}")
-           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
            # Rename keys in the transformers names
            keys = list(temp.keys())
@@ -164,7 +164,7 @@ def convert_bloom_checkpoint_to_pytorch(
        for i in range(pretraining_tp):
            # load all TP files
            f_name = file.replace("model_00", f"model_0{i}")
-           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+           temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
            # Rename keys in the transformers names
            keys = list(temp.keys())

View File

@@ -130,13 +130,15 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
        for possible_name in ["consolidated.pth", "consolidated.00.pth"]:
            possible_path = os.path.join(input_model_path, possible_name)
            if os.path.exists(possible_path):
-               loaded = torch.load(possible_path, map_location="cpu")
+               loaded = torch.load(possible_path, map_location="cpu", weights_only=True)
                break
        assert loaded is not None
    else:
        # Sharded
        loaded = [
-           torch.load(os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu")
+           torch.load(
+               os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu", weights_only=True
+           )
            for i in range(num_shards)
        ]
@@ -314,7 +316,7 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
    # Load VQGAN weights
    vqgan_path = os.path.join(input_base_path, "tokenizer/vqgan.ckpt")
-   vqgan_state_dict = torch.load(vqgan_path, map_location="cpu")["state_dict"]
+   vqgan_state_dict = torch.load(vqgan_path, map_location="cpu", weights_only=True)["state_dict"]
    for k, v in vqgan_state_dict.items():
        if "decoder" in k:
            continue  # we dont do image generation yet

View File

@@ -104,7 +104,7 @@ def convert_chinese_clip_checkpoint(checkpoint_path, pytorch_dump_folder_path, c
    hf_model = ChineseCLIPModel(config).eval()
-   pt_weights = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+   pt_weights = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]
    pt_weights = {(name[7:] if name.startswith("module.") else name): value for name, value in pt_weights.items()}
    copy_text_model_and_projection(hf_model, pt_weights)

View File

@@ -169,7 +169,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
    model = CLIPSegForImageSegmentation(config)
    model.eval()
-   state_dict = torch.load(checkpoint_path, map_location="cpu")
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # remove some keys
    for key in state_dict.copy().keys():

View File

@@ -201,9 +201,9 @@ def convert_clvp_weights(checkpoint_path, pytorch_dump_folder_path):
        _download(url=each_model_url, root=each_model_path)
        if each_model_name == "clvp":
-           clvp_checkpoint = torch.load(each_model_path, map_location="cpu")
+           clvp_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
        else:
-           decoder_checkpoint = torch.load(each_model_path, map_location="cpu")
+           decoder_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
    # Converting the weights
    converted_checkpoint.update(**convert_encoder_weights(clvp_checkpoint))

View File

@@ -309,7 +309,7 @@ def convert_cvt_checkpoint(cvt_model, image_size, cvt_file_name, pytorch_dump_fo
    model = CvtForImageClassification(config)
    image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")
    image_processor.size["shortest_edge"] = image_size
-   original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"))
+   original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"), weights_only=True)
    huggingface_weights = OrderedDict()
    list_of_state_dict = []

View File

@@ -143,7 +143,7 @@ def write_model(model_name, pretrained_model_weights_path, pytorch_dump_folder_p
    config.id2label = id2label
    config.label2id = {v: k for k, v in id2label.items()}
    # load original model from local path
-   loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"))["model"]
+   loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"), weights_only=True)["model"]
    # Renaming the original model state dictionary to HF compatibile
    all_keys = list(loaded.keys())
    new_keys = convert_old_keys_to_new_keys(all_keys)

View File

@@ -205,7 +205,7 @@ def convert_checkpoint(
    sample_rate=16000,
    repo_id=None,
):
-   model_dict = torch.load(checkpoint_path, "cpu")
+   model_dict = torch.load(checkpoint_path, "cpu", weights_only=True)
    config = DacConfig()

View File

@@ -224,7 +224,7 @@ def load_beit_model(args, is_finetuned, is_large):
    )
    patch_size = model.patch_embed.patch_size
    args.window_size = (args.input_size // patch_size[0], args.input_size // patch_size[1])
-   checkpoint = torch.load(args.beit_checkpoint, map_location="cpu")
+   checkpoint = torch.load(args.beit_checkpoint, map_location="cpu", weights_only=True)
    print(f"Load ckpt from {args.beit_checkpoint}")
    checkpoint_model = None

View File

@@ -125,7 +125,7 @@ def convert_deformable_detr_checkpoint(
    logger.info("Converting model...")
    # load original state dict
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # rename keys
    for key in state_dict.copy().keys():
        val = state_dict.pop(key)

View File

@@ -229,7 +229,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
    else:
        raise ValueError(f"Model name {model_name} not supported")
    checkpoint_path = hf_hub_download(repo_id="nielsr/deta-checkpoints", filename=filename)
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # rename keys
    rename_keys = create_rename_keys(config)

View File

@@ -230,7 +230,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
    else:
        raise ValueError(f"Model name {model_name} not supported")
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # original state dict
    for name, param in state_dict.items():

View File

@@ -123,7 +123,7 @@ def prepare_img():
def convert_efficientformer_checkpoint(
    checkpoint_path: Path, efficientformer_config_file: Path, pytorch_dump_path: Path, push_to_hub: bool
):
-   orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    config = EfficientFormerConfig.from_json_file(efficientformer_config_file)
    model = EfficientFormerForImageClassificationWithTeacher(config)
    model_name = "_".join(checkpoint_path.split("/")[-1].split(".")[0].split("_")[:-1])

View File

@@ -228,7 +228,7 @@ def convert_openai_checkpoint(model_name=None, pytorch_dump_folder_path=None):
    weight_dict = []
    mapping = {}
    for i, dict_name in enumerate(model_to_convert):
-       old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}")["model"]
+       old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}", weights_only=True)["model"]
        new_dic = {}
        for k in old_dic.keys():

View File

@@ -132,13 +132,17 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
    print(
        "Original Mega encoder:",
        original_mlm.mega.load_state_dict(
-           torch.load(os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu")
+           torch.load(
+               os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu", weights_only=True
+           )
        ),
    )
    print(
        "Original Mega MLM layer:",
        original_mlm.mlm_head.load_state_dict(
-           torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+           torch.load(
+               os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+           )
        ),
    )
@@ -234,7 +238,9 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
    print(
        "HF Mega MLM layer:",
        hf_mlm.mlm_head.load_state_dict(
-           torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+           torch.load(
+               os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+           )
        ),
    )

View File

@@ -129,7 +129,7 @@ def convert_weight_and_push(
    print(f"Downloading weights for {name}...")
    checkpoint_path = cached_download(checkpoint)
    print(f"Converting {name}...")
-   from_state_dict = torch.load(checkpoint_path)["state_dict"]
+   from_state_dict = torch.load(checkpoint_path, weights_only=True)["state_dict"]
    from_model.load_state_dict(from_state_dict)
    from_model.eval()
    with torch.no_grad():

View File

@@ -229,7 +229,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
        filename=f"{filename}",
    )
-   state_dict = torch.load(filepath, map_location="cpu")
+   state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
    # rename keys
    rename_keys = create_rename_keys(config)
    for src, dest in rename_keys:

View File

@@ -27,7 +27,7 @@ NEW_KEY = "lm_head.weight"
def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
-   d = torch.load(checkpoint_path)
+   d = torch.load(checkpoint_path, weights_only=True)
    d[NEW_KEY] = d.pop(OLD_KEY)
    os.makedirs(pytorch_dump_folder_path, exist_ok=True)
    torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))

View File

@@ -29,7 +29,9 @@ CheckpointState = collections.namedtuple(
def load_states_from_checkpoint(model_file: str) -> CheckpointState:
    print(f"Reading saved model from {model_file}")
-   state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
+   state_dict = torch.load(
+       model_file, map_location=lambda s, l: default_restore_location(s, "cpu"), weights_only=True
+   )
    return CheckpointState(**state_dict)

View File

@@ -226,7 +226,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
    config, expected_shape = get_dpt_config(checkpoint_url)
    # load original state_dict from URL
    # state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")
-   state_dict = torch.load(checkpoint_url, map_location="cpu")
+   state_dict = torch.load(checkpoint_url, map_location="cpu", weights_only=True)
    # remove certain keys
    remove_ignore_keys_(state_dict)
    # rename keys

View File

@@ -325,7 +325,7 @@ def convert_checkpoint(
    )
    feature_extractor.save_pretrained(pytorch_dump_folder_path)
-   original_checkpoint = torch.load(checkpoint_path)
+   original_checkpoint = torch.load(checkpoint_path, weights_only=True)
    if "best_state" in original_checkpoint:
        # we might have a training state saved, in which case discard the yaml results and just retain the weights
        original_checkpoint = original_checkpoint["best_state"]

View File

@@ -164,7 +164,7 @@ def convert_FastSpeech2ConformerModel_checkpoint(
    # Prepare the model
    model = FastSpeech2ConformerModel(config)
-   espnet_checkpoint = torch.load(checkpoint_path)
+   espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
    hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
    model.load_state_dict(hf_compatible_state_dict)

View File

@@ -104,7 +104,7 @@ def convert_hifigan_checkpoint(
    model = FastSpeech2ConformerHifiGan(config)
-   orig_checkpoint = torch.load(checkpoint_path)
+   orig_checkpoint = torch.load(checkpoint_path, weights_only=True)
    load_weights(orig_checkpoint, model, config)
    model.save_pretrained(pytorch_dump_folder_path)

View File

@@ -51,7 +51,7 @@ def convert_FastSpeech2ConformerWithHifiGan_checkpoint(
    model = FastSpeech2ConformerModel(model_config)
-   espnet_checkpoint = torch.load(checkpoint_path)
+   espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
    hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
    model.load_state_dict(hf_compatible_state_dict)

View File

@@ -62,7 +62,7 @@ def convert_dalle_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_p
    encoder = Encoder()
    if os.path.exists(checkpoint_path):
-       ckpt = torch.load(checkpoint_path)
+       ckpt = torch.load(checkpoint_path, weights_only=True)
    else:
        ckpt = torch.hub.load_state_dict_from_url(checkpoint_path)

View File

@@ -73,7 +73,7 @@ def convert_flava_checkpoint(checkpoint_path, codebook_path, pytorch_dump_folder
    codebook_state_dict = convert_dalle_checkpoint(codebook_path, None, save_checkpoint=False)
    if os.path.exists(checkpoint_path):
-       state_dict = torch.load(checkpoint_path, map_location="cpu")
+       state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    else:
        state_dict = torch.hub.load_state_dict_from_url(checkpoint_path, map_location="cpu")

View File

@@ -87,7 +87,7 @@ def rename_state_dict(state_dict):
def convert_fuyu_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_model_path, safe_serialization=False):
    sys.path.insert(0, ada_lib_path)
-   model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+   model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
    state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
    state_dict = rename_state_dict(state_dict)

View File

@@ -72,7 +72,7 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
    head_dim = config.head_dim
    print(f"Fetching all parameters from the checkpoint at '{input_base_path}'")
-   model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+   model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
    model_state_dict.pop("freqs_cis")
    state_dict = {}

View File

@@ -97,11 +97,11 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
        for file in files:
            print(file)
-           loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu")
+           loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
            model_state_dict.update(loaded_state_dict)
    else:
        print("Model does not seem to be sharded")
-       model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+       model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
        model_state_dict.pop("freqs_cis")
    state_dict = {}

View File

@@ -297,7 +297,7 @@ def convert_git_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub=Fal
    if "large" in model_name and not is_video and "large-r" not in model_name:
        # large checkpoints take way too long to download
        checkpoint_path = model_name_to_path[model_name]
-       state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+       state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    else:
        checkpoint_url = model_name_to_url[model_name]
        state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu", file_name=model_name)[

View File

@@ -53,7 +53,7 @@ def load_weights(input_dir: str):
    elif bin_files:
        bin_files = sorted(bin_files, key=lambda x: int(x.rsplit("-", 3)[1]))
        for file in bin_files:
-           tensors = torch.load(file, map_location="cpu")
+           tensors = torch.load(file, map_location="cpu", weights_only=True)
            all_weights.update(tensors)
        return all_weights

View File

@@ -140,7 +140,7 @@ def convert_glpn_checkpoint(checkpoint_path, pytorch_dump_folder_path, push_to_h
    logger.info("Converting model...")
    # load original state dict
-   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
    # rename keys
    state_dict = rename_keys(state_dict)

View File

@@ -153,7 +153,7 @@ def main(args):
        raise FileNotFoundError(f"ERROR! could not find file {checkpoint_path}")
    # Load the model.
-   checkpoint = torch.load(checkpoint_path, map_location="cpu")
+   checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # Load the config.
    config_megatron = checkpoint["hyper_parameters"]["cfg"]

View File

@@ -163,7 +163,7 @@ def convert_groupvit_checkpoint(
    config = GroupViTConfig()
    model = GroupViTModel(config).eval()
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    new_state_dict = convert_state_dict(state_dict, config)
    missing_keys, unexpected_keys = model.load_state_dict(new_state_dict, strict=False)
    assert missing_keys == ["text_model.embeddings.position_ids"]

View File

@@ -32,7 +32,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
    """
    Copy/paste/tweak model's weights to transformers design.
    """
-   checkpoint = torch.load(checkpoint_path, map_location="cpu")
+   checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    if checkpoint["Config"]["downstream_expert"]["modelrc"]["select"] not in SUPPORTED_MODELS:
        raise NotImplementedError(f"The supported s3prl models are {SUPPORTED_MODELS}")

View File

@@ -228,12 +228,17 @@ def write_model(
    if num_shards == 1:
        # Not sharded
        # (The sharded implementation would also work, but this is simpler.)
-       loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu")
+       loaded = torch.load(
+           os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu", weights_only=True
+       )
    else:
        # Sharded
        checkpoint_list = sorted([file for file in os.listdir(input_base_path) if file.endswith(".pth")])
        print("Loading in order:", checkpoint_list)
-       loaded = [torch.load(os.path.join(input_base_path, file), map_location="cpu") for file in checkpoint_list]
+       loaded = [
+           torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
+           for file in checkpoint_list
+       ]
    param_count = 0
    index_dict = {"weight_map": {}}
    for layer_i in range(n_layers):

View File

@@ -219,12 +219,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
    # verify inputs
    filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_pixel_values.pt", repo_type="dataset")
-   original_pixel_values = torch.load(filepath, map_location="cpu")
+   original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
    assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
    if model_id == "liuhaotian/llava-v1.6-mistral-7b":
        filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_input_ids.pt", repo_type="dataset")
-       original_input_ids = torch.load(filepath, map_location="cpu")
+       original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
        # replace -200 by image_token_index (since we use token ID = 32000 for the image token)
        original_input_ids[original_input_ids == -200] = image_token_index
        assert original_input_ids[0].tolist() == inputs.input_ids[0].tolist()
@@ -233,7 +233,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
        filepath = hf_hub_download(
            repo_id="nielsr/test-image", filename="llava_1_6_34b_input_ids.pt", repo_type="dataset"
        )
-       original_input_ids = torch.load(filepath, map_location="cpu")
+       original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)
        # replace -200 by image_token_index
        original_input_ids[original_input_ids == -200] = image_token_index

View File

@@ -212,7 +212,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
    filepath = hf_hub_download(
        repo_id="RaushanTurganbay/test-image", filename="llava_onevision_pixel_values.pt", repo_type="dataset"
    )
-   original_pixel_values = torch.load(filepath, map_location="cpu")
+   original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
    assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
    image_sizes = torch.tensor([[899, 1024]])

View File

@@ -42,7 +42,7 @@ def convert_longformer_qa_checkpoint_to_pytorch(
    longformer = LongformerModel.from_pretrained(longformer_model)
    lightning_model = LightningModel(longformer)
-   ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"))
+   ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"), weights_only=True)
    lightning_model.load_state_dict(ckpt["state_dict"])
    # init longformer question answering model

View File

@@ -32,7 +32,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
    config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
    # Load in the weights from the checkpoint_path
-   state_dict = torch.load(checkpoint_path, map_location="cpu")
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # Load the entity vocab file
    entity_vocab = load_entity_vocab(entity_vocab_path)

View File

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
def convert_fairseq_m2m100_checkpoint_from_disk(checkpoint_path):
-   m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+   m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    args = m2m_100["args"] or m2m_100["cfg"]["model"]
    state_dict = m2m_100["model"]
    remove_ignore_keys_(state_dict)

View File

@@ -108,7 +108,7 @@ def convert_mamba_checkpoint_file_to_huggingface_model_file(
    )
    logger.info(f"Loading model from {mamba_checkpoint_path} based on config from {config_json_file}")
    # Load weights and config from paths
-   original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu")
+   original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu", weights_only=True)
    with open(config_json_file, "r", encoding="utf-8") as json_file:
        original_ssm_config_dict = json.load(json_file)

View File

@@ -38,7 +38,7 @@ def load_state_dict_from_safetensors(mamba2_checkpoint_path: str, ckpt_name: str
def load_state_dict_from_torch(mamba2_checkpoint_path: str, ckpt_name: str) -> Dict[str, torch.Tensor]:
-   return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu")
+   return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu", weights_only=True)
def convert_ssm_config_to_hf_config(config_ssm: Dict, mamba2_model_dict: Dict) -> Mamba2Config:

View File

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
def convert_fairseq_mbart_checkpoint_from_disk(
    checkpoint_path, hf_config_path="facebook/mbart-large-en-ro", finetuned=False, mbart_50=False
):
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    remove_ignore_keys_(state_dict)
    vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]

View File

@@ -294,9 +294,9 @@ def main():
    if args.path_to_checkpoint.endswith(".zip"):
        with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
            with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-               input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+               input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
    else:
-       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
    if args.config_file == "":
        # Default config of megatron-bert 345m

View File

@@ -275,7 +275,7 @@ def merge_transformers_sharded_states(path, num_checkpoints):
    state_dict = {}
    for i in range(1, num_checkpoints + 1):
        checkpoint_path = os.path.join(path, f"pytorch_model-{i:05d}-of-{num_checkpoints:05d}.bin")
-       current_chunk = torch.load(checkpoint_path, map_location="cpu")
+       current_chunk = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
        state_dict.update(current_chunk)
    return state_dict
@@ -298,7 +298,7 @@ def get_megatron_sharded_states(args, tp_size, pp_size, pp_rank):
            checkpoint_path = os.path.join(args.load_path, sub_dir_name, checkpoint_name)
            if os.path.isfile(checkpoint_path):
                break
-       state_dict = torch.load(checkpoint_path, map_location="cpu")
+       state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
        tp_state_dicts.append(state_dict)
    return tp_state_dicts
@@ -338,7 +338,7 @@ def convert_checkpoint_from_megatron_to_transformers(args):
            rank0_checkpoint_path = os.path.join(args.load_path, sub_dir, rank0_checkpoint_name)
            break
    print(f"Loading Megatron-LM checkpoint arguments from: {rank0_checkpoint_path}")
-   state_dict = torch.load(rank0_checkpoint_path, map_location="cpu")
+   state_dict = torch.load(rank0_checkpoint_path, map_location="cpu", weights_only=True)
    megatron_args = state_dict.get("args", None)
    if megatron_args is None:
        raise ValueError(
@@ -634,7 +634,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
    sub_dirs = [x for x in os.listdir(args.load_path) if x.startswith("pytorch_model")]
    if len(sub_dirs) == 1:
        checkpoint_name = "pytorch_model.bin"
-       state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu")
+       state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu", weights_only=True)
    else:
        num_checkpoints = len(sub_dirs) - 1
        state_dict = merge_transformers_sharded_states(args.load_path, num_checkpoints)

View File

@@ -263,9 +263,9 @@ def main():
    if args.path_to_checkpoint.endswith(".zip"):
        with zipfile.ZipFile(args.path_to_checkpoint, "r") as checkpoint:
            with checkpoint.open("release/mp_rank_00/model_optim_rng.pt") as pytorch_dict:
-               input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+               input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
    else:
-       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+       input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
    ds_args = input_state_dict.get("args", None)

View File

@@ -208,7 +208,9 @@ def convert_and_write_model(input_dir: str, output_dir: str, max_position_embedd
    else:
        shards = [file for file in os.listdir(input_dir) if re.match(r"consolidated.\d+.pth", file)]
        shards = sorted(shards, key=lambda x: int(x.split(".")[1]))
-       loaded_shards = [torch.load(os.path.join(input_dir, file), map_location="cpu") for file in shards]
+       loaded_shards = [
+           torch.load(os.path.join(input_dir, file), map_location="cpu", weights_only=True) for file in shards
+       ]
        full_state_dict = convert_state_dict_sharded(loaded_shards, config)
    # Load weights into model and resave them

View File

@@ -94,7 +94,8 @@ def write_model(model_path, input_base_path, model_size, safe_serialization=True
    print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
    # Load weights
    loaded = [
-       torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu") for i in range(8)
+       torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu", weights_only=True)
+       for i in range(8)
    ]
    merged_state_dict = {}

View File

@@ -342,10 +342,15 @@ def write_model(
            path = os.path.join(input_base_path, "consolidated.00.pth")
        else:
            path = os.path.join(input_base_path, "consolidated.pth")
-       loaded = [torch.load(path, map_location="cpu", mmap=True)]
+       loaded = [torch.load(path, map_location="cpu", mmap=True, weights_only=True)]
    else:
        loaded = [
-           torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu", mmap=True)
+           torch.load(
+               os.path.join(input_base_path, f"consolidated.{i:02d}.pth"),
+               map_location="cpu",
+               mmap=True,
+               weights_only=True,
+           )
            for i in range(num_shards)
        ]
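
This hunk also shows that weights_only=True composes with the other torch.load options used throughout these scripts, such as map_location and mmap=True for lazily mapping large shards from disk rather than reading them eagerly. A small sketch with a hypothetical shard path:

import torch

# Hypothetical shard path. mmap=True memory-maps tensor storage from the file on demand,
# while weights_only=True keeps the restricted unpickler in effect for the metadata.
shard = torch.load("consolidated.00.pth", map_location="cpu", mmap=True, weights_only=True)
print(sum(t.numel() for t in shard.values()))  # inspect parameter count without eager copies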

View File

@@ -33,7 +33,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
    config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])
    # Load in the weights from the checkpoint_path
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["module"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["module"]
    # Load the entity vocab file
    entity_vocab = load_original_entity_vocab(entity_vocab_path)

View File

@@ -199,7 +199,7 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
    config = get_mobilevit_config(mobilevit_name)
    # load original state_dict
-   state_dict = torch.load(checkpoint_path, map_location="cpu")
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # load 🤗 model
    if mobilevit_name.startswith("deeplabv3_"):

View File

@@ -239,7 +239,7 @@ def convert_mobilevitv2_checkpoint(task_name, checkpoint_path, orig_config_path,
    config = get_mobilevitv2_config(task_name, orig_config_path)
    # load original state_dict
-   checkpoint = torch.load(checkpoint_path, map_location="cpu")
+   checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    # load huggingface model
    if task_name.startswith("ade20k_") or task_name.startswith("voc_"):

View File

@@ -77,7 +77,7 @@ def convert_checkpoint_helper(max_position_embeddings, orig_state_dict):
def convert_mra_checkpoint(checkpoint_path, mra_config_file, pytorch_dump_path):
-   orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+   orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
    config = MraConfig.from_json_file(mra_config_file)
    model = MraForMaskedLM(config)

View File

@@ -77,7 +77,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
    for expert in range(num_experts):
        expert_path = switch_checkpoint_path + f"-rank-{expert}.pt"
        if os.path.isfile(expert_path):
-           expert_state = torch.load(expert_path)["model"]
+           expert_state = torch.load(expert_path, weights_only=True)["model"]
            remove_ignore_keys_(expert_state)
            expert_state = rename_fairseq_keys(expert_state, expert)
            save_path = os.path.join(
@@ -93,7 +93,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
    save_path = os.path.join(
        dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin")
    )
-   shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"]
+   shared_weights = torch.load(switch_checkpoint_path + "-shared.pt", weights_only=True)["model"]
    remove_ignore_keys_(shared_weights)
    shared_weights = rename_fairseq_keys(shared_weights, None)
    shared_weights["shared.weight"] = shared_weights["decoder.embed_tokens.weight"]

View File

@@ -78,7 +78,7 @@ def convert_checkpoint_helper(config, orig_state_dict):
def convert_nystromformer_checkpoint(checkpoint_path, nystromformer_config_file, pytorch_dump_path):
-   orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+   orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]
    config = NystromformerConfig.from_json_file(nystromformer_config_file)
    model = NystromformerForMaskedLM(config)

View File

@@ -91,7 +91,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
    # Not sharded
    # (The sharded implementation would also work, but this is simpler.)
-   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
    param_count = 0
    index_dict = {"weight_map": {}}

View File

@@ -107,7 +107,7 @@ def write_model(
    # Not sharded
    # (The sharded implementation would also work, but this is simpler.)
-   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
    param_count = 0
    index_dict: Dict[str, Any] = {"weight_map": {}}

View File

@@ -119,7 +119,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
    print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
    # Not sharded
-   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+   loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)
    param_count = 0
    index_dict = {"weight_map": {}}

View File

@@ -29,9 +29,9 @@ logger = logging.get_logger(__name__)
def load_checkpoint(checkpoint_path):
    """Checkpoint path should end in model.pt"""
-   sd = torch.load(checkpoint_path, map_location="cpu")
+   sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    if "model" in sd.keys():
-       sd = torch.load(checkpoint_path, map_location="cpu")["model"]
+       sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    # pop unnecessary weights
    keys_to_delete = [

View File

@@ -268,10 +268,10 @@ def convert_owlv2_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_pa
    # Verify pixel_values and input_ids
    filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlvit_pixel_values_960.pt", repo_type="dataset")
-   original_pixel_values = torch.load(filepath).permute(0, 3, 1, 2)
+   original_pixel_values = torch.load(filepath, weights_only=True).permute(0, 3, 1, 2)
    filepath = hf_hub_download(repo_id="nielsr/test-image", filename="owlv2_input_ids.pt", repo_type="dataset")
-   original_input_ids = torch.load(filepath).squeeze()
+   original_input_ids = torch.load(filepath, weights_only=True).squeeze()
    filepath = hf_hub_download(repo_id="adirik/OWL-ViT", repo_type="space", filename="assets/astronaut.png")
    image = Image.open(filepath)

View File

@@ -82,7 +82,7 @@ def convert_persimmon_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_mode
    import sys
    sys.path.insert(0, ada_lib_path)
-   model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+   model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
    state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
    state_dict = rename_state_dict(state_dict)

View File

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
def convert_fairseq_plbart_checkpoint_from_disk(
    checkpoint_path, hf_config_path="uclanlp/plbart-base", finetuned=False, classification=False
):
-   state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+   state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
    remove_ignore_keys_(state_dict)
    vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]

View File

@@ -151,7 +151,7 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
    logger.info(f"Converting model {model_name}...")
    # load original state dict
-   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+   state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
    # rename keys
    state_dict = rename_keys(state_dict)

View File

@@ -26,7 +26,7 @@ from transformers import Pop2PianoConfig, Pop2PianoForConditionalGeneration
# This weights were downloaded from the official pop2piano repository
# https://huggingface.co/sweetcocoa/pop2piano/blob/main/model-1999-val_0.67311615.ckpt
-official_weights = torch.load("./model-1999-val_0.67311615.ckpt")
+official_weights = torch.load("./model-1999-val_0.67311615.ckpt", weights_only=True)
state_dict = {}

View File

@@ -173,7 +173,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
        filename=f"{filename}",
    )
-   state_dict = torch.load(filepath, map_location="cpu")["state_dict"]
+   state_dict = torch.load(filepath, map_location="cpu", weights_only=True)["state_dict"]
    state_dict = {key[9:]: state_dict[key] for key in state_dict}
    # Convert state dict using mappings

View File

@@ -165,7 +165,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path):
        raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given")
    config = PvtConfig(name_or_path=config_path)
    # load original model from https://github.com/whai362/PVT
-   state_dict = torch.load(pvt_checkpoint, map_location="cpu")
+   state_dict = torch.load(pvt_checkpoint, map_location="cpu", weights_only=True)
    rename_keys = create_rename_keys(config)
    for src, dest in rename_keys:

View File

@@ -207,7 +207,7 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde
    )
    config = PvtV2Config.from_pretrained(config_path)
    # load original model from https://github.com/whai362/PVT
-   state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu")
+   state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu", weights_only=True)
    rename_keys = create_rename_keys(config)
    for src, dest in rename_keys:

View File

@@ -71,7 +71,7 @@ LAYER_NAME_MAPPING = {"embedder.weight": "model.embed_tokens.weight"}
def write_model(save_path, input_base_path, config, safe_serialization=True, push_to_hub=False, dtype=torch.float32):
    print(f"Fetching all parameters from the checkpoint at '{input_base_path}'")
-   model_state_dict = torch.load(input_base_path, map_location="cpu")
+   model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)
    REPLACEMENT = {
        "blocks.": "layers.",

View File

@@ -37,7 +37,9 @@ def convert_roberta_prelayernorm_checkpoint_to_pytorch(checkpoint_repo: str, pyt
    )
    # convert state_dict
-   original_state_dict = torch.load(hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"))
+   original_state_dict = torch.load(
+       hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"), weights_only=True
+   )
    state_dict = {}
    for tensor_key, tensor_value in original_state_dict.items():
        # The transformer implementation gives the model a unique name, rather than overwiriting 'roberta'

View File

@@ -112,7 +112,7 @@ def convert_rmkv_checkpoint_to_hf_format(
 # 3. Download model file then convert state_dict
 model_file = hf_hub_download(repo_id, checkpoint_file)
-state_dict = torch.load(model_file, map_location="cpu")
+state_dict = torch.load(model_file, map_location="cpu", weights_only=True)
 state_dict = convert_state_dict(state_dict)
 # 4. Split in shards and save
@@ -147,7 +147,7 @@ def convert_rmkv_checkpoint_to_hf_format(
 gc.collect()
 for shard_file in shard_files:
-state_dict = torch.load(os.path.join(output_dir, shard_file))
+state_dict = torch.load(os.path.join(output_dir, shard_file), weights_only=True)
 torch.save({k: v.cpu().clone() for k, v in state_dict.items()}, os.path.join(output_dir, shard_file))
 del state_dict

View File

@@ -137,7 +137,7 @@ def replace_keys(state_dict):
 def convert_sam_checkpoint(model_name, checkpoint_path, pytorch_dump_folder, push_to_hub):
 config = get_config(model_name)
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 state_dict = replace_keys(state_dict)
 image_processor = SamImageProcessor()

View File

@@ -191,9 +191,9 @@ def convert_segformer_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
 # load original state dict
 if encoder_only:
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 else:
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))["state_dict"]
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)["state_dict"]
 # rename keys
 state_dict = rename_keys(state_dict, encoder_only=encoder_only)

View File

@@ -441,9 +441,9 @@ def convert_siglip_checkpoint(model_name, pytorch_dump_folder_path, verify_logit
 raise ValueError("Image size not supported")
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename=filename, repo_type="dataset")
-original_pixel_values = torch.load(filepath)
+original_pixel_values = torch.load(filepath, weights_only=True)
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="siglip_input_ids.pt", repo_type="dataset")
-original_input_ids = torch.load(filepath)
+original_input_ids = torch.load(filepath, weights_only=True)
 if "i18n" not in model_name:
 assert inputs.input_ids.tolist() == original_input_ids.tolist()

View File

@@ -52,7 +52,7 @@ def make_linear_from_emb(emb):
 def convert_fairseq_s2t_checkpoint_to_tfms(checkpoint_path, pytorch_dump_folder_path):
-m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 args = m2m_100["args"]
 state_dict = m2m_100["model"]
 lm_head_weights = state_dict["decoder.output_projection.weight"]
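Fairseq-style checkpoints such as the one loaded above keep training metadata ("args", "model") next to the weights. If such an entry is backed by a class outside torch.load's built-in safelist, weights_only=True refuses to reconstruct it and raises an UnpicklingError naming the offending global; classes you trust can then be allowlisted explicitly. A sketch under the assumption of a recent PyTorch (roughly 2.5+, where both helpers exist); "checkpoint.pt" is a placeholder:

import argparse
import torch
from torch.serialization import add_safe_globals, safe_globals

# Option 1: allowlist a trusted class for the whole process.
add_safe_globals([argparse.Namespace])
ckpt = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)

# Option 2: scope the allowlist to a single load.
with safe_globals([argparse.Namespace]):
    ckpt = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)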

View File

@@ -70,7 +70,7 @@ def convert_hifigan_checkpoint(
 model = SpeechT5HifiGan(config)
-orig_checkpoint = torch.load(checkpoint_path)
+orig_checkpoint = torch.load(checkpoint_path, weights_only=True)
 load_weights(orig_checkpoint["model"]["generator"], model, config)
 stats = np.load(stats_path)

View File

@@ -361,7 +361,7 @@ def convert_speecht5_checkpoint(
 processor = SpeechT5Processor(tokenizer=tokenizer, feature_extractor=feature_extractor)
 processor.save_pretrained(pytorch_dump_folder_path)
-fairseq_checkpoint = torch.load(checkpoint_path)
+fairseq_checkpoint = torch.load(checkpoint_path, weights_only=True)
 recursively_load_weights(fairseq_checkpoint["model"], model, task)
 model.save_pretrained(pytorch_dump_folder_path)

View File

@@ -125,7 +125,7 @@ def convert_swiftformer_checkpoint(swiftformer_name, pytorch_dump_folder_path, o
 if original_ckpt.startswith("https"):
 checkpoint = torch.hub.load_state_dict_from_url(original_ckpt, map_location="cpu", check_hash=True)
 else:
-checkpoint = torch.load(original_ckpt, map_location="cpu")
+checkpoint = torch.load(original_ckpt, map_location="cpu", weights_only=True)
 state_dict = checkpoint
 rename_keys = create_rename_keys(state_dict)

View File

@@ -121,7 +121,7 @@ def convert_state_dict(orig_state_dict, model):
 def convert_swin_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_path, push_to_hub):
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 config = get_swin_config(model_name)
 model = SwinForMaskedImageModeling(config)

View File

@@ -143,7 +143,7 @@ def convert_timesformer_checkpoint(checkpoint_url, pytorch_dump_folder_path, mod
 # download original checkpoint, hosted on Google Drive
 output = "pytorch_model.bin"
 gdown.cached_download(checkpoint_url, output, quiet=False)
-files = torch.load(output, map_location="cpu")
+files = torch.load(output, map_location="cpu", weights_only=True)
 if "model" in files:
 state_dict = files["model"]
 elif "module" in files:

View File

@@ -98,7 +98,7 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h
 # load original state dict
 checkpoint_path = name_to_checkpoint_path[model_name]
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 print("Checkpoint path:", checkpoint_path)
@@ -177,12 +177,12 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h
 # autoregressive decoding with original input data
 print("Testing generation with original inputs...")
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="input_ids_udop.pt", repo_type="dataset")
-input_ids = torch.load(filepath)
+input_ids = torch.load(filepath, weights_only=True)
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="bbox_udop.pt", repo_type="dataset")
-bbox = torch.load(filepath)
+bbox = torch.load(filepath, weights_only=True)
 pixel_values_filename = "pixel_values_udop_512.pt" if "512" in model_name else "pixel_values_udop_224.pt"
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename=pixel_values_filename, repo_type="dataset")
-pixel_values = torch.load(filepath)
+pixel_values = torch.load(filepath, weights_only=True)
 print("Decoded input ids:", tokenizer.decode(input_ids[0], skip_special_tokens=True))
 print("Bbox shape:", bbox.shape)

View File

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
 """
 Copy/paste/tweak model's weights to transformers design.
 """
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 downstream_dict = checkpoint["Downstream"]

View File

@@ -106,7 +106,7 @@ def convert_univnet_checkpoint(
 repo_id=None,
 safe_serialization=False,
 ):
-model_state_dict_base = torch.load(checkpoint_path, map_location="cpu")
+model_state_dict_base = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 # Get the generator's state dict
 state_dict = model_state_dict_base["model_g"]

View File

@@ -99,7 +99,7 @@ def convert_video_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_p
 state_dict_temp = "pytorch_model-0000{i}-of-00002.bin"
 for shard in range(1, 3):
 state_dict_path = hf_hub_download(old_state_dict_id, state_dict_temp.format(i=shard))
-state_dict = torch.load(state_dict_path, map_location="cpu")
+state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True)
 state_dict = convert_state_dict_to_hf(state_dict)
 model.load_state_dict(state_dict, strict=False, assign=True)
 model_state_dict -= set(state_dict.keys())

View File

@@ -187,7 +187,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
 # download original checkpoint, hosted on Google Drive
 output = "pytorch_model.bin"
 gdown.cached_download(checkpoint_url, output, quiet=False)
-files = torch.load(output, map_location="cpu")
+files = torch.load(output, map_location="cpu", weights_only=True)
 if "model" in files:
 state_dict = files["model"]
 else:
@@ -204,7 +204,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
 if "finetuned" not in model_name:
 local_path = hf_hub_download(repo_id="hf-internal-testing/bool-masked-pos", filename="bool_masked_pos.pt")
-inputs["bool_masked_pos"] = torch.load(local_path)
+inputs["bool_masked_pos"] = torch.load(local_path, weights_only=True)
 outputs = model(**inputs)
 logits = outputs.logits

View File

@@ -78,7 +78,7 @@ def convert_vipllava_llama_to_hf(text_model_id, vision_model_id, output_hub_path
 state_dict_path = hf_hub_download(old_state_dict_id, "model_state_dict_7b.bin")
-state_dict = torch.load(state_dict_path, map_location="cpu")
+state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True)
 state_dict = convert_state_dict_to_hf(state_dict)
 model.load_state_dict(state_dict, strict=True, assign=True)

View File

@@ -56,7 +56,7 @@ ACCEPTABLE_CHECKPOINTS = [
 def load_state_dict(checkpoint_path):
-sd = torch.load(checkpoint_path, map_location="cpu")
+sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 return sd

View File

@@ -82,7 +82,7 @@ def convert_vitmatte_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu
 filename = model_name_to_filename[model_name]
 filepath = hf_hub_download(repo_id="nielsr/vitmatte-checkpoints", filename=filename, repo_type="model")
-state_dict = torch.load(filepath, map_location="cpu")
+state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
 # rename keys
 for key in state_dict.copy().keys():

View File

@@ -207,7 +207,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True):
 )
 print("Converting model...")
-original_state_dict = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+original_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]
 all_keys = list(original_state_dict.keys())
 new_keys = convert_old_keys_to_new_keys(all_keys)
@@ -264,7 +264,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True):
 pixel_values = image_processor(images=image, boxes=boxes, return_tensors="pt").pixel_values
 filepath = hf_hub_download(repo_id="nielsr/test-image", filename="vitpose_batch_data.pt", repo_type="dataset")
-original_pixel_values = torch.load(filepath, map_location="cpu")["img"]
+original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)["img"]
 # we allow for a small difference in the pixel values due to the original repository using cv2
 assert torch.allclose(pixel_values, original_pixel_values, atol=1e-1)

View File

@@ -346,7 +346,7 @@ def convert_checkpoint(
 model.decoder.apply_weight_norm()
-orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
 recursively_load_weights(orig_checkpoint["model"], model)
 model.decoder.remove_weight_norm()

View File

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
 """
 Copy/paste/tweak model's weights to transformers design.
 """
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 downstream_dict = checkpoint["Downstream"]

View File

@@ -179,7 +179,7 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro
 @torch.no_grad()
 def convert_wavlm_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_path=None):
 # load the pre-trained checkpoints
-checkpoint = torch.load(checkpoint_path)
+checkpoint = torch.load(checkpoint_path, weights_only=True)
 cfg = WavLMConfigOrig(checkpoint["cfg"])
 model = WavLMOrig(cfg)
 model.load_state_dict(checkpoint["model"])

View File

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
 """
 Copy/paste/tweak model's weights to transformers design.
 """
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 downstream_dict = checkpoint["Downstream"]

View File

@@ -157,7 +157,7 @@ def _download(url: str, root: str) -> Any:
 if os.path.isfile(download_target):
 model_bytes = open(download_target, "rb").read()
 if insecure_hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
-return torch.load(io.BytesIO(model_bytes))
+return torch.load(io.BytesIO(model_bytes), weights_only=True)
 else:
 warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
@@ -179,7 +179,7 @@ def _download(url: str, root: str) -> Any:
 "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
 )
-return torch.load(io.BytesIO(model_bytes))
+return torch.load(io.BytesIO(model_bytes), weights_only=True)
 def convert_openai_whisper_to_tfms(
@@ -190,7 +190,7 @@ def convert_openai_whisper_to_tfms(
 original_checkpoint = _download(_MODELS[checkpoint_path], root)
 openai_version = checkpoint_path
 else:
-original_checkpoint = torch.load(checkpoint_path, map_location="cpu")
+original_checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 openai_version = None
 dimensions = original_checkpoint["dims"]
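torch.load accepts any file-like object, so the in-memory io.BytesIO path in the Whisper downloader keeps working unchanged with weights_only=True. A condensed, self-contained sketch of the same verify-then-load idea; the URL and digest below are placeholders, not values from the commit:

import hashlib
import io
import urllib.request

import torch

url = "https://example.com/model.pt"  # placeholder URL
expected_sha256 = "0" * 64            # placeholder digest

# Download, verify the checksum, and only then unpickle the tensors.
model_bytes = urllib.request.urlopen(url).read()
if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
    raise RuntimeError("SHA256 mismatch; refusing to load the checkpoint")
checkpoint = torch.load(io.BytesIO(model_bytes), map_location="cpu", weights_only=True)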

Some files were not shown because too many files have changed in this diff.