Mirror of https://github.com/huggingface/transformers.git
Add weights_only=True to torch.load (#37062)

commit 28eae8b4bd (parent bf46e44878)
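The change is mechanical: every touched script passes weights_only=True to torch.load, so loading a checkpoint unpickles only tensors, primitive types, and standard containers instead of arbitrary Python objects, which closes off the code-execution path that full unpickling allows. A minimal sketch of the before/after pattern follows; the file name and the small Linear module are placeholders for illustration, not code from this diff.

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
torch.save(model.state_dict(), "checkpoint.pt")

# Before this commit: unrestricted unpickling; a crafted checkpoint can execute code on load.
state_dict = torch.load("checkpoint.pt", map_location="cpu")

# After this commit: restricted unpickling; only tensors, primitive types, and standard
# containers are deserialized, which is all a plain state_dict needs.
state_dict = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)

model.load_state_dict(state_dict)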
@@ -122,7 +122,7 @@ class GlueDataset(Dataset):
-self.features = torch.load(cached_features_file)
+self.features = torch.load(cached_features_file, weights_only=True)

@@ -109,7 +109,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
-checkpoint = torch.load(ckpt_path, map_location=device)
+checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)

@@ -71,7 +71,7 @@ def rename_key(dct, old, new):
-sd = torch.load(checkpoint_path, map_location="cpu")
+sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -101,7 +101,7 @@ def main(raw_args=None):
-state_dict=torch.load(args.pytorch_model_path),
+state_dict=torch.load(args.pytorch_model_path, weights_only=True),

@@ -168,7 +168,7 @@ def convert_biogpt_checkpoint_to_pytorch(biogpt_checkpoint_path, pytorch_dump_fo
-chkpt = torch.load(checkpoint_file, map_location="cpu")
+chkpt = torch.load(checkpoint_file, map_location="cpu", weights_only=True)

@@ -79,7 +79,7 @@ def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_
-model = torch.load(checkpoint_path, map_location="cpu")
+model = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -104,7 +104,7 @@ def convert_bloom_checkpoint_to_pytorch(
-temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)

@@ -164,7 +164,7 @@ def convert_bloom_checkpoint_to_pytorch(
-temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)

@@ -130,13 +130,15 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
-loaded = torch.load(possible_path, map_location="cpu")
+loaded = torch.load(possible_path, map_location="cpu", weights_only=True)
-torch.load(os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu")
+torch.load(
+    os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu", weights_only=True
+)
@@ -314,7 +316,7 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
-vqgan_state_dict = torch.load(vqgan_path, map_location="cpu")["state_dict"]
+vqgan_state_dict = torch.load(vqgan_path, map_location="cpu", weights_only=True)["state_dict"]

@@ -104,7 +104,7 @@ def convert_chinese_clip_checkpoint(checkpoint_path, pytorch_dump_folder_path, c
-pt_weights = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+pt_weights = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]

@@ -169,7 +169,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -201,9 +201,9 @@ def convert_clvp_weights(checkpoint_path, pytorch_dump_folder_path):
-clvp_checkpoint = torch.load(each_model_path, map_location="cpu")
+clvp_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
-decoder_checkpoint = torch.load(each_model_path, map_location="cpu")
+decoder_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)

@@ -309,7 +309,7 @@ def convert_cvt_checkpoint(cvt_model, image_size, cvt_file_name, pytorch_dump_fo
-original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"))
+original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"), weights_only=True)

@@ -143,7 +143,7 @@ def write_model(model_name, pretrained_model_weights_path, pytorch_dump_folder_p
-loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"))["model"]
+loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"), weights_only=True)["model"]

@@ -205,7 +205,7 @@ def convert_checkpoint(
-model_dict = torch.load(checkpoint_path, "cpu")
+model_dict = torch.load(checkpoint_path, "cpu", weights_only=True)

@@ -224,7 +224,7 @@ def load_beit_model(args, is_finetuned, is_large):
-checkpoint = torch.load(args.beit_checkpoint, map_location="cpu")
+checkpoint = torch.load(args.beit_checkpoint, map_location="cpu", weights_only=True)

@@ -125,7 +125,7 @@ def convert_deformable_detr_checkpoint(
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -229,7 +229,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
@@ -230,7 +230,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -123,7 +123,7 @@ def prepare_img():
-orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -228,7 +228,7 @@ def convert_openai_checkpoint(model_name=None, pytorch_dump_folder_path=None):
-old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}")["model"]
+old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}", weights_only=True)["model"]

@@ -132,13 +132,17 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
-torch.load(os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu")
+torch.load(
+    os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu", weights_only=True
+)
-torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+torch.load(
+    os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+)

@@ -234,7 +238,9 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
-torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+torch.load(
+    os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+)

@@ -129,7 +129,7 @@ def convert_weight_and_push(
-from_state_dict = torch.load(checkpoint_path)["state_dict"]
+from_state_dict = torch.load(checkpoint_path, weights_only=True)["state_dict"]

@@ -229,7 +229,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
-state_dict = torch.load(filepath, map_location="cpu")
+state_dict = torch.load(filepath, map_location="cpu", weights_only=True)

@@ -27,7 +27,7 @@ NEW_KEY = "lm_head.weight"
-d = torch.load(checkpoint_path)
+d = torch.load(checkpoint_path, weights_only=True)

@@ -29,7 +29,9 @@ CheckpointState = collections.namedtuple(
-state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
+state_dict = torch.load(
+    model_file, map_location=lambda s, l: default_restore_location(s, "cpu"), weights_only=True
+)
@@ -226,7 +226,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
-state_dict = torch.load(checkpoint_url, map_location="cpu")
+state_dict = torch.load(checkpoint_url, map_location="cpu", weights_only=True)

@@ -325,7 +325,7 @@ def convert_checkpoint(
-original_checkpoint = torch.load(checkpoint_path)
+original_checkpoint = torch.load(checkpoint_path, weights_only=True)

@@ -164,7 +164,7 @@ def convert_FastSpeech2ConformerModel_checkpoint(
-espnet_checkpoint = torch.load(checkpoint_path)
+espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)

@@ -104,7 +104,7 @@ def convert_hifigan_checkpoint(
-orig_checkpoint = torch.load(checkpoint_path)
+orig_checkpoint = torch.load(checkpoint_path, weights_only=True)

@@ -51,7 +51,7 @@ def convert_FastSpeech2ConformerWithHifiGan_checkpoint(
-espnet_checkpoint = torch.load(checkpoint_path)
+espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)

@@ -62,7 +62,7 @@ def convert_dalle_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_p
-ckpt = torch.load(checkpoint_path)
+ckpt = torch.load(checkpoint_path, weights_only=True)

@@ -73,7 +73,7 @@ def convert_flava_checkpoint(checkpoint_path, codebook_path, pytorch_dump_folder
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -87,7 +87,7 @@ def rename_state_dict(state_dict):
-model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)

@@ -72,7 +72,7 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
-model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]

@@ -97,11 +97,11 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
-loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu")
+loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
-model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
@@ -297,7 +297,7 @@ def convert_git_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub=Fal
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -53,7 +53,7 @@ def load_weights(input_dir: str):
-tensors = torch.load(file, map_location="cpu")
+tensors = torch.load(file, map_location="cpu", weights_only=True)

@@ -140,7 +140,7 @@ def convert_glpn_checkpoint(checkpoint_path, pytorch_dump_folder_path, push_to_h
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)

@@ -153,7 +153,7 @@ def main(args):
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -163,7 +163,7 @@ def convert_groupvit_checkpoint(
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -32,7 +32,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -228,12 +228,17 @@ def write_model(
-loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu")
+loaded = torch.load(
+    os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu", weights_only=True
+)
-loaded = [torch.load(os.path.join(input_base_path, file), map_location="cpu") for file in checkpoint_list]
+loaded = [
+    torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
+    for file in checkpoint_list
+]
@@ -219,12 +219,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
-original_pixel_values = torch.load(filepath, map_location="cpu")
+original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)
-original_input_ids = torch.load(filepath, map_location="cpu")
+original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)

@@ -233,7 +233,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
-original_input_ids = torch.load(filepath, map_location="cpu")
+original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True)

@@ -212,7 +212,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
-original_pixel_values = torch.load(filepath, map_location="cpu")
+original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)

@@ -42,7 +42,7 @@ def convert_longformer_qa_checkpoint_to_pytorch(
-ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"))
+ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu"), weights_only=True)

@@ -32,7 +32,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
-m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -108,7 +108,7 @@ def convert_mamba_checkpoint_file_to_huggingface_model_file(
-original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu")
+original_state_dict = torch.load(mamba_checkpoint_path, map_location="cpu", weights_only=True)

@@ -38,7 +38,7 @@ def load_state_dict_from_safetensors(mamba2_checkpoint_path: str, ckpt_name: str
-return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu")
+return torch.load(path.join(mamba2_checkpoint_path, ckpt_name), map_location="cpu", weights_only=True)

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -294,9 +294,9 @@ def main():
-input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
-input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)
@@ -275,7 +275,7 @@ def merge_transformers_sharded_states(path, num_checkpoints):
-current_chunk = torch.load(checkpoint_path, map_location="cpu")
+current_chunk = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -298,7 +298,7 @@ def get_megatron_sharded_states(args, tp_size, pp_size, pp_rank):
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -338,7 +338,7 @@ def convert_checkpoint_from_megatron_to_transformers(args):
-state_dict = torch.load(rank0_checkpoint_path, map_location="cpu")
+state_dict = torch.load(rank0_checkpoint_path, map_location="cpu", weights_only=True)

@@ -634,7 +634,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
-state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu")
+state_dict = torch.load(os.path.join(args.load_path, checkpoint_name), map_location="cpu", weights_only=True)

@@ -263,9 +263,9 @@ def main():
-input_state_dict = torch.load(pytorch_dict, map_location="cpu")
+input_state_dict = torch.load(pytorch_dict, map_location="cpu", weights_only=True)
-input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu")
+input_state_dict = torch.load(args.path_to_checkpoint, map_location="cpu", weights_only=True)

@@ -208,7 +208,9 @@ def convert_and_write_model(input_dir: str, output_dir: str, max_position_embedd
-loaded_shards = [torch.load(os.path.join(input_dir, file), map_location="cpu") for file in shards]
+loaded_shards = [
+    torch.load(os.path.join(input_dir, file), map_location="cpu", weights_only=True) for file in shards
+]

@@ -94,7 +94,8 @@ def write_model(model_path, input_base_path, model_size, safe_serialization=True
-torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu") for i in range(8)
+torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pt"), map_location="cpu", weights_only=True)
+for i in range(8)

@@ -342,10 +342,15 @@ def write_model(
-loaded = [torch.load(path, map_location="cpu", mmap=True)]
+loaded = [torch.load(path, map_location="cpu", mmap=True, weights_only=True)]
-torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu", mmap=True)
+torch.load(
+    os.path.join(input_base_path, f"consolidated.{i:02d}.pth"),
+    map_location="cpu",
+    mmap=True,
+    weights_only=True,
+)
@@ -33,7 +33,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
-state_dict = torch.load(checkpoint_path, map_location="cpu")["module"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["module"]

@@ -199,7 +199,7 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -239,7 +239,7 @@ def convert_mobilevitv2_checkpoint(task_name, checkpoint_path, orig_config_path,
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -77,7 +77,7 @@ def convert_checkpoint_helper(max_position_embeddings, orig_state_dict):
-orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]

@@ -77,7 +77,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
-expert_state = torch.load(expert_path)["model"]
+expert_state = torch.load(expert_path, weights_only=True)["model"]

@@ -93,7 +93,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
-shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"]
+shared_weights = torch.load(switch_checkpoint_path + "-shared.pt", weights_only=True)["model"]

@@ -78,7 +78,7 @@ def convert_checkpoint_helper(config, orig_state_dict):
-orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
+orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model_state_dict"]

@@ -91,7 +91,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
-loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)

@@ -107,7 +107,7 @@ def write_model(
-loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)

@@ -119,7 +119,7 @@ def write_model(model_path, input_base_path, tokenizer_path=None, safe_serializa
-loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu")
+loaded = torch.load(os.path.join(input_base_path, "model.pt"), map_location="cpu", weights_only=True)

@@ -29,9 +29,9 @@ logger = logging.get_logger(__name__)
-sd = torch.load(checkpoint_path, map_location="cpu")
+sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
-sd = torch.load(checkpoint_path, map_location="cpu")["model"]
+sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
@@ -268,10 +268,10 @@ def convert_owlv2_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_pa
-original_pixel_values = torch.load(filepath).permute(0, 3, 1, 2)
+original_pixel_values = torch.load(filepath, weights_only=True).permute(0, 3, 1, 2)
-original_input_ids = torch.load(filepath).squeeze()
+original_input_ids = torch.load(filepath, weights_only=True).squeeze()

@@ -82,7 +82,7 @@ def convert_persimmon_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_mode
-model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)

@@ -43,7 +43,7 @@ def make_linear_from_emb(emb):
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -151,7 +151,7 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)

@@ -26,7 +26,7 @@ from transformers import Pop2PianoConfig, Pop2PianoForConditionalGeneration
-official_weights = torch.load("./model-1999-val_0.67311615.ckpt")
+official_weights = torch.load("./model-1999-val_0.67311615.ckpt", weights_only=True)

@@ -173,7 +173,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
-state_dict = torch.load(filepath, map_location="cpu")["state_dict"]
+state_dict = torch.load(filepath, map_location="cpu", weights_only=True)["state_dict"]

@@ -165,7 +165,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path):
-state_dict = torch.load(pvt_checkpoint, map_location="cpu")
+state_dict = torch.load(pvt_checkpoint, map_location="cpu", weights_only=True)

@@ -207,7 +207,7 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde
-state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu")
+state_dict = torch.load(pvt_v2_checkpoint, map_location="cpu", weights_only=True)

@@ -71,7 +71,7 @@ LAYER_NAME_MAPPING = {"embedder.weight": "model.embed_tokens.weight"}
-model_state_dict = torch.load(input_base_path, map_location="cpu")
+model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)

@@ -37,7 +37,9 @@ def convert_roberta_prelayernorm_checkpoint_to_pytorch(checkpoint_repo: str, pyt
-original_state_dict = torch.load(hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"))
+original_state_dict = torch.load(
+    hf_hub_download(repo_id=checkpoint_repo, filename="pytorch_model.bin"), weights_only=True
+)
@@ -112,7 +112,7 @@ def convert_rmkv_checkpoint_to_hf_format(
-state_dict = torch.load(model_file, map_location="cpu")
+state_dict = torch.load(model_file, map_location="cpu", weights_only=True)

@@ -147,7 +147,7 @@ def convert_rmkv_checkpoint_to_hf_format(
-state_dict = torch.load(os.path.join(output_dir, shard_file))
+state_dict = torch.load(os.path.join(output_dir, shard_file), weights_only=True)

@@ -137,7 +137,7 @@ def replace_keys(state_dict):
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -191,9 +191,9 @@ def convert_segformer_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)
-state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))["state_dict"]
+state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)["state_dict"]

@@ -441,9 +441,9 @@ def convert_siglip_checkpoint(model_name, pytorch_dump_folder_path, verify_logit
-original_pixel_values = torch.load(filepath)
+original_pixel_values = torch.load(filepath, weights_only=True)
-original_input_ids = torch.load(filepath)
+original_input_ids = torch.load(filepath, weights_only=True)

@@ -52,7 +52,7 @@ def make_linear_from_emb(emb):
-m2m_100 = torch.load(checkpoint_path, map_location="cpu")
+m2m_100 = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -70,7 +70,7 @@ def convert_hifigan_checkpoint(
-orig_checkpoint = torch.load(checkpoint_path)
+orig_checkpoint = torch.load(checkpoint_path, weights_only=True)

@@ -361,7 +361,7 @@ def convert_speecht5_checkpoint(
-fairseq_checkpoint = torch.load(checkpoint_path)
+fairseq_checkpoint = torch.load(checkpoint_path, weights_only=True)

@@ -125,7 +125,7 @@ def convert_swiftformer_checkpoint(swiftformer_name, pytorch_dump_folder_path, o
-checkpoint = torch.load(original_ckpt, map_location="cpu")
+checkpoint = torch.load(original_ckpt, map_location="cpu", weights_only=True)

@@ -121,7 +121,7 @@ def convert_state_dict(orig_state_dict, model):
-state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]

@@ -143,7 +143,7 @@ def convert_timesformer_checkpoint(checkpoint_url, pytorch_dump_folder_path, mod
-files = torch.load(output, map_location="cpu")
+files = torch.load(output, map_location="cpu", weights_only=True)

@@ -98,7 +98,7 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h
-state_dict = torch.load(checkpoint_path, map_location="cpu")
+state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -177,12 +177,12 @@ def convert_udop_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h
-input_ids = torch.load(filepath)
+input_ids = torch.load(filepath, weights_only=True)
-bbox = torch.load(filepath)
+bbox = torch.load(filepath, weights_only=True)
-pixel_values = torch.load(filepath)
+pixel_values = torch.load(filepath, weights_only=True)
@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -106,7 +106,7 @@ def convert_univnet_checkpoint(
-model_state_dict_base = torch.load(checkpoint_path, map_location="cpu")
+model_state_dict_base = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -99,7 +99,7 @@ def convert_video_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_p
-state_dict = torch.load(state_dict_path, map_location="cpu")
+state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True)

@@ -187,7 +187,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
-files = torch.load(output, map_location="cpu")
+files = torch.load(output, map_location="cpu", weights_only=True)

@@ -204,7 +204,7 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
-inputs["bool_masked_pos"] = torch.load(local_path)
+inputs["bool_masked_pos"] = torch.load(local_path, weights_only=True)

@@ -78,7 +78,7 @@ def convert_vipllava_llama_to_hf(text_model_id, vision_model_id, output_hub_path
-state_dict = torch.load(state_dict_path, map_location="cpu")
+state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True)

@@ -56,7 +56,7 @@ ACCEPTABLE_CHECKPOINTS = [
-sd = torch.load(checkpoint_path, map_location="cpu")
+sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -82,7 +82,7 @@ def convert_vitmatte_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu
-state_dict = torch.load(filepath, map_location="cpu")
+state_dict = torch.load(filepath, map_location="cpu", weights_only=True)

@@ -207,7 +207,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True):
-original_state_dict = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+original_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]

@@ -264,7 +264,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True):
-original_pixel_values = torch.load(filepath, map_location="cpu")["img"]
+original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True)["img"]
@@ -346,7 +346,7 @@ def convert_checkpoint(
-orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+orig_checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True)

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -179,7 +179,7 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro
-checkpoint = torch.load(checkpoint_path)
+checkpoint = torch.load(checkpoint_path, weights_only=True)

@@ -71,7 +71,7 @@ def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path, mode
-checkpoint = torch.load(checkpoint_path, map_location="cpu")
+checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

@@ -157,7 +157,7 @@ def _download(url: str, root: str) -> Any:
-return torch.load(io.BytesIO(model_bytes))
+return torch.load(io.BytesIO(model_bytes), weights_only=True)

@@ -179,7 +179,7 @@ def _download(url: str, root: str) -> Any:
-return torch.load(io.BytesIO(model_bytes))
+return torch.load(io.BytesIO(model_bytes), weights_only=True)

@@ -190,7 +190,7 @@ def convert_openai_whisper_to_tfms(
-original_checkpoint = torch.load(checkpoint_path, map_location="cpu")
+original_checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
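One caveat for conversion scripts like those above: with weights_only=True, torch.load refuses to unpickle arbitrary classes, so checkpoints that bundle non-tensor objects (for example a fairseq config or an argparse.Namespace) fail to load unless those types are allowlisted first. The sketch below is a generic illustration, not code from this diff; MyConfig is a made-up placeholder, and torch.serialization.add_safe_globals assumes a recent PyTorch release (roughly 2.4 or newer).

import torch
from torch.serialization import add_safe_globals

class MyConfig:  # hypothetical non-tensor object stored alongside the weights
    def __init__(self, hidden_size=16):
        self.hidden_size = hidden_size

torch.save({"cfg": MyConfig(), "weight": torch.ones(2, 2)}, "ckpt.pt")

# Without allowlisting, weights_only=True raises on the MyConfig global.
add_safe_globals([MyConfig])
ckpt = torch.load("ckpt.pt", map_location="cpu", weights_only=True)
print(ckpt["cfg"].hidden_size, ckpt["weight"].shape)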
Some files were not shown because too many files have changed in this diff.