From 41a0e58e5ba4945c244a6141c2adc600fa0a7fdc Mon Sep 17 00:00:00 2001 From: cyyever Date: Thu, 27 Mar 2025 22:55:50 +0800 Subject: [PATCH] Set weights_only in torch.load (#36991) --- .../flax/vision/run_image_classification.py | 2 +- .../multiple_choice/utils_multiple_choice.py | 2 +- examples/legacy/pytorch-lightning/run_glue.py | 2 +- examples/legacy/pytorch-lightning/run_ner.py | 4 +- .../legacy/question-answering/run_squad.py | 6 +- examples/legacy/run_swag.py | 2 +- .../legacy/seq2seq/convert_model_to_fp16.py | 2 +- .../legacy/token-classification/utils_ner.py | 2 +- .../convert_pytorch_checkpoint_to_tf2.py | 7 +-- src/transformers/data/datasets/squad.py | 2 +- .../modeling_flax_pytorch_utils.py | 6 +- src/transformers/modeling_tf_pytorch_utils.py | 3 +- src/transformers/modeling_utils.py | 8 +-- ..._original_pytorch_checkpoint_to_pytorch.py | 2 +- .../models/phi/convert_phi_weights_to_hf.py | 2 +- .../models/wav2vec2/modeling_wav2vec2.py | 3 +- src/transformers/trainer.py | 57 +++++++++---------- .../autoformer/test_modeling_autoformer.py | 2 +- .../idefics/test_image_processing_idefics.py | 2 +- .../models/informer/test_modeling_informer.py | 2 +- .../llava_next/test_modeling_llava_next.py | 4 +- .../test_modeling_patchtsmixer.py | 2 +- .../models/patchtst/test_modeling_patchtst.py | 2 +- .../test_modeling_time_series_transformer.py | 2 +- .../models/videomae/test_modeling_videomae.py | 2 +- .../peft_integration/test_peft_integration.py | 8 +-- tests/trainer/test_trainer.py | 2 +- tests/utils/test_modeling_utils.py | 2 +- 28 files changed, 64 insertions(+), 78 deletions(-) diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py index 0228a8797b6..4b50352f580 100644 --- a/examples/flax/vision/run_image_classification.py +++ b/examples/flax/vision/run_image_classification.py @@ -36,12 +36,12 @@ import optax # for dataset and preprocessing import torch import torchvision -import torchvision.transforms as transforms from flax import jax_utils from flax.jax_utils import pad_shard_unpad, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key from huggingface_hub import HfApi +from torchvision import transforms from tqdm import tqdm import transformers diff --git a/examples/legacy/multiple_choice/utils_multiple_choice.py b/examples/legacy/multiple_choice/utils_multiple_choice.py index 6b7559c49e5..2ee901c1bc8 100644 --- a/examples/legacy/multiple_choice/utils_multiple_choice.py +++ b/examples/legacy/multiple_choice/utils_multiple_choice.py @@ -113,7 +113,7 @@ if is_torch_available(): with FileLock(lock_path): if os.path.exists(cached_features_file) and not overwrite_cache: logger.info(f"Loading features from cached file {cached_features_file}") - self.features = torch.load(cached_features_file) + self.features = torch.load(cached_features_file, weights_only=True) else: logger.info(f"Creating features from dataset file at {data_dir}") label_list = processor.get_labels() diff --git a/examples/legacy/pytorch-lightning/run_glue.py b/examples/legacy/pytorch-lightning/run_glue.py index 681f633fcd6..00302c5061c 100644 --- a/examples/legacy/pytorch-lightning/run_glue.py +++ b/examples/legacy/pytorch-lightning/run_glue.py @@ -81,7 +81,7 @@ class GLUETransformer(BaseTransformer): cached_features_file = self._feature_file(mode) logger.info("Loading features from cached file %s", cached_features_file) - features = torch.load(cached_features_file) + features = 
torch.load(cached_features_file, weights_only=True) all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) diff --git a/examples/legacy/pytorch-lightning/run_ner.py b/examples/legacy/pytorch-lightning/run_ner.py index fc6f812275e..144759d36aa 100644 --- a/examples/legacy/pytorch-lightning/run_ner.py +++ b/examples/legacy/pytorch-lightning/run_ner.py @@ -63,7 +63,7 @@ class NERTransformer(BaseTransformer): cached_features_file = self._feature_file(mode) if os.path.exists(cached_features_file) and not args.overwrite_cache: logger.info("Loading features from cached file %s", cached_features_file) - features = torch.load(cached_features_file) + features = torch.load(cached_features_file, weights_only=True) else: logger.info("Creating features from dataset file at %s", args.data_dir) examples = self.token_classification_task.read_examples_from_file(args.data_dir, mode) @@ -89,7 +89,7 @@ class NERTransformer(BaseTransformer): "Load datasets. Called after prepare data." cached_features_file = self._feature_file(mode) logger.info("Loading features from cached file %s", cached_features_file) - features = torch.load(cached_features_file) + features = torch.load(cached_features_file, weights_only=True) all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) if features[0].token_type_ids is not None: diff --git a/examples/legacy/question-answering/run_squad.py b/examples/legacy/question-answering/run_squad.py index 19d3c6f49e7..007e9e7ab27 100644 --- a/examples/legacy/question-answering/run_squad.py +++ b/examples/legacy/question-answering/run_squad.py @@ -105,8 +105,8 @@ def train(args, train_dataset, model, tokenizer): os.path.join(args.model_name_or_path, "scheduler.pt") ): # Load in optimizer and scheduler states - optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt"))) - scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt"))) + optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt"), weights_only=True)) + scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt"), weights_only=True)) if args.fp16: try: @@ -417,7 +417,7 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal # Init features and dataset from cache if it exists if os.path.exists(cached_features_file) and not args.overwrite_cache: logger.info("Loading features from cached file %s", cached_features_file) - features_and_dataset = torch.load(cached_features_file) + features_and_dataset = torch.load(cached_features_file, weights_only=True) features, dataset, examples = ( features_and_dataset["features"], features_and_dataset["dataset"], diff --git a/examples/legacy/run_swag.py b/examples/legacy/run_swag.py index 8fca5507bc5..55fd0aa0520 100755 --- a/examples/legacy/run_swag.py +++ b/examples/legacy/run_swag.py @@ -244,7 +244,7 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal ) if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples: logger.info("Loading features from cached file %s", cached_features_file) - features = torch.load(cached_features_file) + features = 
torch.load(cached_features_file, weights_only=True) else: logger.info("Creating features from dataset file at %s", input_file) examples = read_swag_examples(input_file) diff --git a/examples/legacy/seq2seq/convert_model_to_fp16.py b/examples/legacy/seq2seq/convert_model_to_fp16.py index 7fffbde79df..8d568a7e4af 100755 --- a/examples/legacy/seq2seq/convert_model_to_fp16.py +++ b/examples/legacy/seq2seq/convert_model_to_fp16.py @@ -22,7 +22,7 @@ from tqdm import tqdm def convert(src_path: str, map_location: str = "cpu", save_path: Union[str, None] = None) -> None: """Convert a pytorch_model.bin or model.pt file to torch.float16 for faster downloads, less disk space.""" - state_dict = torch.load(src_path, map_location=map_location) + state_dict = torch.load(src_path, map_location=map_location, weights_only=True) for k, v in tqdm(state_dict.items()): if not isinstance(v, torch.Tensor): raise TypeError("FP16 conversion only works on paths that are saved state dicts, like pytorch_model.bin") diff --git a/examples/legacy/token-classification/utils_ner.py b/examples/legacy/token-classification/utils_ner.py index da4d8c3b605..9167ce15161 100644 --- a/examples/legacy/token-classification/utils_ner.py +++ b/examples/legacy/token-classification/utils_ner.py @@ -242,7 +242,7 @@ if is_torch_available(): with FileLock(lock_path): if os.path.exists(cached_features_file) and not overwrite_cache: logger.info(f"Loading features from cached file {cached_features_file}") - self.features = torch.load(cached_features_file) + self.features = torch.load(cached_features_file, weights_only=True) else: logger.info(f"Creating features from dataset file at {data_dir}") examples = token_classification_task.read_examples_from_file(data_dir, mode) diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py index 46c7ba12a54..fad6463e982 100755 --- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -277,12 +277,7 @@ def convert_pt_checkpoint_to_tf( if compare_with_pt_model: tfo = tf_model(tf_model.dummy_inputs, training=False) # build the network - weights_only_kwarg = {"weights_only": True} - state_dict = torch.load( - pytorch_checkpoint_path, - map_location="cpu", - **weights_only_kwarg, - ) + state_dict = torch.load(pytorch_checkpoint_path, map_location="cpu", weights_only=True) pt_model = pt_model_class.from_pretrained( pretrained_model_name_or_path=None, config=config, state_dict=state_dict ) diff --git a/src/transformers/data/datasets/squad.py b/src/transformers/data/datasets/squad.py index d81217d818a..7546d7b49ed 100644 --- a/src/transformers/data/datasets/squad.py +++ b/src/transformers/data/datasets/squad.py @@ -148,7 +148,7 @@ class SquadDataset(Dataset): with FileLock(lock_path): if os.path.exists(cached_features_file) and not args.overwrite_cache: start = time.time() - self.old_features = torch.load(cached_features_file) + self.old_features = torch.load(cached_features_file, weights_only=True) # Legacy cache files have only features, while new cache files # will have dataset and examples also. 
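
Note on the pattern applied at every call site in this patch: `weights_only=True`
makes `torch.load` refuse to unpickle anything beyond tensors and plain Python
containers, so a tampered checkpoint fails to load instead of executing arbitrary
code. A minimal sketch of the behaviour being opted into (the file name is
illustrative, not taken from this patch):

    import torch

    torch.save({"w": torch.ones(2, 2)}, "demo_state.pt")
    # With weights_only=True (available since PyTorch 1.13), only tensors and
    # plain containers are unpickled; checkpoints carrying arbitrary pickled
    # objects are rejected at load time instead of silently running code.
    state = torch.load("demo_state.pt", map_location="cpu", weights_only=True)

Checkpoints that legitimately contain non-tensor objects, such as the RNG
snapshot restored in trainer.py further down, need those types allowlisted,
which is what the `safe_globals()` context manager used there provides.
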
diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py index 8fbba8a1651..07285065772 100644 --- a/src/transformers/modeling_flax_pytorch_utils.py +++ b/src/transformers/modeling_flax_pytorch_utils.py @@ -71,8 +71,7 @@ def load_pytorch_checkpoint_in_flax_state_dict( ) raise - weights_only_kwarg = {"weights_only": True} - pt_state_dict = torch.load(pt_path, map_location="cpu", **weights_only_kwarg) + pt_state_dict = torch.load(pt_path, map_location="cpu", weights_only=True) logger.info(f"PyTorch checkpoint contains {sum(t.numel() for t in pt_state_dict.values()):,} parameters.") flax_state_dict = convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model) @@ -248,8 +247,7 @@ def convert_pytorch_sharded_state_dict_to_flax(shard_filenames, flax_model): flax_state_dict = {} for shard_file in shard_filenames: # load using msgpack utils - weights_only_kwarg = {"weights_only": True} - pt_state_dict = torch.load(shard_file, **weights_only_kwarg) + pt_state_dict = torch.load(shard_file, weights_only=True) weight_dtypes = {k: v.dtype for k, v in pt_state_dict.items()} pt_state_dict = { k: v.numpy() if v.dtype != torch.bfloat16 else v.float().numpy() for k, v in pt_state_dict.items() diff --git a/src/transformers/modeling_tf_pytorch_utils.py b/src/transformers/modeling_tf_pytorch_utils.py index 416db528880..84a6ddaebcc 100644 --- a/src/transformers/modeling_tf_pytorch_utils.py +++ b/src/transformers/modeling_tf_pytorch_utils.py @@ -198,8 +198,7 @@ def load_pytorch_checkpoint_in_tf2_model( if pt_path.endswith(".safetensors"): state_dict = safe_load_file(pt_path) else: - weights_only_kwarg = {"weights_only": True} - state_dict = torch.load(pt_path, map_location="cpu", **weights_only_kwarg) + state_dict = torch.load(pt_path, map_location="cpu", weights_only=True) pt_state_dict.update(state_dict) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index abf8dec55cc..3c4526eb1e6 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -504,8 +504,7 @@ def load_sharded_checkpoint(model, folder, strict=True, prefer_safe=True): error_message += f"\nMissing key(s): {str_unexpected_keys}." raise RuntimeError(error_message) - weights_only_kwarg = {"weights_only": True} - loader = safe_load_file if load_safe else partial(torch.load, map_location="cpu", **weights_only_kwarg) + loader = safe_load_file if load_safe else partial(torch.load, map_location="cpu", weights_only=True) for shard_file in shard_files: state_dict = loader(os.path.join(folder, shard_file)) @@ -598,11 +597,10 @@ def load_state_dict( and is_zipfile(checkpoint_file) ): extra_args = {"mmap": True} - weights_only_kwarg = {"weights_only": weights_only} return torch.load( checkpoint_file, map_location=map_location, - **weights_only_kwarg, + weights_only=weights_only, **extra_args, ) except Exception as e: @@ -1216,7 +1214,7 @@ def _get_torch_dtype( weights_only: bool, ) -> Tuple[PretrainedConfig, Optional[torch.dtype], Optional[torch.dtype]]: """Find the correct `torch_dtype` to use based on provided arguments. Also update the `config` based on the - infered dtype. We do the following: + inferred dtype. We do the following: 1. If torch_dtype is not None, we use that dtype 2. 
If torch_dtype is "auto", we auto-detect dtype from the loaded state_dict, by checking its first weights entry that is of a floating type - we assume all floating dtype weights are of the same dtype diff --git a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py index 5339f1671b0..4ecc3335514 100644 --- a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py @@ -207,7 +207,7 @@ def convert_wav2vec2_checkpoint( hf_wav2vec = Data2VecAudioModel(config) data2vec_checkpoint_dir = os.path.dirname(checkpoint_path) - state_dict = torch.load(checkpoint_path) + state_dict = torch.load(checkpoint_path, weights_only=True) state_dict["model"]["final_proj.weight"] = state_dict["model"].pop("final_proj.0.weight") state_dict["model"]["final_proj.bias"] = state_dict["model"].pop("final_proj.0.bias") converted_ckpt = os.path.join(data2vec_checkpoint_dir, "converted.pt") diff --git a/src/transformers/models/phi/convert_phi_weights_to_hf.py b/src/transformers/models/phi/convert_phi_weights_to_hf.py index 69ef4c5919e..09742336611 100644 --- a/src/transformers/models/phi/convert_phi_weights_to_hf.py +++ b/src/transformers/models/phi/convert_phi_weights_to_hf.py @@ -121,7 +121,7 @@ def convert_phi_weights( if model_path.endswith("safetensors"): loaded_weights = safetensors.torch.load_file(model_path, device=device) else: - loaded_weights = torch.load(model_path, map_location=device) + loaded_weights = torch.load(model_path, map_location=device, weights_only=True) model_checkpoint.update(**loaded_weights) model_type = model_name.split("/")[1] # phi-1 or phi-1_5 or phi-2 diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index f9043eba0eb..b7b37e47a67 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -1589,11 +1589,10 @@ class Wav2Vec2PreTrainedModel(PreTrainedModel): cache_dir=cache_dir, ) - weights_only_kwarg = {"weights_only": True} state_dict = torch.load( weight_path, map_location="cpu", - **weights_only_kwarg, + weights_only=True, ) except EnvironmentError: diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 17ad6145045..db8a5615cc0 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -2820,7 +2820,6 @@ class Trainer: ) if os.path.isfile(weights_file) or os.path.isfile(safe_weights_file) or is_fsdp_ckpt: - weights_only_kwarg = {"weights_only": True} # If the model is on the GPU, it still works! if is_sagemaker_mp_enabled(): if os.path.isfile(os.path.join(resume_from_checkpoint, "user_content.pt")): @@ -2836,11 +2835,7 @@ class Trainer: logger.warning( "Enabling FP16 and loading from smp < 1.10 checkpoint together is not supported." ) - state_dict = torch.load( - weights_file, - map_location="cpu", - **weights_only_kwarg, - ) + state_dict = torch.load(weights_file, map_location="cpu", weights_only=True) # Required for smp to not auto-translate state_dict from hf to smp (is already smp). 
state_dict["_smp_is_partial"] = False load_result = model.load_state_dict(state_dict, strict=True) @@ -2859,11 +2854,7 @@ class Trainer: if self.args.save_safetensors and os.path.isfile(safe_weights_file): state_dict = safetensors.torch.load_file(safe_weights_file, device="cpu") else: - state_dict = torch.load( - weights_file, - map_location="cpu", - **weights_only_kwarg, - ) + state_dict = torch.load(weights_file, map_location="cpu", weights_only=True) # workaround for FSDP bug https://github.com/pytorch/pytorch/issues/82963 # which takes *args instead of **kwargs @@ -2941,7 +2932,6 @@ class Trainer: or os.path.exists(best_safe_adapter_model_path) ): has_been_loaded = True - weights_only_kwarg = {"weights_only": True} if is_sagemaker_mp_enabled(): if os.path.isfile(os.path.join(self.state.best_model_checkpoint, "user_content.pt")): # If the 'user_content.pt' file exists, load with the new smp api. @@ -2958,11 +2948,7 @@ class Trainer: if self.args.save_safetensors and os.path.isfile(best_safe_model_path): state_dict = safetensors.torch.load_file(best_safe_model_path, device="cpu") else: - state_dict = torch.load( - best_model_path, - map_location="cpu", - **weights_only_kwarg, - ) + state_dict = torch.load(best_model_path, map_location="cpu", weights_only=True) state_dict["_smp_is_partial"] = False load_result = model.load_state_dict(state_dict, strict=True) @@ -3017,11 +3003,7 @@ class Trainer: if self.args.save_safetensors and os.path.isfile(best_safe_model_path): state_dict = safetensors.torch.load_file(best_safe_model_path, device="cpu") else: - state_dict = torch.load( - best_model_path, - map_location="cpu", - **weights_only_kwarg, - ) + state_dict = torch.load(best_model_path, map_location="cpu", weights_only=True) # If the model is on the GPU, it still works! 
# workaround for FSDP bug https://github.com/pytorch/pytorch/issues/82963 @@ -3142,7 +3124,7 @@ class Trainer: return with safe_globals(): - checkpoint_rng_state = torch.load(rng_file) + checkpoint_rng_state = torch.load(rng_file, weights_only=True) random.setstate(checkpoint_rng_state["python"]) np.random.set_state(checkpoint_rng_state["numpy"]) torch.random.set_rng_state(checkpoint_rng_state["cpu"]) @@ -3375,7 +3357,9 @@ class Trainer: # deepspeed loads optimizer/lr_scheduler together with the model in deepspeed_init if not isinstance(self.lr_scheduler, DeepSpeedSchedulerWrapper): with warnings.catch_warnings(record=True) as caught_warnings: - self.lr_scheduler.load_state_dict(torch.load(os.path.join(checkpoint, SCHEDULER_NAME))) + self.lr_scheduler.load_state_dict( + torch.load(os.path.join(checkpoint, SCHEDULER_NAME), weights_only=True) + ) reissue_pt_warnings(caught_warnings) return @@ -3410,13 +3394,18 @@ class Trainer: checkpoint, f"rank{self.args.process_index}-of-{self.args.world_size}-{OPTIMIZER_NAME}" ), map_location="cpu", + weights_only=True, ) # We only need `optimizer` when resuming from checkpoint optimizer_state = optimizer_state["optimizer"] else: - optimizer_state = torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location="cpu") + optimizer_state = torch.load( + os.path.join(checkpoint, OPTIMIZER_NAME), map_location="cpu", weights_only=True + ) with warnings.catch_warnings(record=True) as caught_warnings: - lr_scheduler_state = torch.load(os.path.join(checkpoint, SCHEDULER_NAME), map_location="cpu") + lr_scheduler_state = torch.load( + os.path.join(checkpoint, SCHEDULER_NAME), map_location="cpu", weights_only=True + ) reissue_pt_warnings(caught_warnings) xm.send_cpu_data_to_device(optimizer_state, self.args.device) @@ -3458,10 +3447,14 @@ class Trainer: ) else: self.optimizer.load_state_dict( - torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location) + torch.load( + os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location, weights_only=True + ) ) with warnings.catch_warnings(record=True) as caught_warnings: - self.lr_scheduler.load_state_dict(torch.load(os.path.join(checkpoint, SCHEDULER_NAME))) + self.lr_scheduler.load_state_dict( + torch.load(os.path.join(checkpoint, SCHEDULER_NAME), weights_only=True) + ) reissue_pt_warnings(caught_warnings) def _save_scaler(self, output_dir): @@ -3496,13 +3489,17 @@ class Trainer: # Load in scaler states if is_torch_xla_available(): with warnings.catch_warnings(record=True) as caught_warnings: - scaler_state = torch.load(os.path.join(checkpoint, SCALER_NAME), map_location="cpu") + scaler_state = torch.load( + os.path.join(checkpoint, SCALER_NAME), map_location="cpu", weights_only=True + ) reissue_pt_warnings(caught_warnings) xm.send_cpu_data_to_device(scaler_state, self.args.device) self.accelerator.scaler.load_state_dict(scaler_state) else: with warnings.catch_warnings(record=True) as caught_warnings: - self.accelerator.scaler.load_state_dict(torch.load(os.path.join(checkpoint, SCALER_NAME))) + self.accelerator.scaler.load_state_dict( + torch.load(os.path.join(checkpoint, SCALER_NAME), weights_only=True) + ) reissue_pt_warnings(caught_warnings) def _load_callback_state(self): diff --git a/tests/models/autoformer/test_modeling_autoformer.py b/tests/models/autoformer/test_modeling_autoformer.py index 92aa1ad4c9d..cca8f3b3ac8 100644 --- a/tests/models/autoformer/test_modeling_autoformer.py +++ b/tests/models/autoformer/test_modeling_autoformer.py @@ -415,7 +415,7 @@ class 
AutoformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa def prepare_batch(filename="train-batch.pt"): file = hf_hub_download(repo_id="hf-internal-testing/tourism-monthly-batch", filename=filename, repo_type="dataset") - batch = torch.load(file, map_location=torch_device) + batch = torch.load(file, map_location=torch_device, weights_only=True) return batch diff --git a/tests/models/idefics/test_image_processing_idefics.py b/tests/models/idefics/test_image_processing_idefics.py index ad208881578..5b40a0393e5 100644 --- a/tests/models/idefics/test_image_processing_idefics.py +++ b/tests/models/idefics/test_image_processing_idefics.py @@ -28,7 +28,7 @@ if is_torch_available(): import torch if is_torchvision_available(): - import torchvision.transforms as transforms + from torchvision import transforms if is_vision_available(): from PIL import Image diff --git a/tests/models/informer/test_modeling_informer.py b/tests/models/informer/test_modeling_informer.py index fb91a652004..b5e35490f6f 100644 --- a/tests/models/informer/test_modeling_informer.py +++ b/tests/models/informer/test_modeling_informer.py @@ -476,7 +476,7 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase def prepare_batch(filename="train-batch.pt"): file = hf_hub_download(repo_id="hf-internal-testing/tourism-monthly-batch", filename=filename, repo_type="dataset") - batch = torch.load(file, map_location=torch_device) + batch = torch.load(file, map_location=torch_device, weights_only=True) return batch diff --git a/tests/models/llava_next/test_modeling_llava_next.py b/tests/models/llava_next/test_modeling_llava_next.py index 8d517c644d1..a9758bd4230 100644 --- a/tests/models/llava_next/test_modeling_llava_next.py +++ b/tests/models/llava_next/test_modeling_llava_next.py @@ -408,7 +408,7 @@ class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase): filename="llava_1_6_input_ids.pt", repo_type="dataset", ) - original_input_ids = torch.load(filepath, map_location="cpu") + original_input_ids = torch.load(filepath, map_location="cpu", weights_only=True) # replace -200 by image_token_index (since we use token ID = 32000 for the image token) # remove image token indices because HF impl expands image tokens `image_seq_length` times original_input_ids = original_input_ids[original_input_ids != -200] @@ -420,7 +420,7 @@ class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase): filename="llava_1_6_pixel_values.pt", repo_type="dataset", ) - original_pixel_values = torch.load(filepath, map_location="cpu") + original_pixel_values = torch.load(filepath, map_location="cpu", weights_only=True) assert torch.allclose(original_pixel_values, inputs.pixel_values.half()) # verify generation diff --git a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py index 939072d825f..2f39b9e6800 100644 --- a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py +++ b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py @@ -452,7 +452,7 @@ class PatchTSMixerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test def prepare_batch(repo_id="ibm/patchtsmixer-etth1-test-data", file="pretrain_batch.pt"): # TODO: Make repo public file = hf_hub_download(repo_id=repo_id, filename=file, repo_type="dataset") - batch = torch.load(file, map_location=torch_device) + batch = torch.load(file, map_location=torch_device, weights_only=True) return batch diff --git a/tests/models/patchtst/test_modeling_patchtst.py 
b/tests/models/patchtst/test_modeling_patchtst.py index 0956386f0d3..8f48a6f904a 100644 --- a/tests/models/patchtst/test_modeling_patchtst.py +++ b/tests/models/patchtst/test_modeling_patchtst.py @@ -303,7 +303,7 @@ class PatchTSTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase def prepare_batch(repo_id="hf-internal-testing/etth1-hourly-batch", file="train-batch.pt"): file = hf_hub_download(repo_id=repo_id, filename=file, repo_type="dataset") - batch = torch.load(file, map_location=torch_device) + batch = torch.load(file, map_location=torch_device, weights_only=True) return batch diff --git a/tests/models/time_series_transformer/test_modeling_time_series_transformer.py b/tests/models/time_series_transformer/test_modeling_time_series_transformer.py index 50abfaa765e..f9dfefe3767 100644 --- a/tests/models/time_series_transformer/test_modeling_time_series_transformer.py +++ b/tests/models/time_series_transformer/test_modeling_time_series_transformer.py @@ -481,7 +481,7 @@ class TimeSeriesTransformerModelTest(ModelTesterMixin, PipelineTesterMixin, unit def prepare_batch(filename="train-batch.pt"): file = hf_hub_download(repo_id="hf-internal-testing/tourism-monthly-batch", filename=filename, repo_type="dataset") - batch = torch.load(file, map_location=torch_device) + batch = torch.load(file, map_location=torch_device, weights_only=True) return batch diff --git a/tests/models/videomae/test_modeling_videomae.py b/tests/models/videomae/test_modeling_videomae.py index 4b1abab2067..8f1ab7de053 100644 --- a/tests/models/videomae/test_modeling_videomae.py +++ b/tests/models/videomae/test_modeling_videomae.py @@ -456,7 +456,7 @@ class VideoMAEModelIntegrationTest(unittest.TestCase): # add boolean mask, indicating which patches to mask local_path = hf_hub_download(repo_id="hf-internal-testing/bool-masked-pos", filename="bool_masked_pos.pt") - inputs["bool_masked_pos"] = torch.load(local_path) + inputs["bool_masked_pos"] = torch.load(local_path, weights_only=True) # forward pass with torch.no_grad(): diff --git a/tests/peft_integration/test_peft_integration.py b/tests/peft_integration/test_peft_integration.py index 5ebb53c5be4..22282aa07f3 100644 --- a/tests/peft_integration/test_peft_integration.py +++ b/tests/peft_integration/test_peft_integration.py @@ -554,7 +554,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin): state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin") - dummy_state_dict = torch.load(state_dict_path) + dummy_state_dict = torch.load(state_dict_path, weights_only=True) model.load_adapter(adapter_state_dict=dummy_state_dict, peft_config=peft_config) with self.assertRaises(ValueError): @@ -579,7 +579,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin): peft_config = LoraConfig() state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin") - dummy_state_dict = torch.load(state_dict_path) + dummy_state_dict = torch.load(state_dict_path, weights_only=True) # this should always work model.load_adapter( @@ -647,7 +647,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin): peft_config = LoraConfig() state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin") - dummy_state_dict = torch.load(state_dict_path) + dummy_state_dict = torch.load(state_dict_path, weights_only=True) # add unexpected key dummy_state_dict["foobar"] = next(iter(dummy_state_dict.values())) @@ -674,7 +674,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin): peft_config = LoraConfig() state_dict_path = 
hf_hub_download(peft_model_id, "adapter_model.bin") - dummy_state_dict = torch.load(state_dict_path) + dummy_state_dict = torch.load(state_dict_path, weights_only=True) # remove a key so that we have missing keys key = next(iter(dummy_state_dict.keys())) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 7e157550589..f7e16926f2d 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -648,7 +648,7 @@ class TrainerIntegrationCommon: else: best_model = RegressionModel() if not safe_weights: - state_dict = torch.load(os.path.join(checkpoint, WEIGHTS_NAME)) + state_dict = torch.load(os.path.join(checkpoint, WEIGHTS_NAME), weights_only=True) else: state_dict = safetensors.torch.load_file(os.path.join(checkpoint, SAFE_WEIGHTS_NAME)) best_model.load_state_dict(state_dict) diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index 96cbd4f77bb..212969ce150 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -765,7 +765,7 @@ class ModelUtilsTest(TestCasePlus): # Note: pickle adds some junk so the weight of the file can end up being slightly bigger than # the size asked for (since we count parameters) if size >= max_size_int + 50000: - state_dict = torch.load(shard_file) + state_dict = torch.load(shard_file, weights_only=True) self.assertEqual(len(state_dict), 1) # Check the index and the shard files found match
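
Closing note: the deleted `weights_only_kwarg = {"weights_only": True}` dicts
look like a leftover indirection from when the library still had to support
torch versions predating the `weights_only` argument; now that the minimum
supported version accepts it, the keyword is passed directly. For downstream
code that must still straddle torch < 1.13, a hedged sketch of the old-style
guard (the version check is an assumption, not code from this repository):

    import torch
    from packaging import version

    # torch.load only accepts weights_only from PyTorch 1.13 onwards.
    _load_kwargs = (
        {"weights_only": True}
        if version.parse(torch.__version__) >= version.parse("1.13")
        else {}
    )
    state = torch.load("checkpoint.pt", map_location="cpu", **_load_kwargs)
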