diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index c3c4c75750c..480e6f3f3f3 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -3750,7 +3750,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMi index = None if state_dict_split.is_sharded: index = { - "metadata": state_dict_split.metadata, + "metadata": {"total_parameters": self.num_parameters(), **state_dict_split.metadata}, "weight_map": state_dict_split.tensor_to_filename, }