make sure to disable gradients for integer tensor (#32943)

Wing Lian 2024-11-18 10:49:37 -05:00 committed by GitHub
parent 1c471fc307
commit 36759f3312


@@ -927,7 +927,10 @@ def _load_state_dict_into_meta_model(
                 param_to = "cpu"
                 if is_fsdp_enabled() and not is_local_dist_rank_0():
                     param_to = "meta"
-                value = type(value)(value.data.to(param_to), **value.__dict__)
+                val_kwargs = {}
+                if hasattr(module, "weight") and module.weight.__class__.__name__ == "Int8Params":
+                    val_kwargs["requires_grad"] = False
+                value = type(value)(value.data.to(param_to), **val_kwargs, **value.__dict__)
                 setattr(module, tensor_name, value)
 
     # TODO: consider removing used param_parts from state_dict before return
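For context, the extra keyword argument matters because PyTorch refuses to enable autograd on integer tensors, and rebuilding a bitsandbytes Int8Params via type(value)(...) would otherwise fall back to the default requires_grad. Below is a minimal sketch of the failure mode, not part of the commit, using plain torch.nn.Parameter rather than Int8Params (which subclasses it):

# Minimal sketch (assumption: plain torch.nn.Parameter stands in for Int8Params).
# PyTorch cannot track gradients on integer dtypes, so the rebuilt parameter
# must be created with requires_grad=False.
import torch

int8_data = torch.zeros(4, dtype=torch.int8)

try:
    # Default requires_grad=True fails for integer tensors.
    torch.nn.Parameter(int8_data)
except RuntimeError as err:
    print(f"fails as expected: {err}")

# Explicitly disabling gradients, as the patch does for Int8Params, succeeds.
param = torch.nn.Parameter(int8_data, requires_grad=False)
print(param.dtype, param.requires_grad)  # torch.int8 False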