diff --git a/src/transformers/models/esm/modeling_esm.py b/src/transformers/models/esm/modeling_esm.py index 43ff7d7b52b..2d5ca07f2fc 100755 --- a/src/transformers/models/esm/modeling_esm.py +++ b/src/transformers/models/esm/modeling_esm.py @@ -96,7 +96,7 @@ class RotaryEmbedding(torch.nn.Module): # Generate and save the inverse frequency buffer (non trainable) inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) inv_freq = inv_freq - self.register_buffer("inv_freq", inv_freq, persistent=False) + self.register_buffer("inv_freq", inv_freq) self._seq_len_cached = None self._cos_cached = None