mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
unfreeze initial cache in gpt models (#14535)
This commit is contained in:
parent
2318bf77eb
commit
69511cdcae
@@ -444,7 +444,7 @@ class FlaxGPT2PreTrainedModel(FlaxPreTrainedModel):
         init_variables = self.module.init(
             jax.random.PRNGKey(0), input_ids, attention_mask, position_ids, return_dict=False, init_cache=True
         )
-        return init_variables["cache"]
+        return unfreeze(init_variables["cache"])

     @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     def __call__(
@@ -388,7 +388,7 @@ class FlaxGPTNeoPreTrainedModel(FlaxPreTrainedModel):
         init_variables = self.module.init(
             jax.random.PRNGKey(0), input_ids, attention_mask, position_ids, return_dict=False, init_cache=True
         )
-        return init_variables["cache"]
+        return unfreeze(init_variables["cache"])

     @add_start_docstrings_to_model_forward(GPT_NEO_INPUTS_DOCSTRING)
     def __call__(
Loading…
Reference in New Issue
Block a user