Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-02 03:01:07 +06:00)
Changing XLNet default from not using memories to 512 context size following paper (#8417)
* Move XLNet memory length FutureWarning
* isort
* style
* Changed default XLNet memory length
This commit is contained in:
parent 190df58560
commit 4185b115d4
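The practical effect is that attention memories are now on by default. A minimal sketch of the new behaviour (illustrative only; XLNetConfig, XLNetLMHeadModel and the 'xlnet-base-cased' checkpoint are the standard transformers API, and the printed value assumes a version that includes this commit):

from transformers import XLNetConfig, XLNetLMHeadModel

# With this commit, a freshly created config enables attention memories by default.
config = XLNetConfig()
print(config.mem_len)  # 512 after this change; previously None, i.e. no memories

# The default can still be overridden when loading a checkpoint, e.g. with the value
# suggested by the old FutureWarning:
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased", mem_len=1024)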
XLNet configuration:

@@ -15,8 +15,6 @@
 # limitations under the License.
 """ XLNet configuration """
 
-import warnings
-
 from .configuration_utils import PretrainedConfig
 from .utils import logging
 
@@ -144,7 +142,7 @@ class XLNetConfig(PretrainedConfig):
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         dropout=0.1,
-        mem_len=None,
+        mem_len=512,
         reuse_len=None,
         bi_data=False,
         clamp_len=-1,
@@ -198,17 +196,6 @@
         self.pad_token_id = pad_token_id
         self.eos_token_id = eos_token_id
 
-        if mem_len is None or mem_len == 0:
-            warnings.warn(
-                "This config doesn't use attention memories, a core feature of XLNet."
-                " Consider setting `mem_len` to a non-zero value, for example "
-                "`xlnet = XLNetLMHeadModel.from_pretrained('xlnet-base-cased'', mem_len=1024)`,"
-                " for accurate training performance as well as an order of magnitude faster inference."
-                " Starting from version 3.5.0, the default parameter will be 1024, following"
-                " the implementation in https://arxiv.org/abs/1906.08237",
-                FutureWarning,
-            )
-
     @property
     def max_position_embeddings(self):
         return -1
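For context, mem_len controls how many hidden states from previous forward passes are cached and reused as extra attention context, which is why the removed warning mentions an order of magnitude faster inference. A rough sketch of that usage, assuming the use_mems/mems interface of XLNetLMHeadModel at the time (this snippet is not part of the diff):

import torch
from transformers import XLNetLMHeadModel, XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")  # mem_len now defaults to 512

mems = None
for chunk in ["XLNet caches hidden states", "from earlier segments", "as extra context."]:
    inputs = tokenizer(chunk, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs, mems=mems, use_mems=True)
    mems = outputs.mems  # cached states, capped at the last `mem_len` positions per layer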
PyTorch XLNet model:

@@ -16,8 +16,6 @@
 """
  PyTorch XLNet model.
 """
-
-
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
 
@@ -1087,6 +1085,7 @@ class XLNetModel(XLNetPreTrainedModel):
         output_hidden_states=None,
         return_dict=None,
     ):
+
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states