Changing the XLNet default from not using memories to a 512-token context size, following the paper (#8417)

* Move XLNet memory length FutureWarning

* isort

* style

* Changed default XLNet memory length
Teven 2020-11-10 02:49:51 +01:00 committed by GitHub
parent 190df58560
commit 4185b115d4
2 changed files with 2 additions and 16 deletions
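
For downstream users, the practical effect is that a freshly constructed config now keeps attention memories by default. A minimal sketch of the new behavior, assuming the `transformers` API of this period (the `from_pretrained` override is the pattern the removed FutureWarning itself recommended):

```python
from transformers import XLNetConfig, XLNetLMHeadModel

# After this commit, a default config keeps a 512-token attention memory
# instead of no memory at all.
config = XLNetConfig()
assert config.mem_len == 512

# The old behavior (no memories) is still available explicitly.
no_mem_config = XLNetConfig(mem_len=None)

# mem_len can also be overridden when loading pretrained weights, as the
# removed FutureWarning suggested.
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased", mem_len=1024)
```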

configuration_xlnet.py

@@ -15,8 +15,6 @@
 # limitations under the License.
 """ XLNet configuration """
-import warnings
 from .configuration_utils import PretrainedConfig
 from .utils import logging
@@ -144,7 +142,7 @@ class XLNetConfig(PretrainedConfig):
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         dropout=0.1,
-        mem_len=None,
+        mem_len=512,
         reuse_len=None,
         bi_data=False,
         clamp_len=-1,
@@ -198,17 +196,6 @@ class XLNetConfig(PretrainedConfig):
         self.pad_token_id = pad_token_id
         self.eos_token_id = eos_token_id
-        if mem_len is None or mem_len == 0:
-            warnings.warn(
-                "This config doesn't use attention memories, a core feature of XLNet."
-                " Consider setting `mem_len` to a non-zero value, for example "
-                "`xlnet = XLNetLMHeadModel.from_pretrained('xlnet-base-cased', mem_len=1024)`,"
-                " for accurate training performance as well as an order of magnitude faster inference."
-                " Starting from version 3.5.0, the default parameter will be 1024, following"
-                " the implementation in https://arxiv.org/abs/1906.08237",
-                FutureWarning,
-            )
     @property
     def max_position_embeddings(self):
         return -1
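
The memories in question are XLNet's Transformer-XL-style recurrence: hidden states from earlier forward passes are cached, up to `mem_len` tokens, and attended to as extra context, which is what makes chunked processing of long inputs fast. A minimal sketch of that usage, assuming the contemporary API in which `forward` accepts `mems` and returns updated ones (the chunk size and input text are illustrative):

```python
import torch
from transformers import XLNetLMHeadModel, XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")  # mem_len=512 by default now
model.eval()

input_ids = tokenizer("some long document ...", return_tensors="pt").input_ids

mems = None
with torch.no_grad():
    # Process the sequence chunk by chunk; each pass attends to the cached
    # hidden states (mems) of the previous chunks as additional context.
    for chunk in input_ids.split(128, dim=1):
        outputs = model(chunk, mems=mems, return_dict=True)
        mems = outputs.mems
```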

modeling_xlnet.py

@@ -16,8 +16,6 @@
 """
  PyTorch XLNet model.
 """
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
@@ -1087,6 +1085,7 @@ class XLNetModel(XLNetPreTrainedModel):
         output_hidden_states=None,
         return_dict=None,
     ):
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
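
The surrounding context lines show the library's standard per-call override pattern: a forward argument left as `None` falls back to the value stored on the config. A hypothetical usage example, not part of the diff:

```python
import torch
from transformers import XLNetModel

model = XLNetModel.from_pretrained("xlnet-base-cased")
input_ids = torch.tensor([[35, 42, 7]])

# Passing the flag overrides config.output_attentions for this call only;
# leaving it unset falls back to the config default.
outputs = model(input_ids, output_attentions=True, return_dict=True)
attentions = outputs.attentions  # one attention tensor per layer
```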