Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-02 03:01:07 +06:00)
Changing XLNet default from not using memories to 512 context size following paper (#8417)
* Move XLNet memory length FutureWarning
* isort
* style
* Changed default XLNet memory length
This commit is contained in:
parent 190df58560
commit 4185b115d4
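The practical effect is that attention memories are now on by default. A minimal sketch of the new behaviour (illustrative only; XLNetConfig, XLNetLMHeadModel and the 'xlnet-base-cased' checkpoint are the standard transformers API, and the printed value assumes a version that includes this commit):

from transformers import XLNetConfig, XLNetLMHeadModel

# With this commit, a freshly created config enables attention memories by default.
config = XLNetConfig()
print(config.mem_len)  # 512 after this change; previously None, i.e. no memories

# The default can still be overridden when loading a checkpoint, e.g. with the value
# suggested by the old FutureWarning:
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased", mem_len=1024)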
XLNet configuration:

@@ -15,8 +15,6 @@
 # limitations under the License.
 """ XLNet configuration """
 
-import warnings
-
 from .configuration_utils import PretrainedConfig
 from .utils import logging
 
@@ -144,7 +142,7 @@ class XLNetConfig(PretrainedConfig):
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         dropout=0.1,
-        mem_len=None,
+        mem_len=512,
         reuse_len=None,
         bi_data=False,
         clamp_len=-1,
@@ -198,17 +196,6 @@
         self.pad_token_id = pad_token_id
         self.eos_token_id = eos_token_id
 
-        if mem_len is None or mem_len == 0:
-            warnings.warn(
-                "This config doesn't use attention memories, a core feature of XLNet."
-                " Consider setting `mem_len` to a non-zero value, for example "
-                "`xlnet = XLNetLMHeadModel.from_pretrained('xlnet-base-cased'', mem_len=1024)`,"
-                " for accurate training performance as well as an order of magnitude faster inference."
-                " Starting from version 3.5.0, the default parameter will be 1024, following"
-                " the implementation in https://arxiv.org/abs/1906.08237",
-                FutureWarning,
-            )
-
     @property
     def max_position_embeddings(self):
         return -1
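For context, mem_len controls how many hidden states from previous forward passes are cached and reused as extra attention context, which is why the removed warning mentions an order of magnitude faster inference. A rough sketch of that usage, assuming the use_mems/mems interface of XLNetLMHeadModel at the time (this snippet is not part of the diff):

import torch
from transformers import XLNetLMHeadModel, XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")  # mem_len now defaults to 512

mems = None
for chunk in ["XLNet caches hidden states", "from earlier segments", "as extra context."]:
    inputs = tokenizer(chunk, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs, mems=mems, use_mems=True)
    mems = outputs.mems  # cached states, capped at the last `mem_len` positions per layer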
PyTorch XLNet model:

@@ -16,8 +16,6 @@
 """
  PyTorch XLNet model.
 """
-
-
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
 
@@ -1087,6 +1085,7 @@ class XLNetModel(XLNetPreTrainedModel):
         output_hidden_states=None,
         return_dict=None,
     ):
+
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states