From 32883b310ba30d72e67bb2ebb5847888f03a90a8 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Mon, 20 Jul 2020 11:50:41 -0400
Subject: [PATCH] Improve doc of use_cache (#5912)

* Improve doc of use_cache

* Update src/transformers/configuration_xlnet.py

Co-authored-by: Teven <teven.lescao@gmail.com>

Co-authored-by: Teven <teven.lescao@gmail.com>
---
 src/transformers/configuration_xlnet.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/transformers/configuration_xlnet.py b/src/transformers/configuration_xlnet.py
index edd8925592e..79a226d5151 100644
--- a/src/transformers/configuration_xlnet.py
+++ b/src/transformers/configuration_xlnet.py
@@ -111,7 +111,11 @@ class XLNetConfig(PretrainedConfig):
             end_n_top (:obj:`int`, optional, defaults to 5):
                 Used in the SQuAD evaluation script for XLM and XLNet.
             use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
-                Differs slightly from other models as it is always turned on at training time.
+                Whether or not the model should return the last pre-computed hidden states.
+
+                .. note::
+                    This flag behaves differently from other models: it only controls the inference behavior. During
+                    training, the model always uses ``use_cache=True``.
 
         Example::