Change default cache path (#8734)

* Change default cache path

* Document changes

* Apply suggestions from code review

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
Sylvain Gugger 2020-11-23 13:56:45 -05:00 committed by GitHub
parent 0cc5ab1333
commit 900024273b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 7 deletions

View File

@ -70,15 +70,15 @@ to check 🤗 Transformers is properly installed.
This library provides pretrained models that will be downloaded and cached locally. Unless you specify a location with
`cache_dir=...` when you use methods like `from_pretrained`, these models will automatically be downloaded in the
folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the PyTorch
cache home followed by ``/transformers/`` (even if you don't have PyTorch installed). This is (by order of priority):
folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the Hugging
Face cache home followed by ``/transformers/``. This is (by order of priority):
* shell environment variable ``TORCH_HOME``
* shell environment variable ``XDG_CACHE_HOME`` + ``/torch/``
* default: ``~/.cache/torch/``
* shell environment variable ``HF_HOME``
* shell environment variable ``XDG_CACHE_HOME`` + ``/huggingface/``
* default: ``~/.cache/huggingface/``
So if you don't have any specific environment variable set, the cache directory will be at
``~/.cache/torch/transformers/``.
``~/.cache/huggingface/transformers/``.
**Note:** If you have set a shell environment variable for one of the predecessors of this library
(``PYTORCH_TRANSFORMERS_CACHE`` or ``PYTORCH_PRETRAINED_BERT_CACHE``), those will be used if there is no shell

View File

@ -203,8 +203,28 @@ except ImportError:
_tokenizers_available = False
default_cache_path = os.path.join(torch_cache_home, "transformers")
old_default_cache_path = os.path.join(torch_cache_home, "transformers")
# New default cache, shared with the Datasets library
hf_cache_home = os.path.expanduser(
os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface"))
)
default_cache_path = os.path.join(hf_cache_home, "transformers")
# Onetime move from the old location to the new one if no ENV variable has been set.
if (
os.path.isdir(old_default_cache_path)
and "PYTORCH_PRETRAINED_BERT_CACHE" not in os.environ
and "PYTORCH_TRANSFORMERS_CACHE" not in os.environ
and "TRANSFORMERS_CACHE" not in os.environ
):
logger.warn(
"In Transformers v4.0.0, the default path to cache downloaded models changed from "
"'~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. Since you don't seem to have overridden "
"and '~/.cache/torch/transformers' is a directory that exists, we're moving it to "
"'~/.cache/huggingface/transformers' to avoid redownloading models you have already in the cache. You should "
"only see this message once."
)
shutil.move(old_default_cache_path, default_cache_path)
PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path)
PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)