mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-03 03:31:05 +06:00
Change default cache path (#8734)
* Change default cache path * Document changes * Apply suggestions from code review Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
parent
0cc5ab1333
commit
900024273b
@ -70,15 +70,15 @@ to check 🤗 Transformers is properly installed.
|
||||
|
||||
This library provides pretrained models that will be downloaded and cached locally. Unless you specify a location with
|
||||
`cache_dir=...` when you use methods like `from_pretrained`, these models will automatically be downloaded in the
|
||||
folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the PyTorch
|
||||
cache home followed by ``/transformers/`` (even if you don't have PyTorch installed). This is (by order of priority):
|
||||
folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the Hugging
|
||||
Face cache home followed by ``/transformers/``. This is (by order of priority):
|
||||
|
||||
* shell environment variable ``TORCH_HOME``
|
||||
* shell environment variable ``XDG_CACHE_HOME`` + ``/torch/``
|
||||
* default: ``~/.cache/torch/``
|
||||
* shell environment variable ``HF_HOME``
|
||||
* shell environment variable ``XDG_CACHE_HOME`` + ``/huggingface/``
|
||||
* default: ``~/.cache/huggingface/``
|
||||
|
||||
So if you don't have any specific environment variable set, the cache directory will be at
|
||||
``~/.cache/torch/transformers/``.
|
||||
``~/.cache/huggingface/transformers/``.
|
||||
|
||||
**Note:** If you have set a shell environment variable for one of the predecessors of this library
|
||||
(``PYTORCH_TRANSFORMERS_CACHE`` or ``PYTORCH_PRETRAINED_BERT_CACHE``), those will be used if there is no shell
|
||||
|
@ -203,8 +203,28 @@ except ImportError:
|
||||
_tokenizers_available = False
|
||||
|
||||
|
||||
default_cache_path = os.path.join(torch_cache_home, "transformers")
|
||||
old_default_cache_path = os.path.join(torch_cache_home, "transformers")
|
||||
# New default cache, shared with the Datasets library.
# Resolution order: ``HF_HOME`` if set, otherwise ``$XDG_CACHE_HOME/huggingface``, otherwise
# ``~/.cache/huggingface``. A leading ``~`` is expanded to the user's home directory.
hf_cache_home = os.path.expanduser(
    os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface"))
)
default_cache_path = os.path.join(hf_cache_home, "transformers")

# One-time move from the old location to the new one if no ENV variable has been set.
# The move is skipped when the destination already exists: `shutil.move` would otherwise place the old
# directory *inside* the existing one instead of replacing it.
if (
    os.path.isdir(old_default_cache_path)
    and not os.path.exists(default_cache_path)
    and "PYTORCH_PRETRAINED_BERT_CACHE" not in os.environ
    and "PYTORCH_TRANSFORMERS_CACHE" not in os.environ
    and "TRANSFORMERS_CACHE" not in os.environ
):
    # `Logger.warn` is a deprecated alias (removed in Python 3.13); `Logger.warning` is the supported name.
    logger.warning(
        "In Transformers v4.0.0, the default path to cache downloaded models changed from "
        "'~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. Since you don't seem to have "
        "overridden it and '%s' is a directory that exists, we're moving it to '%s' to avoid redownloading "
        "models you have already in the cache. You should only see this message once.",
        old_default_cache_path,
        default_cache_path,
    )
    try:
        shutil.move(old_default_cache_path, default_cache_path)
    except OSError as move_error:
        # The migration is a convenience only; a failed move must not make importing this module fail.
        logger.warning(
            "Could not move the cache from '%s' to '%s' (%s). Previously cached models may be downloaded again.",
            old_default_cache_path,
            default_cache_path,
            move_error,
        )
|
||||
|
||||
PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path)
|
||||
PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)
|
||||
|
Loading…
Reference in New Issue
Block a user