Stop relying on huggingface_hub's private methods (#19392)

* Leverage hfh for move cache

* Style
This commit is contained in:
Lysandre Debut 2022-10-10 15:19:33 +02:00 committed by GitHub
parent 7d5ce6802e
commit 298f6a98c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -34,11 +34,11 @@ from huggingface_hub import (
HfFolder,
create_commit,
create_repo,
get_hf_file_metadata,
hf_hub_download,
hf_hub_url,
whoami,
)
from huggingface_hub.constants import HUGGINGFACE_HEADER_X_LINKED_ETAG, HUGGINGFACE_HEADER_X_REPO_COMMIT
from huggingface_hub.file_download import REGEX_COMMIT_HASH, http_get
from huggingface_hub.utils import (
EntryNotFoundError,
@ -982,26 +982,6 @@ def get_all_cached_files(cache_dir=None):
return cached_files
def get_hub_metadata(url, token=None):
"""
Returns the commit hash and associated etag for a given url.
"""
if token is None:
token = HfFolder.get_token()
headers = {"user-agent": http_user_agent()}
headers["authorization"] = f"Bearer {token}"
r = huggingface_hub.file_download._request_with_retry(
method="HEAD", url=url, headers=headers, allow_redirects=False
)
hf_raise_for_status(r)
commit_hash = r.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT)
etag = r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
if etag is not None:
etag = huggingface_hub.file_download._normalize_etag(etag)
return etag, commit_hash
def extract_info_from_url(url):
"""
Extract repo_name, revision and filename from an url.
@ -1069,11 +1049,11 @@ def move_cache(cache_dir=None, new_cache_dir=None, token=None):
url = file_info.pop("url")
if url not in hub_metadata:
try:
hub_metadata[url] = get_hub_metadata(url, token=token)
hub_metadata[url] = get_hf_file_metadata(url, use_auth_token=token)
except requests.HTTPError:
continue
etag, commit_hash = hub_metadata[url]
etag, commit_hash = hub_metadata[url].etag, hub_metadata[url].commit_hash
if etag is None or commit_hash is None:
continue