mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-03 03:31:05 +06:00
Set usedforsecurity=False in hashlib methods (FIPS compliance) (#27483)
* Set usedforsecurity=False in hashlib methods (FIPS compliance)
* trigger ci
* tokenizers version
* deps
* bump hfh version
* let's try this
This commit is contained in:
parent
5603fad247
commit
fd65aa9818
@ -1,5 +1,4 @@
|
||||
import gzip
|
||||
import hashlib
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
@ -11,6 +10,7 @@ from pathlib import Path
|
||||
import numpy as np
|
||||
from arguments import PreprocessingArguments
|
||||
from datasets import load_dataset
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
from minhash_deduplication import deduplicate_dataset
|
||||
|
||||
from transformers import AutoTokenizer, HfArgumentParser
|
||||
@ -21,7 +21,7 @@ PATTERN = re.compile(r"\s+")
|
||||
|
||||
def get_hash(example):
|
||||
"""Get hash of content field."""
|
||||
return {"hash": hashlib.md5(re.sub(PATTERN, "", example["content"]).encode("utf-8")).hexdigest()}
|
||||
return {"hash": insecure_hashlib.md5(re.sub(PATTERN, "", example["content"]).encode("utf-8")).hexdigest()}
|
||||
|
||||
|
||||
def line_stats(example):
|
||||
|
@ -28,7 +28,6 @@ import tempfile
|
||||
from collections import OrderedDict
|
||||
from contextlib import contextmanager
|
||||
from functools import partial
|
||||
from hashlib import sha256
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
@ -39,6 +38,7 @@ import numpy as np
|
||||
import requests
|
||||
import wget
|
||||
from filelock import FileLock
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
from PIL import Image
|
||||
from tqdm.auto import tqdm
|
||||
from yaml import Loader, dump, load
|
||||
@ -402,12 +402,12 @@ def get_from_cache(
|
||||
|
||||
def url_to_filename(url, etag=None):
|
||||
url_bytes = url.encode("utf-8")
|
||||
url_hash = sha256(url_bytes)
|
||||
url_hash = insecure_hashlib.sha256(url_bytes)
|
||||
filename = url_hash.hexdigest()
|
||||
|
||||
if etag:
|
||||
etag_bytes = etag.encode("utf-8")
|
||||
etag_hash = sha256(etag_bytes)
|
||||
etag_hash = insecure_hashlib.sha256(etag_bytes)
|
||||
filename += "." + etag_hash.hexdigest()
|
||||
|
||||
if url.endswith(".h5"):
|
||||
|
@ -28,7 +28,6 @@ import tempfile
|
||||
from collections import OrderedDict
|
||||
from contextlib import contextmanager
|
||||
from functools import partial
|
||||
from hashlib import sha256
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
@ -39,6 +38,7 @@ import numpy as np
|
||||
import requests
|
||||
import wget
|
||||
from filelock import FileLock
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
from PIL import Image
|
||||
from tqdm.auto import tqdm
|
||||
from yaml import Loader, dump, load
|
||||
@ -402,12 +402,12 @@ def get_from_cache(
|
||||
|
||||
def url_to_filename(url, etag=None):
|
||||
url_bytes = url.encode("utf-8")
|
||||
url_hash = sha256(url_bytes)
|
||||
url_hash = insecure_hashlib.sha256(url_bytes)
|
||||
filename = url_hash.hexdigest()
|
||||
|
||||
if etag:
|
||||
etag_bytes = etag.encode("utf-8")
|
||||
etag_hash = sha256(etag_bytes)
|
||||
etag_hash = insecure_hashlib.sha256(etag_bytes)
|
||||
filename += "." + etag_hash.hexdigest()
|
||||
|
||||
if url.endswith(".h5"):
|
||||
|
3
setup.py
3
setup.py
@ -118,7 +118,7 @@ _deps = [
|
||||
"fugashi>=1.0",
|
||||
"GitPython<3.1.19",
|
||||
"hf-doc-builder>=0.3.0",
|
||||
"huggingface-hub>=0.16.4,<1.0",
|
||||
"huggingface-hub>=0.19.3,<1.0",
|
||||
"importlib_metadata",
|
||||
"ipadic>=1.0.0,<2.0",
|
||||
"isort>=5.5.4",
|
||||
@ -321,6 +321,7 @@ extras["testing"] = (
|
||||
"rjieba",
|
||||
"beautifulsoup4",
|
||||
"tensorboard",
|
||||
"pydantic",
|
||||
)
|
||||
+ extras["retrieval"]
|
||||
+ extras["modelcreation"]
|
||||
|
@ -25,7 +25,7 @@ deps = {
|
||||
"fugashi": "fugashi>=1.0",
|
||||
"GitPython": "GitPython<3.1.19",
|
||||
"hf-doc-builder": "hf-doc-builder>=0.3.0",
|
||||
"huggingface-hub": "huggingface-hub>=0.16.4,<1.0",
|
||||
"huggingface-hub": "huggingface-hub>=0.19.3,<1.0",
|
||||
"importlib_metadata": "importlib_metadata",
|
||||
"ipadic": "ipadic>=1.0.0,<2.0",
|
||||
"isort": "isort>=5.5.4",
|
||||
|
@ -15,7 +15,6 @@
|
||||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
@ -24,6 +23,7 @@ import urllib
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
@ -114,7 +114,7 @@ def _download(url: str, root: str) -> io.BytesIO:
|
||||
|
||||
if os.path.isfile(download_target):
|
||||
model_bytes = open(download_target, "rb").read()
|
||||
if hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
|
||||
if insecure_hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
|
||||
return torch.load(io.BytesIO(model_bytes))
|
||||
else:
|
||||
warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
|
||||
@ -132,7 +132,7 @@ def _download(url: str, root: str) -> io.BytesIO:
|
||||
loop.update(len(buffer))
|
||||
|
||||
model_bytes = open(download_target, "rb").read()
|
||||
if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
|
||||
if insecure_hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
|
||||
raise RuntimeError(
|
||||
"Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
|
||||
)
|
||||
|
@ -12,9 +12,10 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import hashlib
|
||||
import unittest
|
||||
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
|
||||
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
|
||||
from transformers.pipelines import DepthEstimationPipeline, pipeline
|
||||
from transformers.testing_utils import (
|
||||
@ -44,7 +45,7 @@ else:
|
||||
|
||||
|
||||
def hashimage(image: Image) -> str:
|
||||
m = hashlib.md5(image.tobytes())
|
||||
m = insecure_hashlib.md5(image.tobytes())
|
||||
return m.hexdigest()
|
||||
|
||||
|
||||
|
@ -12,7 +12,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import hashlib
|
||||
import tempfile
|
||||
import unittest
|
||||
from typing import Dict
|
||||
@ -21,6 +20,7 @@ import datasets
|
||||
import numpy as np
|
||||
import requests
|
||||
from datasets import load_dataset
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
|
||||
@ -59,7 +59,7 @@ else:
|
||||
|
||||
|
||||
def hashimage(image: Image) -> str:
|
||||
m = hashlib.md5(image.tobytes())
|
||||
m = insecure_hashlib.md5(image.tobytes())
|
||||
return m.hexdigest()[:10]
|
||||
|
||||
|
||||
|
@ -12,11 +12,11 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import hashlib
|
||||
import unittest
|
||||
from typing import Dict
|
||||
|
||||
import numpy as np
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_MASK_GENERATION_MAPPING,
|
||||
@ -46,7 +46,7 @@ else:
|
||||
|
||||
|
||||
def hashimage(image: Image) -> str:
|
||||
m = hashlib.md5(image.tobytes())
|
||||
m = insecure_hashlib.md5(image.tobytes())
|
||||
return m.hexdigest()[:10]
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user