Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-04 05:10:06 +06:00)

Use pyupgrade --py39-plus to improve code (#36843)

Commit ce091b1bda (parent 3e8f0fbf44)
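pyupgrade (https://github.com/asottile/pyupgrade) mechanically rewrites syntax for which a newer equivalent exists; the commit title indicates it was run with the `--py39-plus` flag (the exact invocation is not shown in the diff). The bulk of the changes below are the PEP 585 rewrite: since Python 3.9 the builtin containers are subscriptable, so the `typing.List`, `typing.Dict`, and `typing.Tuple` aliases are redundant. A minimal before/after sketch, using a hypothetical function rather than code from this commit:

# Hypothetical example, not a file from this commit.
# Before: from typing import Dict, List, Tuple
#         def flatten(batches: List[Dict[str, int]]) -> Tuple[int, ...]: ...
from typing import Optional  # Optional/Union still come from typing on 3.9

def flatten(batches: list[dict[str, int]], limit: Optional[int] = None) -> tuple[int, ...]:
    """Post-pyupgrade style: builtin generics per PEP 585."""
    values = [v for batch in batches for v in batch.values()]
    return tuple(values if limit is None else values[:limit])

print(flatten([{"a": 1}, {"b": 2, "c": 3}]))  # (1, 2, 3)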
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team and the librosa & torchaudio authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
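The deleted `# coding=utf-8` lines recur throughout this commit and are safe to drop everywhere: PEP 3120 made UTF-8 the default source encoding in Python 3, so the declaration no longer does anything. A tiny hypothetical sketch:

# No coding declaration needed: UTF-8 is the Python 3 default (PEP 3120).
grüße = "ünïcode in identifiers and literals just works"
print(grüße)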
@@ -18,7 +17,7 @@ and remove unnecessary dependencies.
 """
 
 import warnings
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 
 import numpy as np
 

@@ -146,7 +145,7 @@ def chroma_filter_bank(
     sampling_rate: int,
     tuning: float = 0.0,
     power: Optional[float] = 2.0,
-    weighting_parameters: Optional[Tuple[float, float]] = (5.0, 2.0),
+    weighting_parameters: Optional[tuple[float, float]] = (5.0, 2.0),
     start_at_c_chroma: Optional[bool] = True,
 ):
     """

@@ -592,7 +591,7 @@ def spectrogram(
 
 
 def spectrogram_batch(
-    waveform_list: List[np.ndarray],
+    waveform_list: list[np.ndarray],
     window: np.ndarray,
     frame_length: int,
     hop_length: int,

@@ -611,7 +610,7 @@ def spectrogram_batch(
     db_range: Optional[float] = None,
     remove_dc_offset: Optional[bool] = None,
     dtype: np.dtype = np.float32,
-) -> List[np.ndarray]:
+) -> list[np.ndarray]:
     """
     Calculates spectrograms for a list of waveforms using the Short-Time Fourier Transform, optimized for batch processing.
     This function extends the capabilities of the `spectrogram` function to handle multiple waveforms efficiently by leveraging broadcasting.
@@ -16,7 +16,7 @@ import warnings
 from argparse import ArgumentParser
 from os import listdir, makedirs
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Optional
 
 from packaging.version import Version, parse
 

@@ -159,7 +159,7 @@ def ensure_valid_input(model, tokens, input_names):
     return ordered_input_names, tuple(model_args)
 
 
-def infer_shapes(nlp: Pipeline, framework: str) -> Tuple[List[str], List[str], Dict, BatchEncoding]:
+def infer_shapes(nlp: Pipeline, framework: str) -> tuple[list[str], list[str], dict, BatchEncoding]:
     """
     Attempt to infer the static vs dynamic axes for each input and output tensors for a specific model
 
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -20,7 +19,6 @@ allow to make our dependency on SentencePiece optional.
 """
 
 import warnings
-from typing import Dict, List, Tuple
 
 from packaging import version
 from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors

@@ -91,7 +89,7 @@ class SentencePieceExtractor:
         self.sp = SentencePieceProcessor()
         self.sp.Load(model)
 
-    def extract(self, vocab_scores=None) -> Tuple[Dict[str, int], List[Tuple]]:
+    def extract(self, vocab_scores=None) -> tuple[dict[str, int], list[tuple]]:
         """
         By default will return vocab and merges with respect to their order, by sending `vocab_scores` we're going to
         order the merges with respect to the piece scores instead.

@@ -105,7 +103,7 @@ class SentencePieceExtractor:
 
 
 class GemmaSentencePieceExtractor(SentencePieceExtractor):
-    def extract(self, vocab_scores=None) -> Tuple[Dict[str, int], List[Tuple]]:
+    def extract(self, vocab_scores=None) -> tuple[dict[str, int], list[tuple]]:
         """
         By default will return vocab and merges with respect to their order, by sending `vocab_scores` we're going to
         order the merges with respect to the piece scores instead.

@@ -328,7 +326,7 @@ class OpenAIGPTConverter(Converter):
 
 
 class GPT2Converter(Converter):
-    def converted(self, vocab: Dict[str, int] = None, merges: List[Tuple[str, str]] = None) -> Tokenizer:
+    def converted(self, vocab: dict[str, int] = None, merges: list[tuple[str, str]] = None) -> Tokenizer:
         if not vocab:
             vocab = self.original_tokenizer.encoder
         if not merges:

@@ -397,7 +395,7 @@ class HerbertConverter(Converter):
 
 
 class Qwen2Converter(Converter):
-    def converted(self, vocab: Dict[str, int] = None, merges: List[Tuple[str, str]] = None) -> Tokenizer:
+    def converted(self, vocab: dict[str, int] = None, merges: list[tuple[str, str]] = None) -> Tokenizer:
         if not vocab:
             vocab = self.original_tokenizer.encoder
         if not merges:
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -16,7 +15,7 @@
 Sequence feature extraction class for common feature extractors to preprocess sequences.
 """
 
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 

@@ -54,10 +53,10 @@ class SequenceFeatureExtractor(FeatureExtractionMixin):
         self,
         processed_features: Union[
             BatchFeature,
-            List[BatchFeature],
-            Dict[str, BatchFeature],
-            Dict[str, List[BatchFeature]],
-            List[Dict[str, BatchFeature]],
+            list[BatchFeature],
+            dict[str, BatchFeature],
+            dict[str, list[BatchFeature]],
+            list[dict[str, BatchFeature]],
         ],
         padding: Union[bool, str, PaddingStrategy] = True,
         max_length: Optional[int] = None,

@@ -226,7 +225,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin):
 
     def _pad(
         self,
-        processed_features: Union[Dict[str, np.ndarray], BatchFeature],
+        processed_features: Union[dict[str, np.ndarray], BatchFeature],
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,

@@ -298,7 +297,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin):
 
     def _truncate(
         self,
-        processed_features: Union[Dict[str, np.ndarray], BatchFeature],
+        processed_features: Union[dict[str, np.ndarray], BatchFeature],
         max_length: Optional[int] = None,
         pad_to_multiple_of: Optional[int] = None,
         truncation: Optional[bool] = None,
@@ -18,11 +18,12 @@ import os
 import sys
 import types
 from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, ArgumentTypeError
+from collections.abc import Iterable
 from copy import copy
 from enum import Enum
 from inspect import isclass
 from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, List, Literal, NewType, Optional, Tuple, Union, get_type_hints
+from typing import Any, Callable, Literal, NewType, Optional, Union, get_type_hints
 
 import yaml
 
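Alongside the generics rewrite, `Iterable` moves to `collections.abc`: the `typing` aliases for the abstract base classes are deprecated since Python 3.9, and the `collections.abc` classes are themselves subscriptable under PEP 585. A small sketch with a hypothetical function:

from collections.abc import Iterable  # preferred over typing.Iterable on 3.9+

def total(xs: Iterable[int]) -> int:
    # The ABC works both as a subscriptable annotation and with isinstance().
    return sum(xs)

print(total(range(5)))  # 10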
@@ -62,7 +63,7 @@ def make_choice_type_function(choices: list) -> Callable[[str], Any]:
 
 def HfArg(
     *,
-    aliases: Union[str, List[str]] = None,
+    aliases: Union[str, list[str]] = None,
     help: str = None,
     default: Any = dataclasses.MISSING,
     default_factory: Callable[[], Any] = dataclasses.MISSING,

@@ -254,7 +255,7 @@ class HfArgumentParser(ArgumentParser):
         parser = self
 
         try:
-            type_hints: Dict[str, type] = get_type_hints(dtype)
+            type_hints: dict[str, type] = get_type_hints(dtype)
         except NameError:
             raise RuntimeError(
                 f"Type resolution failed for {dtype}. Try declaring the class in global scope or "

@@ -288,7 +289,7 @@ class HfArgumentParser(ArgumentParser):
         look_for_args_file=True,
         args_filename=None,
         args_file_flag=None,
-    ) -> Tuple[DataClass, ...]:
+    ) -> tuple[DataClass, ...]:
         """
         Parse command-line args into instances of the specified dataclass types.
 

@@ -367,7 +368,7 @@ class HfArgumentParser(ArgumentParser):
 
         return (*outputs,)
 
-    def parse_dict(self, args: Dict[str, Any], allow_extra_keys: bool = False) -> Tuple[DataClass, ...]:
+    def parse_dict(self, args: dict[str, Any], allow_extra_keys: bool = False) -> tuple[DataClass, ...]:
         """
         Alternative helper method that does not use `argparse` at all, instead uses a dict and populating the dataclass
         types.

@@ -397,7 +398,7 @@ class HfArgumentParser(ArgumentParser):
 
     def parse_json_file(
         self, json_file: Union[str, os.PathLike], allow_extra_keys: bool = False
-    ) -> Tuple[DataClass, ...]:
+    ) -> tuple[DataClass, ...]:
         """
         Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
         dataclass types.

@@ -421,7 +422,7 @@ class HfArgumentParser(ArgumentParser):
 
     def parse_yaml_file(
         self, yaml_file: Union[str, os.PathLike], allow_extra_keys: bool = False
-    ) -> Tuple[DataClass, ...]:
+    ) -> tuple[DataClass, ...]:
         """
         Alternative helper method that does not use `argparse` at all, instead loading a yaml file and populating the
         dataclass types.
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2023-present the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -14,7 +13,8 @@
 # limitations under the License.
 
 import math
-from typing import Dict, Iterable, Optional, Union
+from collections.abc import Iterable
+from typing import Optional, Union
 
 import numpy as np
 

@@ -116,7 +116,7 @@ class BaseImageProcessor(ImageProcessingMixin):
     def center_crop(
         self,
         image: np.ndarray,
-        size: Dict[str, int],
+        size: dict[str, int],
         data_format: Optional[Union[str, ChannelDimension]] = None,
         input_data_format: Optional[Union[str, ChannelDimension]] = None,
         **kwargs,

@@ -207,7 +207,7 @@ def convert_to_size_dict(
 
 
 def get_size_dict(
-    size: Union[int, Iterable[int], Dict[str, int]] = None,
+    size: Union[int, Iterable[int], dict[str, int]] = None,
     max_size: Optional[int] = None,
     height_width_order: bool = True,
     default_to_square: bool = True,
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -13,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections.abc import Iterable
 from functools import lru_cache, partial
-from typing import Any, Dict, Iterable, List, Optional, Tuple, TypedDict, Union
+from typing import Any, Optional, TypedDict, Union
 
 import numpy as np
 

@@ -77,8 +77,8 @@ def validate_fast_preprocess_arguments(
     do_rescale: Optional[bool] = None,
     rescale_factor: Optional[float] = None,
     do_normalize: Optional[bool] = None,
-    image_mean: Optional[Union[float, List[float]]] = None,
-    image_std: Optional[Union[float, List[float]]] = None,
+    image_mean: Optional[Union[float, list[float]]] = None,
+    image_std: Optional[Union[float, list[float]]] = None,
     do_pad: Optional[bool] = None,
     size_divisibility: Optional[int] = None,
     do_center_crop: Optional[bool] = None,

@@ -128,14 +128,14 @@ def safe_squeeze(tensor: "torch.Tensor", axis: Optional[int] = None) -> "torch.Tensor":
     return tensor
 
 
-def max_across_indices(values: Iterable[Any]) -> List[Any]:
+def max_across_indices(values: Iterable[Any]) -> list[Any]:
     """
     Return the maximum value across all indices of an iterable of values.
     """
     return [max(values_i) for values_i in zip(*values)]
 
 
-def get_max_height_width(images: List["torch.Tensor"]) -> Tuple[int]:
+def get_max_height_width(images: list["torch.Tensor"]) -> tuple[int]:
     """
     Get the maximum height and width across all images in a batch.
     """

@@ -147,7 +147,7 @@ def get_max_height_width(images: List["torch.Tensor"]) -> Tuple[int]:
 
 def divide_to_patches(
     image: Union[np.array, "torch.Tensor"], patch_size: int
-) -> List[Union[np.array, "torch.Tensor"]]:
+) -> list[Union[np.array, "torch.Tensor"]]:
     """
     Divides an image into patches of a specified size.
 

@@ -171,16 +171,16 @@ def divide_to_patches(
 
 class DefaultFastImageProcessorKwargs(TypedDict, total=False):
     do_resize: Optional[bool]
-    size: Optional[Dict[str, int]]
+    size: Optional[dict[str, int]]
     default_to_square: Optional[bool]
     resample: Optional[Union["PILImageResampling", "F.InterpolationMode"]]
     do_center_crop: Optional[bool]
-    crop_size: Optional[Dict[str, int]]
+    crop_size: Optional[dict[str, int]]
     do_rescale: Optional[bool]
     rescale_factor: Optional[Union[int, float]]
     do_normalize: Optional[bool]
-    image_mean: Optional[Union[float, List[float]]]
-    image_std: Optional[Union[float, List[float]]]
+    image_mean: Optional[Union[float, list[float]]]
+    image_std: Optional[Union[float, list[float]]]
     do_convert_rgb: Optional[bool]
     return_tensors: Optional[Union[str, TensorType]]
     data_format: Optional[ChannelDimension]

@@ -427,8 +427,8 @@ class BaseImageProcessorFast(BaseImageProcessor):
     def _fuse_mean_std_and_rescale_factor(
         self,
         do_normalize: Optional[bool] = None,
-        image_mean: Optional[Union[float, List[float]]] = None,
-        image_std: Optional[Union[float, List[float]]] = None,
+        image_mean: Optional[Union[float, list[float]]] = None,
+        image_std: Optional[Union[float, list[float]]] = None,
         do_rescale: Optional[bool] = None,
         rescale_factor: Optional[float] = None,
         device: Optional["torch.device"] = None,

@@ -446,8 +446,8 @@ class BaseImageProcessorFast(BaseImageProcessor):
         do_rescale: bool,
         rescale_factor: float,
         do_normalize: bool,
-        image_mean: Union[float, List[float]],
-        image_std: Union[float, List[float]],
+        image_mean: Union[float, list[float]],
+        image_std: Union[float, list[float]],
     ) -> "torch.Tensor":
         """
         Rescale and normalize images.

@@ -471,7 +471,7 @@ class BaseImageProcessorFast(BaseImageProcessor):
     def center_crop(
         self,
         image: "torch.Tensor",
-        size: Dict[str, int],
+        size: dict[str, int],
         **kwargs,
     ) -> "torch.Tensor":
         """

@@ -576,7 +576,7 @@ class BaseImageProcessorFast(BaseImageProcessor):
         do_convert_rgb: bool = None,
         input_data_format: Optional[Union[str, ChannelDimension]] = None,
         device: Optional["torch.device"] = None,
-    ) -> List["torch.Tensor"]:
+    ) -> list["torch.Tensor"]:
         """
         Prepare the input images for processing.
         """

@@ -599,8 +599,8 @@ class BaseImageProcessorFast(BaseImageProcessor):
         size: Optional[SizeDict] = None,
         crop_size: Optional[SizeDict] = None,
         default_to_square: Optional[bool] = None,
-        image_mean: Optional[Union[float, List[float]]] = None,
-        image_std: Optional[Union[float, List[float]]] = None,
+        image_mean: Optional[Union[float, list[float]]] = None,
+        image_std: Optional[Union[float, list[float]]] = None,
         data_format: Optional[ChannelDimension] = None,
         **kwargs,
     ) -> dict:

@@ -701,7 +701,7 @@ class BaseImageProcessorFast(BaseImageProcessor):
 
     def _preprocess(
         self,
-        images: List["torch.Tensor"],
+        images: list["torch.Tensor"],
         do_resize: bool,
         size: SizeDict,
         interpolation: Optional["F.InterpolationMode"],

@@ -710,8 +710,8 @@ class BaseImageProcessorFast(BaseImageProcessor):
         do_rescale: bool,
         rescale_factor: float,
         do_normalize: bool,
-        image_mean: Optional[Union[float, List[float]]],
-        image_std: Optional[Union[float, List[float]]],
+        image_mean: Optional[Union[float, list[float]]],
+        image_std: Optional[Union[float, list[float]]],
         return_tensors: Optional[Union[str, TensorType]],
         **kwargs,
     ) -> BatchFeature:

@@ -749,7 +749,7 @@ class BaseImageProcessorFast(BaseImageProcessor):
 
 
 class SemanticSegmentationMixin:
-    def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None):
+    def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple] = None):
         """
         Converts the output of [`MobileNetV2ForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch.
 
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -14,9 +13,9 @@
 # limitations under the License.
 
 import warnings
-from collections.abc import Collection
+from collections.abc import Collection, Iterable
 from math import ceil
-from typing import Dict, Iterable, List, Optional, Tuple, Union
+from typing import Optional, Union
 
 import numpy as np
 

@@ -86,7 +85,7 @@ def to_channel_dimension_format(
     elif target_channel_dim == ChannelDimension.LAST:
         image = image.transpose((1, 2, 0))
     else:
-        raise ValueError("Unsupported channel dimension format: {}".format(channel_dim))
+        raise ValueError(f"Unsupported channel dimension format: {channel_dim}")
 
     return image
 
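pyupgrade also converts `str.format` calls with inline arguments into f-strings, as in the `raise ValueError` line above. An illustrative sketch (variable and message invented for the example):

channel_dim = "ChannelDimension.FIRST"
# Before: "Unsupported channel dimension format: {}".format(channel_dim)
message = f"Unsupported channel dimension format: {channel_dim}"  # after
print(message)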
@@ -192,7 +191,7 @@ def to_pil_image(
     elif is_jax_tensor(image):
         image = np.array(image)
     elif not isinstance(image, np.ndarray):
-        raise ValueError("Input image type not supported: {}".format(type(image)))
+        raise ValueError(f"Input image type not supported: {type(image)}")
 
     # If the channel has been moved to first dim, we put it back at the end.
     image = to_channel_dimension_format(image, ChannelDimension.LAST, input_data_format)

@@ -210,7 +209,7 @@ def to_pil_image(
     return PIL.Image.fromarray(image, mode=image_mode)
 
 
-def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
+def get_size_with_aspect_ratio(image_size, size, max_size=None) -> tuple[int, int]:
     """
     Computes the output image size given the input image size and the desired output size.
 

@@ -252,7 +251,7 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
 # Logic adapted from torchvision resizing logic: https://github.com/pytorch/vision/blob/511924c1ced4ce0461197e5caa64ce5b9e558aab/torchvision/transforms/functional.py#L366
 def get_resize_output_image_size(
     input_image: np.ndarray,
-    size: Union[int, Tuple[int, int], List[int], Tuple[int]],
+    size: Union[int, tuple[int, int], list[int], tuple[int]],
     default_to_square: bool = True,
     max_size: Optional[int] = None,
     input_data_format: Optional[Union[str, ChannelDimension]] = None,

@@ -319,7 +318,7 @@ def get_resize_output_image_size(
 
 def resize(
     image: np.ndarray,
-    size: Tuple[int, int],
+    size: tuple[int, int],
     resample: "PILImageResampling" = None,
     reducing_gap: Optional[int] = None,
     data_format: Optional[ChannelDimension] = None,

@@ -451,7 +450,7 @@ def normalize(
 
 def center_crop(
     image: np.ndarray,
-    size: Tuple[int, int],
+    size: tuple[int, int],
     data_format: Optional[Union[str, ChannelDimension]] = None,
     input_data_format: Optional[Union[str, ChannelDimension]] = None,
     return_numpy: Optional[bool] = None,

@@ -705,7 +704,7 @@ class PaddingMode(ExplicitEnum):
 
 def pad(
     image: np.ndarray,
-    padding: Union[int, Tuple[int, int], Iterable[Tuple[int, int]]],
+    padding: Union[int, tuple[int, int], Iterable[tuple[int, int]]],
     mode: PaddingMode = PaddingMode.CONSTANT,
     constant_values: Union[float, Iterable[float]] = 0.0,
     data_format: Optional[Union[str, ChannelDimension]] = None,

@@ -855,8 +854,8 @@ def _cast_tensor_to_float(x):
 
 
 def group_images_by_shape(
-    images: List["torch.Tensor"],
-) -> Tuple[Dict[Tuple[int, int], List["torch.Tensor"]], Dict[int, Tuple[Tuple[int, int], int]]]:
+    images: list["torch.Tensor"],
+) -> tuple[dict[tuple[int, int], list["torch.Tensor"]], dict[int, tuple[tuple[int, int], int]]]:
     """
     Groups images by shape.
     Returns a dictionary with the shape as key and a list of images with that shape as value,

@@ -876,8 +875,8 @@ def group_images_by_shape(
 
 
 def reorder_images(
-    processed_images: Dict[Tuple[int, int], "torch.Tensor"], grouped_images_index: Dict[int, Tuple[int, int]]
-) -> List["torch.Tensor"]:
+    processed_images: dict[tuple[int, int], "torch.Tensor"], grouped_images_index: dict[int, tuple[int, int]]
+) -> list["torch.Tensor"]:
     """
     Reconstructs a list of images in the original order.
     """
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -15,10 +14,11 @@
 
 import base64
 import os
+from collections.abc import Iterable
 from contextlib import redirect_stdout
 from dataclasses import dataclass
 from io import BytesIO
-from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Callable, Optional, Union
 
 import numpy as np
 import requests

@@ -83,19 +83,19 @@ logger = logging.get_logger(__name__)
 
 
 ImageInput = Union[
-    "PIL.Image.Image", np.ndarray, "torch.Tensor", List["PIL.Image.Image"], List[np.ndarray], List["torch.Tensor"]
+    "PIL.Image.Image", np.ndarray, "torch.Tensor", list["PIL.Image.Image"], list[np.ndarray], list["torch.Tensor"]
 ]  # noqa
 
 
 VideoInput = Union[
-    List["PIL.Image.Image"],
+    list["PIL.Image.Image"],
     "np.ndarray",
     "torch.Tensor",
-    List["np.ndarray"],
-    List["torch.Tensor"],
-    List[List["PIL.Image.Image"]],
-    List[List["np.ndarrray"]],
-    List[List["torch.Tensor"]],
+    list["np.ndarray"],
+    list["torch.Tensor"],
+    list[list["PIL.Image.Image"]],
+    list[list["np.ndarrray"]],
+    list[list["torch.Tensor"]],
 ]  # noqa
 
 

@@ -122,7 +122,7 @@ class VideoMetadata:
     video_backend: str
 
 
-AnnotationType = Dict[str, Union[int, str, List[Dict]]]
+AnnotationType = dict[str, Union[int, str, list[dict]]]
 
 
 def is_pil_image(img):

@@ -155,7 +155,7 @@ def is_valid_image(img):
     return is_pil_image(img) or is_numpy_array(img) or is_torch_tensor(img) or is_tf_tensor(img) or is_jax_tensor(img)
 
 
-def is_valid_list_of_images(images: List):
+def is_valid_list_of_images(images: list):
     return images and all(is_valid_image(image) for image in images)
 
 

@@ -188,7 +188,7 @@ def is_scaled_image(image: np.ndarray) -> bool:
    return np.min(image) >= 0 and np.max(image) <= 1
 
 
-def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]:
+def make_list_of_images(images, expected_ndims: int = 3) -> list[ImageInput]:
     """
     Ensure that the output is a list of images. If the input is a single image, it is converted to a list of length 1.
     If the input is a batch of images, it is converted to a list of images.

@@ -228,7 +228,7 @@ def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]:
 
 
 def make_flat_list_of_images(
-    images: Union[List[ImageInput], ImageInput],
+    images: Union[list[ImageInput], ImageInput],
 ) -> ImageInput:
     """
     Ensure that the output is a flat list of images. If the input is a single image, it is converted to a list of length 1.

@@ -263,7 +263,7 @@ def make_flat_list_of_images(
 
 
 def make_nested_list_of_images(
-    images: Union[List[ImageInput], ImageInput],
+    images: Union[list[ImageInput], ImageInput],
 ) -> ImageInput:
     """
     Ensure that the output is a nested list of images.

@@ -339,7 +339,7 @@ def to_numpy_array(img) -> np.ndarray:
 
 
 def infer_channel_dimension_format(
-    image: np.ndarray, num_channels: Optional[Union[int, Tuple[int, ...]]] = None
+    image: np.ndarray, num_channels: Optional[Union[int, tuple[int, ...]]] = None
 ) -> ChannelDimension:
     """
     Infers the channel dimension format of `image`.

@@ -399,7 +399,7 @@ def get_channel_dimension_axis(
     raise ValueError(f"Unsupported data format: {input_data_format}")
 
 
-def get_image_size(image: np.ndarray, channel_dim: ChannelDimension = None) -> Tuple[int, int]:
+def get_image_size(image: np.ndarray, channel_dim: ChannelDimension = None) -> tuple[int, int]:
     """
     Returns the (height, width) dimensions of the image.
 

@@ -424,10 +424,10 @@ def get_image_size(image: np.ndarray, channel_dim: ChannelDimension = None) -> Tuple[int, int]:
 
 
 def get_image_size_for_max_height_width(
-    image_size: Tuple[int, int],
+    image_size: tuple[int, int],
     max_height: int,
     max_width: int,
-) -> Tuple[int, int]:
+) -> tuple[int, int]:
     """
     Computes the output image size given the input image and the maximum allowed height and width. Keep aspect ratio.
     Important, even if image_height < max_height and image_width < max_width, the image will be resized

@@ -454,7 +454,7 @@ def get_image_size_for_max_height_width(
     return new_height, new_width
 
 
-def is_valid_annotation_coco_detection(annotation: Dict[str, Union[List, Tuple]]) -> bool:
+def is_valid_annotation_coco_detection(annotation: dict[str, Union[list, tuple]]) -> bool:
     if (
         isinstance(annotation, dict)
         and "image_id" in annotation

@@ -469,7 +469,7 @@ def is_valid_annotation_coco_detection(annotation: Dict[str, Union[List, Tuple]]) -> bool:
     return False
 
 
-def is_valid_annotation_coco_panoptic(annotation: Dict[str, Union[List, Tuple]]) -> bool:
+def is_valid_annotation_coco_panoptic(annotation: dict[str, Union[list, tuple]]) -> bool:
     if (
         isinstance(annotation, dict)
         and "image_id" in annotation

@@ -485,11 +485,11 @@ def is_valid_annotation_coco_panoptic(annotation: Dict[str, Union[List, Tuple]]) -> bool:
     return False
 
 
-def valid_coco_detection_annotations(annotations: Iterable[Dict[str, Union[List, Tuple]]]) -> bool:
+def valid_coco_detection_annotations(annotations: Iterable[dict[str, Union[list, tuple]]]) -> bool:
     return all(is_valid_annotation_coco_detection(ann) for ann in annotations)
 
 
-def valid_coco_panoptic_annotations(annotations: Iterable[Dict[str, Union[List, Tuple]]]) -> bool:
+def valid_coco_panoptic_annotations(annotations: Iterable[dict[str, Union[list, tuple]]]) -> bool:
     return all(is_valid_annotation_coco_panoptic(ann) for ann in annotations)
 
 

@@ -880,8 +880,8 @@ def load_video(
 
 
 def load_images(
-    images: Union[List, Tuple, str, "PIL.Image.Image"], timeout: Optional[float] = None
-) -> Union["PIL.Image.Image", List["PIL.Image.Image"], List[List["PIL.Image.Image"]]]:
+    images: Union[list, tuple, str, "PIL.Image.Image"], timeout: Optional[float] = None
+) -> Union["PIL.Image.Image", list["PIL.Image.Image"], list[list["PIL.Image.Image"]]]:
     """Loads images, handling different levels of nesting.
 
     Args:

@@ -904,14 +904,14 @@ def validate_preprocess_arguments(
     do_rescale: Optional[bool] = None,
     rescale_factor: Optional[float] = None,
     do_normalize: Optional[bool] = None,
-    image_mean: Optional[Union[float, List[float]]] = None,
-    image_std: Optional[Union[float, List[float]]] = None,
+    image_mean: Optional[Union[float, list[float]]] = None,
+    image_std: Optional[Union[float, list[float]]] = None,
     do_pad: Optional[bool] = None,
     size_divisibility: Optional[int] = None,
     do_center_crop: Optional[bool] = None,
-    crop_size: Optional[Dict[str, int]] = None,
+    crop_size: Optional[dict[str, int]] = None,
     do_resize: Optional[bool] = None,
-    size: Optional[Dict[str, int]] = None,
+    size: Optional[dict[str, int]] = None,
     resample: Optional["PILImageResampling"] = None,
 ):
     """

@@ -1295,8 +1295,8 @@ class ImageFeatureExtractionMixin:
 
 def validate_annotations(
     annotation_format: AnnotationFormat,
-    supported_annotation_formats: Tuple[AnnotationFormat, ...],
-    annotations: List[Dict],
+    supported_annotation_formats: tuple[AnnotationFormat, ...],
+    annotations: list[dict],
 ) -> None:
     if annotation_format not in supported_annotation_formats:
         raise ValueError(f"Unsupported annotation format: {format} must be one of {supported_annotation_formats}")

@@ -1318,7 +1318,7 @@ def validate_annotations(
     )
 
 
-def validate_kwargs(valid_processor_keys: List[str], captured_kwargs: List[str]):
+def validate_kwargs(valid_processor_keys: list[str], captured_kwargs: list[str]):
     unused_keys = set(captured_kwargs).difference(set(valid_processor_keys))
     if unused_keys:
         unused_key_str = ", ".join(unused_keys)
@@ -2,7 +2,7 @@ import logging
 import os
 from pathlib import Path
 from time import sleep
-from typing import Callable, List, Optional, Union
+from typing import Callable, Optional, Union
 
 import numpy as np
 import tensorflow as tf

@@ -79,8 +79,8 @@ class KerasMetricCallback(keras.callbacks.Callback):
         self,
         metric_fn: Callable,
         eval_dataset: Union[tf.data.Dataset, np.ndarray, tf.Tensor, tuple, dict],
-        output_cols: Optional[List[str]] = None,
-        label_cols: Optional[List[str]] = None,
+        output_cols: Optional[list[str]] = None,
+        label_cols: Optional[list[str]] = None,
         batch_size: Optional[int] = None,
         predict_with_generate: bool = False,
         use_xla_generation: bool = False,
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 
 import torch
 

@@ -301,7 +301,7 @@ class AttentionMaskConverter:
 
 def _prepare_4d_causal_attention_mask(
     attention_mask: Optional[torch.Tensor],
-    input_shape: Union[torch.Size, Tuple, List],
+    input_shape: Union[torch.Size, tuple, list],
     inputs_embeds: torch.Tensor,
     past_key_values_length: int,
     sliding_window: Optional[int] = None,

@@ -354,7 +354,7 @@ def _prepare_4d_causal_attention_mask(
 # Adapted from _prepare_4d_causal_attention_mask
 def _prepare_4d_causal_attention_mask_for_sdpa(
     attention_mask: Optional[torch.Tensor],
-    input_shape: Union[torch.Size, Tuple, List],
+    input_shape: Union[torch.Size, tuple, list],
     inputs_embeds: torch.Tensor,
     past_key_values_length: int,
     sliding_window: Optional[int] = None,

@@ -452,7 +452,7 @@ def _prepare_4d_attention_mask_for_sdpa(mask: torch.Tensor, dtype: torch.dtype,
 
 
 def _create_4d_causal_attention_mask(
-    input_shape: Union[torch.Size, Tuple, List],
+    input_shape: Union[torch.Size, tuple, list],
     dtype: torch.dtype,
     device: torch.device,
     past_key_values_length: int = 0,
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2024 The Fairseq Authors and the HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -15,7 +14,7 @@
 
 import inspect
 import os
-from typing import Optional, Tuple, TypedDict
+from typing import Optional, TypedDict
 
 import torch
 import torch.nn.functional as F

@@ -33,7 +32,7 @@ if is_flash_attn_2_available():
     _flash_supports_window_size = "window_size" in list(inspect.signature(flash_attn_func).parameters)
 
 
-def _get_unpad_data(attention_mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, int]:
+def _get_unpad_data(attention_mask: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, int]:
     """
     Retrieves indexing data required to repad unpadded (ragged) tensors.
 
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2024 The ggml.ai team and The HuggingFace Inc. team. and pygguf author (github.com/99991)
 # https://github.com/99991/pygguf
 #

@@ -15,7 +14,7 @@
 # limitations under the License.
 
 import re
-from typing import Dict, NamedTuple, Optional
+from typing import NamedTuple, Optional
 
 import numpy as np
 from tqdm.auto import tqdm

@@ -115,7 +114,7 @@ class Qwen2MoeTensorProcessor(TensorProcessor):
         return GGUFTensor(weights, name, {})
 
     def _split_moe_expert_tensor(
-        self, weights: np.ndarray, parsed_parameters: Dict[str, Dict], name: str, tensor_key_mapping: dict
+        self, weights: np.ndarray, parsed_parameters: dict[str, dict], name: str, tensor_key_mapping: dict
     ):
         # Original merge implementation
         # https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py#L1994-L2022
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import math
-from typing import Optional, Tuple
+from typing import Optional
 
 from .configuration_utils import PretrainedConfig
 from .utils import is_torch_available, logging

@@ -31,7 +31,7 @@ def _compute_default_rope_parameters(
     device: Optional["torch.device"] = None,
     seq_len: Optional[int] = None,
     **rope_kwargs,
-) -> Tuple["torch.Tensor", float]:
+) -> tuple["torch.Tensor", float]:
     """
     Computes the inverse frequencies according to the original RoPE implementation
     Args:

@@ -73,7 +73,7 @@ def _compute_linear_scaling_rope_parameters(
     device: Optional["torch.device"] = None,
     seq_len: Optional[int] = None,
     **rope_kwargs,
-) -> Tuple["torch.Tensor", float]:
+) -> tuple["torch.Tensor", float]:
     """
     Computes the inverse frequencies with linear scaling. Credits to the Reddit user /u/kaiokendev
     Args:

@@ -114,7 +114,7 @@ def _compute_dynamic_ntk_parameters(
     device: Optional["torch.device"] = None,
     seq_len: Optional[int] = None,
     **rope_kwargs,
-) -> Tuple["torch.Tensor", float]:
+) -> tuple["torch.Tensor", float]:
     """
     Computes the inverse frequencies with NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla
     Args:

@@ -162,7 +162,7 @@ def _compute_dynamic_ntk_parameters(
 
 def _compute_yarn_parameters(
     config: PretrainedConfig, device: "torch.device", seq_len: Optional[int] = None, **rope_kwargs
-) -> Tuple["torch.Tensor", float]:
+) -> tuple["torch.Tensor", float]:
     """
     Computes the inverse frequencies with NTK scaling. Please refer to the
     [original paper](https://arxiv.org/abs/2309.00071)

@@ -241,7 +241,7 @@ def _compute_yarn_parameters(
 
 def _compute_longrope_parameters(
     config: PretrainedConfig, device: "torch.device", seq_len: Optional[int] = None, **rope_kwargs
-) -> Tuple["torch.Tensor", float]:
+) -> tuple["torch.Tensor", float]:
     """
     Computes the inverse frequencies with LongRoPE scaling. Please refer to the
     [original implementation](https://github.com/microsoft/LongRoPE)

@@ -304,7 +304,7 @@ def _compute_longrope_parameters(
 
 def _compute_llama3_parameters(
     config: PretrainedConfig, device: "torch.device", seq_len: Optional[int] = None, **rope_kwargs
-) -> Tuple["torch.Tensor", float]:
+) -> tuple["torch.Tensor", float]:
     """
     Computes the inverse frequencies for llama 3.1.
 
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,7 +15,7 @@
 """Functions and classes related to optimization (weight updates)."""
 
 import re
-from typing import Callable, List, Optional, Union
+from typing import Callable, Optional, Union
 
 import tensorflow as tf
 

@@ -105,7 +105,7 @@ def create_optimizer(
     adam_global_clipnorm: Optional[float] = None,
     weight_decay_rate: float = 0.0,
     power: float = 1.0,
-    include_in_weight_decay: Optional[List[str]] = None,
+    include_in_weight_decay: Optional[list[str]] = None,
 ):
     """
     Creates an optimizer with a learning rate schedule using a warmup phase followed by a linear decay.

@@ -224,8 +224,8 @@ class AdamWeightDecay(Adam):
         epsilon: float = 1e-7,
         amsgrad: bool = False,
         weight_decay_rate: float = 0.0,
-        include_in_weight_decay: Optional[List[str]] = None,
-        exclude_from_weight_decay: Optional[List[str]] = None,
+        include_in_weight_decay: Optional[list[str]] = None,
+        exclude_from_weight_decay: Optional[list[str]] = None,
         name: str = "AdamWeightDecay",
         **kwargs,
     ):

@@ -238,10 +238,10 @@ class AdamWeightDecay(Adam):
     def from_config(cls, config):
         """Creates an optimizer from its config with WarmUp custom object."""
         custom_objects = {"WarmUp": WarmUp}
-        return super(AdamWeightDecay, cls).from_config(config, custom_objects=custom_objects)
+        return super().from_config(config, custom_objects=custom_objects)
 
     def _prepare_local(self, var_device, var_dtype, apply_state):
-        super(AdamWeightDecay, self)._prepare_local(var_device, var_dtype, apply_state)
+        super()._prepare_local(var_device, var_dtype, apply_state)
         apply_state[(var_device, var_dtype)]["weight_decay_rate"] = tf.constant(
             self.weight_decay_rate, name="adam_weight_decay_rate"
         )

@@ -257,7 +257,7 @@ class AdamWeightDecay(Adam):
 
     def apply_gradients(self, grads_and_vars, name=None, **kwargs):
         grads, tvars = list(zip(*grads_and_vars))
-        return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars), name=name, **kwargs)
+        return super().apply_gradients(zip(grads, tvars), name=name, **kwargs)
 
     def _get_lr(self, var_device, var_dtype, apply_state):
         """Retrieves the learning rate with the given state."""

@@ -276,13 +276,13 @@ class AdamWeightDecay(Adam):
         lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
         decay = self._decay_weights_op(var, lr_t, apply_state)
         with tf.control_dependencies([decay]):
-            return super(AdamWeightDecay, self)._resource_apply_dense(grad, var, **kwargs)
+            return super()._resource_apply_dense(grad, var, **kwargs)
 
     def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
|
||||||
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
|
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
|
||||||
decay = self._decay_weights_op(var, lr_t, apply_state)
|
decay = self._decay_weights_op(var, lr_t, apply_state)
|
||||||
with tf.control_dependencies([decay]):
|
with tf.control_dependencies([decay]):
|
||||||
return super(AdamWeightDecay, self)._resource_apply_sparse(grad, var, indices, **kwargs)
|
return super()._resource_apply_sparse(grad, var, indices, **kwargs)
|
||||||
|
|
||||||
def get_config(self):
|
def get_config(self):
|
||||||
config = super().get_config()
|
config = super().get_config()
|
||||||
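The `super(AdamWeightDecay, self)` rewrites rely on Python 3's zero-argument `super()`, which binds the enclosing class and instance automatically. A minimal sketch with hypothetical classes, not from this diff:

class Base:
    def greet(self) -> str:
        return "base"


class Child(Base):
    def greet(self) -> str:
        # Python 3 infers the class and instance, so super() needs no arguments
        return super().greet() + " -> child"


assert Child().greet() == "base -> child"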
@@ -15,7 +15,7 @@ from __future__ import annotations

 import inspect
 from functools import lru_cache, wraps
-from typing import Callable, List, Optional, Set, Tuple, Union
+from typing import Callable

 import torch
 from packaging import version
@@ -157,9 +157,7 @@ def prune_conv1d_layer(layer: Conv1D, index: torch.LongTensor, dim: int = 1) ->
     return new_layer


-def prune_layer(
-    layer: Union[nn.Linear, Conv1D], index: torch.LongTensor, dim: Optional[int] = None
-) -> Union[nn.Linear, Conv1D]:
+def prune_layer(layer: nn.Linear | Conv1D, index: torch.LongTensor, dim: int | None = None) -> nn.Linear | Conv1D:
     """
     Prune a Conv1D or linear layer to keep only entries in index.

@@ -260,8 +258,8 @@ def apply_chunking_to_forward(


 def find_pruneable_heads_and_indices(
-    heads: List[int], n_heads: int, head_size: int, already_pruned_heads: Set[int]
-) -> Tuple[Set[int], torch.LongTensor]:
+    heads: list[int], n_heads: int, head_size: int, already_pruned_heads: set[int]
+) -> tuple[set[int], torch.LongTensor]:
     """
     Finds the heads and their indices taking `already_pruned_heads` into account.

@@ -286,9 +284,7 @@ def find_pruneable_heads_and_indices(
     return heads, index


-def meshgrid(
-    *tensors: Union[torch.Tensor, List[torch.Tensor]], indexing: Optional[str] = None
-) -> Tuple[torch.Tensor, ...]:
+def meshgrid(*tensors: torch.Tensor | list[torch.Tensor], indexing: str | None = None) -> tuple[torch.Tensor, ...]:
     """
     Wrapper around torch.meshgrid to avoid warning messages about the introduced `indexing` argument.

@@ -297,7 +293,7 @@ def meshgrid(
     return torch.meshgrid(*tensors, indexing=indexing)


-def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]:
+def id_tensor_storage(tensor: torch.Tensor) -> tuple[torch.device, int, int]:
     """
     Unique identifier to a tensor storage. Multiple different tensors can share the same underlying storage. For
     example, "meta" tensors all share the same storage, and thus their identifier will all be equal. This identifier is
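This file is different from the others: because it opts into postponed annotation evaluation with `from __future__ import annotations` (visible in the hunk header above), pyupgrade can also apply the PEP 604 `X | Y` union syntax, as in the new `prune_layer` and `meshgrid` signatures; the annotations are never evaluated at runtime, so this is safe below Python 3.10. A minimal sketch (hypothetical function, not from this diff):

from __future__ import annotations  # annotations stay unevaluated strings


def clamp(value: int | float, limit: int | None = None) -> int | float:
    # The `|` unions above are never evaluated at runtime, so this runs on 3.9
    return value if limit is None else min(value, limit)


assert clamp(7, limit=5) == 5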
@@ -33,12 +33,12 @@ import threading
 import time
 import unittest
 from collections import UserDict, defaultdict
-from collections.abc import Mapping
+from collections.abc import Generator, Iterable, Iterator, Mapping
 from dataclasses import MISSING, fields
 from functools import cache, wraps
 from io import StringIO
 from pathlib import Path
-from typing import Any, Callable, Dict, Generator, Iterable, Iterator, List, Optional, Union
+from typing import Any, Callable, Optional, Union
 from unittest import mock
 from unittest.mock import patch

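`Generator`, `Iterable`, and `Iterator` move rather than disappear: their `typing` aliases are deprecated since Python 3.9 in favor of the subscriptable classes in `collections.abc`, which PEP 585 covers as well. A minimal sketch (illustrative generator, not from this diff):

from collections.abc import Iterator


def countdown(n: int) -> Iterator[int]:
    # collections.abc classes are subscriptable natively on Python 3.9+
    while n > 0:
        yield n
        n -= 1


assert list(countdown(3)) == [3, 2, 1]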
@@ -1456,14 +1456,13 @@ def get_steps_per_epoch(trainer: Trainer) -> int:


 def evaluate_side_effect_factory(
-    side_effect_values: List[Dict[str, float]],
-) -> Generator[Dict[str, float], None, None]:
+    side_effect_values: list[dict[str, float]],
+) -> Generator[dict[str, float], None, None]:
     """
     Function that returns side effects for the _evaluate method.
     Used when we're unsure of exactly how many times _evaluate will be called.
     """
-    for side_effect_value in side_effect_values:
-        yield side_effect_value
+    yield from side_effect_values

     while True:
         yield side_effect_values[-1]
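The loop-to-`yield from` rewrite uses PEP 380 generator delegation, which yields every item of the inner iterable directly. A sketch of the equivalence, mirroring the function above with hypothetical values:

def repeat_then_hold(values: list[int]):
    yield from values  # equivalent to: for v in values: yield v
    # Afterwards keep yielding the final element forever
    while True:
        yield values[-1]


gen = repeat_then_hold([1, 2])
assert [next(gen) for _ in range(4)] == [1, 2, 2, 2]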
@@ -2444,7 +2443,7 @@ def nested_simplify(obj, decimals=3):


 def check_json_file_has_correct_format(file_path):
-    with open(file_path, "r") as f:
+    with open(file_path) as f:
         lines = f.readlines()
         if len(lines) == 1:
             # length can only be 1 if dict is empty
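Dropping the explicit `"r"` leans on the documented default of the built-in `open()`, whose signature begins `open(file, mode='r', ...)`. A minimal sketch using a temporary file:

import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    tmp.write("{}")

# open() defaults to mode="r", so the explicit argument carried no information
with open(tmp.name) as f:
    assert f.read() == "{}"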
@@ -2471,7 +2470,7 @@ class SubprocessCallException(Exception):
     pass


-def run_command(command: List[str], return_stdout=False):
+def run_command(command: list[str], return_stdout=False):
     """
     Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
     if an error occurred while running `command`
@@ -2904,7 +2903,7 @@ class HfDoctestModule(Module):
             yield DoctestItem.from_parent(self, name=test.name, runner=runner, dtest=test)


-def _device_agnostic_dispatch(device: str, dispatch_table: Dict[str, Callable], *args, **kwargs):
+def _device_agnostic_dispatch(device: str, dispatch_table: dict[str, Callable], *args, **kwargs):
     if device not in dispatch_table:
         return dispatch_table["default"](*args, **kwargs)

@@ -2992,7 +2991,7 @@ if is_torch_available():

     torch_device = device_name

-    def update_mapping_from_spec(device_fn_dict: Dict[str, Callable], attribute_name: str):
+    def update_mapping_from_spec(device_fn_dict: dict[str, Callable], attribute_name: str):
         try:
             # Try to import the function directly
             spec_fn = getattr(device_spec_module, attribute_name)
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import List, Optional, Union
+from typing import Optional, Union

 import numpy as np
 import tensorflow as tf
@@ -25,7 +25,7 @@ from .utils import logging
 logger = logging.get_logger(__name__)


-def shape_list(tensor: Union[tf.Tensor, np.ndarray]) -> List[int]:
+def shape_list(tensor: Union[tf.Tensor, np.ndarray]) -> list[int]:
     """
     Deal with dynamic shape in tensorflow cleanly.

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team.
 # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
@@ -17,7 +16,7 @@
 Time series distributional output classes and utilities.
 """

-from typing import Callable, Dict, Optional, Tuple
+from typing import Callable, Optional

 import torch
 from torch import nn
@@ -63,14 +62,14 @@ class AffineTransformed(TransformedDistribution):

 class ParameterProjection(nn.Module):
     def __init__(
-        self, in_features: int, args_dim: Dict[str, int], domain_map: Callable[..., Tuple[torch.Tensor]], **kwargs
+        self, in_features: int, args_dim: dict[str, int], domain_map: Callable[..., tuple[torch.Tensor]], **kwargs
     ) -> None:
         super().__init__(**kwargs)
         self.args_dim = args_dim
         self.proj = nn.ModuleList([nn.Linear(in_features, dim) for dim in args_dim.values()])
         self.domain_map = domain_map

-    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor]:
+    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor]:
         params_unbounded = [proj(x) for proj in self.proj]

         return self.domain_map(*params_unbounded)
@@ -88,7 +87,7 @@ class LambdaLayer(nn.Module):
 class DistributionOutput:
     distribution_class: type
     in_features: int
-    args_dim: Dict[str, int]
+    args_dim: dict[str, int]

     def __init__(self, dim: int = 1) -> None:
         self.dim = dim
@@ -113,7 +112,7 @@ class DistributionOutput:
         return AffineTransformed(distr, loc=loc, scale=scale, event_dim=self.event_dim)

     @property
-    def event_shape(self) -> Tuple:
+    def event_shape(self) -> tuple:
         r"""
         Shape of each individual event contemplated by the distributions that this object constructs.
         """
@@ -167,7 +166,7 @@ class StudentTOutput(DistributionOutput):
     Student-T distribution output class.
     """

-    args_dim: Dict[str, int] = {"df": 1, "loc": 1, "scale": 1}
+    args_dim: dict[str, int] = {"df": 1, "loc": 1, "scale": 1}
     distribution_class: type = StudentT

     @classmethod
@@ -182,7 +181,7 @@ class NormalOutput(DistributionOutput):
     Normal distribution output class.
     """

-    args_dim: Dict[str, int] = {"loc": 1, "scale": 1}
+    args_dim: dict[str, int] = {"loc": 1, "scale": 1}
     distribution_class: type = Normal

     @classmethod
@@ -196,7 +195,7 @@ class NegativeBinomialOutput(DistributionOutput):
     Negative Binomial distribution output class.
     """

-    args_dim: Dict[str, int] = {"total_count": 1, "logits": 1}
+    args_dim: dict[str, int] = {"total_count": 1, "logits": 1}
     distribution_class: type = NegativeBinomial

     @classmethod
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -22,7 +21,7 @@ import itertools
 import re
 import unicodedata
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Tuple, Union, overload
+from typing import Any, Optional, Union, overload

 from .tokenization_utils_base import (
     ENCODE_KWARGS_DOCSTRING,
@@ -103,7 +102,7 @@ class Trie:
             ref = ref[char]
         ref[self._termination_char] = 1

-    def split(self, text: str) -> List[str]:
+    def split(self, text: str) -> list[str]:
         """
         Will look for the words added to the trie within `text`. Output is the original string splitted along the
         boundaries of the words found.
@@ -391,7 +390,7 @@ def _is_start_of_word(text):
     return bool(_is_control(first_char) | _is_punctuation(first_char) | _is_whitespace(first_char))


-def _insert_one_token_to_ordered_list(token_list: List[str], new_token: str):
+def _insert_one_token_to_ordered_list(token_list: list[str], new_token: str):
     """
     Inserts one token to an ordered list if it does not already exist. Note: token_list must be sorted.
     """
@@ -425,11 +424,11 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):

         # 2. init `_added_tokens_decoder` if child class did not
         if not hasattr(self, "_added_tokens_decoder"):
-            self._added_tokens_decoder: Dict[int, AddedToken] = {}
+            self._added_tokens_decoder: dict[int, AddedToken] = {}

         # 3. if a `added_tokens_decoder` is passed, we are loading from a saved tokenizer, we overwrite
         self._added_tokens_decoder.update(kwargs.pop("added_tokens_decoder", {}))
-        self._added_tokens_encoder: Dict[str, int] = {k.content: v for v, k in self._added_tokens_decoder.items()}
+        self._added_tokens_encoder: dict[str, int] = {k.content: v for v, k in self._added_tokens_decoder.items()}

         # 4 init the parent class
         super().__init__(**kwargs)
@@ -455,7 +454,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
         raise NotImplementedError

     @property
-    def added_tokens_encoder(self) -> Dict[str, int]:
+    def added_tokens_encoder(self) -> dict[str, int]:
         """
         Returns the sorted mapping from string to index. The added tokens encoder is cached for performance
         optimisation in `self._added_tokens_encoder` for the slow tokenizers.
@@ -463,7 +462,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
         return {k.content: v for v, k in sorted(self._added_tokens_decoder.items(), key=lambda item: item[0])}

     @property
-    def added_tokens_decoder(self) -> Dict[int, AddedToken]:
+    def added_tokens_decoder(self) -> dict[int, AddedToken]:
         """
         Returns the added tokens in the vocabulary as a dictionary of index to AddedToken.

@@ -473,7 +472,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
         return dict(sorted(self._added_tokens_decoder.items(), key=lambda item: item[0]))

     @added_tokens_decoder.setter
-    def added_tokens_decoder(self, value: Dict[int, Union[AddedToken, str]]) -> Dict[int, AddedToken]:
+    def added_tokens_decoder(self, value: dict[int, Union[AddedToken, str]]) -> dict[int, AddedToken]:
         # Always raise an error if string because users should define the behavior
         for index, token in value.items():
             if not isinstance(token, (str, AddedToken)) or not isinstance(index, int):
@@ -485,7 +484,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
             self._added_tokens_encoder[str(token)] = index
         self._update_total_vocab_size()

-    def get_added_vocab(self) -> Dict[str, int]:
+    def get_added_vocab(self) -> dict[str, int]:
         """
         Returns the added tokens in the vocabulary as a dictionary of token to index. Results might be different from
         the fast call because for now we always add the tokens even if they are already in the vocabulary. This is
@@ -510,7 +509,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
         """
         self.total_vocab_size = len(self.get_vocab())

-    def _add_tokens(self, new_tokens: Union[List[str], List[AddedToken]], special_tokens: bool = False) -> int:
+    def _add_tokens(self, new_tokens: Union[list[str], list[AddedToken]], special_tokens: bool = False) -> int:
         """
         Add a list of new tokens to the tokenizer class. If the new tokens are not in the vocabulary, they are added to
         it with indices starting from length of the current vocabulary. Special tokens are sometimes already in the
@@ -619,7 +618,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
             token_ids_1 = []
         return len(self.build_inputs_with_special_tokens(token_ids_0, token_ids_1 if pair else None))

-    def tokenize(self, text: TextInput, **kwargs) -> List[str]:
+    def tokenize(self, text: TextInput, **kwargs) -> list[str]:
         """
         Converts a string into a sequence of tokens, using the tokenizer.

@@ -708,7 +707,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
         """
         raise NotImplementedError

-    def convert_tokens_to_ids(self, tokens: Union[str, List[str]]) -> Union[int, List[int]]:
+    def convert_tokens_to_ids(self, tokens: Union[str, list[str]]) -> Union[int, list[int]]:
         """
         Converts a token string (or a sequence of tokens) in a single integer id (or a sequence of ids), using the
         vocabulary.
@@ -824,12 +823,12 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
     def _batch_encode_plus(
         self,
         batch_text_or_text_pairs: Union[
-            List[TextInput],
-            List[TextInputPair],
-            List[PreTokenizedInput],
-            List[PreTokenizedInputPair],
-            List[EncodedInput],
-            List[EncodedInputPair],
+            list[TextInput],
+            list[TextInputPair],
+            list[PreTokenizedInput],
+            list[PreTokenizedInputPair],
+            list[EncodedInput],
+            list[EncodedInputPair],
         ],
         add_special_tokens: bool = True,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
@@ -913,7 +912,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
     @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
     def _batch_prepare_for_model(
         self,
-        batch_ids_pairs: List[Union[PreTokenizedInputPair, Tuple[List[int], None]]],
+        batch_ids_pairs: list[Union[PreTokenizedInputPair, tuple[list[int], None]]],
         add_special_tokens: bool = True,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
@@ -982,7 +981,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):

     def prepare_for_tokenization(
         self, text: str, is_split_into_words: bool = False, **kwargs
-    ) -> Tuple[str, Dict[str, Any]]:
+    ) -> tuple[str, dict[str, Any]]:
         """
         Performs any necessary transformations before tokenization.

@@ -1005,8 +1004,8 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
         return (text, kwargs)

     def get_special_tokens_mask(
-        self, token_ids_0: List, token_ids_1: Optional[List] = None, already_has_special_tokens: bool = False
-    ) -> List[int]:
+        self, token_ids_0: list, token_ids_1: Optional[list] = None, already_has_special_tokens: bool = False
+    ) -> list[int]:
         """
         Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
         special tokens using the tokenizer `prepare_for_model` or `encode_plus` methods.
@@ -1038,11 +1037,11 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
     def convert_ids_to_tokens(self, ids: int, skip_special_tokens: bool = False) -> str: ...

     @overload
-    def convert_ids_to_tokens(self, ids: List[int], skip_special_tokens: bool = False) -> List[str]: ...
+    def convert_ids_to_tokens(self, ids: list[int], skip_special_tokens: bool = False) -> list[str]: ...

     def convert_ids_to_tokens(
-        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
-    ) -> Union[str, List[str]]:
+        self, ids: Union[int, list[int]], skip_special_tokens: bool = False
+    ) -> Union[str, list[str]]:
         """
         Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
         added tokens.
@@ -1075,12 +1074,12 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
     def _convert_id_to_token(self, index: int) -> str:
         raise NotImplementedError

-    def convert_tokens_to_string(self, tokens: List[str]) -> str:
+    def convert_tokens_to_string(self, tokens: list[str]) -> str:
         return " ".join(tokens)

     def _decode(
         self,
-        token_ids: Union[int, List[int]],
+        token_ids: Union[int, list[int]],
         skip_special_tokens: bool = False,
         clean_up_tokenization_spaces: bool = None,
         spaces_between_special_tokens: bool = True,
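Note the contrast with the `pytorch_utils.py` hunks earlier: this module has no `from __future__ import annotations`, so its annotations are evaluated at runtime and pyupgrade keeps `typing.Union`/`Optional` under `--py39-plus`, rewriting only the container generics. A minimal sketch of what is and is not rewritten (illustrative function, not from this diff):

from typing import Optional, Union


def first_id(ids: Union[int, list[int]]) -> Optional[int]:
    # list[int] is rewritten (PEP 585 evaluates fine on 3.9);
    # Union/Optional stay, since `int | None` only evaluates on 3.10+
    if isinstance(ids, int):
        return ids
    return ids[0] if ids else None


assert first_id([5, 7]) == 5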
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,7 +20,8 @@ import copy
 import json
 import os
 from collections import defaultdict
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from collections.abc import Iterable
+from typing import Any, Optional, Union

 import tokenizers.pre_tokenizers as pre_tokenizers_fast
 from tokenizers import Encoding as EncodingFast
@@ -238,15 +238,15 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         """
         return self._tokenizer.get_vocab_size(with_added_tokens=False)

-    def get_vocab(self) -> Dict[str, int]:
+    def get_vocab(self) -> dict[str, int]:
         return self._tokenizer.get_vocab(with_added_tokens=True)

     @property
-    def vocab(self) -> Dict[str, int]:
+    def vocab(self) -> dict[str, int]:
         return self.get_vocab()

     @property
-    def added_tokens_encoder(self) -> Dict[str, int]:
+    def added_tokens_encoder(self) -> dict[str, int]:
         """
         Returns the sorted mapping from string to index. The added tokens encoder is cached for performance
         optimisation in `self._added_tokens_encoder` for the slow tokenizers.
@@ -254,7 +254,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         return {k.content: v for v, k in sorted(self.added_tokens_decoder.items(), key=lambda item: item[0])}

     @property
-    def added_tokens_decoder(self) -> Dict[int, AddedToken]:
+    def added_tokens_decoder(self) -> dict[int, AddedToken]:
         """
         Returns the added tokens in the vocabulary as a dictionary of index to AddedToken.

@@ -263,7 +263,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         """
         return self._tokenizer.get_added_tokens_decoder()

-    def get_added_vocab(self) -> Dict[str, int]:
+    def get_added_vocab(self) -> dict[str, int]:
         """
         Returns the added tokens in the vocabulary as a dictionary of token to index.

@@ -302,7 +302,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         return_offsets_mapping: bool = False,
         return_length: bool = False,
         verbose: bool = True,
-    ) -> Tuple[Dict[str, Any], List[EncodingFast]]:
+    ) -> tuple[dict[str, Any], list[EncodingFast]]:
         """
         Convert the encoding representation (from low-level HuggingFace tokenizer output) to a python Dict and a list
         of encodings, take care of building a batch from overflowing tokens.
@@ -339,7 +339,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):

         return encoding_dict, encodings

-    def convert_tokens_to_ids(self, tokens: Union[str, Iterable[str]]) -> Union[int, List[int]]:
+    def convert_tokens_to_ids(self, tokens: Union[str, Iterable[str]]) -> Union[int, list[int]]:
         """
         Converts a token string (or a sequence of tokens) in a single integer id (or a Iterable of ids), using the
         vocabulary.
@@ -364,7 +364,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     def _convert_id_to_token(self, index: int) -> Optional[str]:
         return self._tokenizer.id_to_token(int(index))

-    def _add_tokens(self, new_tokens: List[Union[str, AddedToken]], special_tokens=False) -> int:
+    def _add_tokens(self, new_tokens: list[Union[str, AddedToken]], special_tokens=False) -> int:
         if special_tokens:
             return self._tokenizer.add_special_tokens(new_tokens)

@@ -392,8 +392,8 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         return self._tokenizer.num_special_tokens_to_add(pair)

     def convert_ids_to_tokens(
-        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
-    ) -> Union[str, List[str]]:
+        self, ids: Union[int, list[int]], skip_special_tokens: bool = False
+    ) -> Union[str, list[str]]:
         """
         Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
         added tokens.
@@ -417,7 +417,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
             tokens.append(self._tokenizer.id_to_token(index))
         return tokens

-    def tokenize(self, text: str, pair: Optional[str] = None, add_special_tokens: bool = False, **kwargs) -> List[str]:
+    def tokenize(self, text: str, pair: Optional[str] = None, add_special_tokens: bool = False, **kwargs) -> list[str]:
         return self.encode_plus(text=text, text_pair=pair, add_special_tokens=add_special_tokens, **kwargs).tokens()

     def set_truncation_and_padding(
@@ -498,7 +498,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     def _batch_encode_plus(
         self,
         batch_text_or_text_pairs: Union[
-            List[TextInput], List[TextInputPair], List[PreTokenizedInput], List[PreTokenizedInputPair]
+            list[TextInput], list[TextInputPair], list[PreTokenizedInput], list[PreTokenizedInputPair]
         ],
         add_special_tokens: bool = True,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
@@ -647,7 +647,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):

         return batched_output

-    def convert_tokens_to_string(self, tokens: List[str]) -> str:
+    def convert_tokens_to_string(self, tokens: list[str]) -> str:
         return (
             self.backend_tokenizer.decoder.decode(tokens)
             if self.backend_tokenizer.decoder is not None
@@ -656,7 +656,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):

     def _decode(
         self,
-        token_ids: Union[int, List[int]],
+        token_ids: Union[int, list[int]],
         skip_special_tokens: bool = False,
         clean_up_tokenization_spaces: bool = None,
         **kwargs,
@@ -681,10 +681,10 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     def _save_pretrained(
         self,
         save_directory: Union[str, os.PathLike],
-        file_names: Tuple[str],
+        file_names: tuple[str],
         legacy_format: Optional[bool] = None,
         filename_prefix: Optional[str] = None,
-    ) -> Tuple[str]:
+    ) -> tuple[str]:
         """
         Save a tokenizer using the slow-tokenizer/legacy format: vocabulary + added tokens as well as in a unique JSON
         file containing {config + vocab + added-tokens}.
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020-present the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -34,7 +33,7 @@ import time
 import warnings
 from collections.abc import Mapping
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union


 # Integrations must be imported before ML frameworks:
@@ -419,16 +418,16 @@ class Trainer:
         args: TrainingArguments = None,
         data_collator: Optional[DataCollator] = None,
         train_dataset: Optional[Union[Dataset, IterableDataset, "datasets.Dataset"]] = None,
-        eval_dataset: Optional[Union[Dataset, Dict[str, Dataset], "datasets.Dataset"]] = None,
+        eval_dataset: Optional[Union[Dataset, dict[str, Dataset], "datasets.Dataset"]] = None,
         processing_class: Optional[
             Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin]
         ] = None,
         model_init: Optional[Callable[[], PreTrainedModel]] = None,
         compute_loss_func: Optional[Callable] = None,
-        compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
-        callbacks: Optional[List[TrainerCallback]] = None,
-        optimizers: Tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None),
-        optimizer_cls_and_kwargs: Optional[Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]] = None,
+        compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None,
+        callbacks: Optional[list[TrainerCallback]] = None,
+        optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None),
+        optimizer_cls_and_kwargs: Optional[tuple[type[torch.optim.Optimizer], dict[str, Any]]] = None,
         preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
     ):
         if args is None:
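`typing.Type` in `optimizer_cls_and_kwargs` becomes the plain built-in `type`, which subscripts natively on 3.9+ just like `list` and `dict`. A minimal sketch (hypothetical helper, not from this diff):

def instantiate(cls: type[dict], payload: dict[str, int]) -> dict:
    # `type[X]` is the 3.9+ spelling of typing.Type[X]
    return cls(payload)


assert instantiate(dict, {"a": 1}) == {"a": 1}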
@@ -1187,7 +1186,7 @@ class Trainer:
             optimizer = self.optimizer
         self.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer)

-    def get_decay_parameter_names(self, model) -> List[str]:
+    def get_decay_parameter_names(self, model) -> list[str]:
         """
         Get all parameter names that weight decay will be applied to.

@@ -1298,7 +1297,7 @@ class Trainer:
     @staticmethod
     def get_optimizer_cls_and_kwargs(
         args: TrainingArguments, model: Optional[PreTrainedModel] = None
-    ) -> Tuple[Any, Any]:
+    ) -> tuple[Any, Any]:
         """
         Returns the optimizer class and optimizer parameters based on the training arguments.

@@ -1324,10 +1323,10 @@ class Trainer:

         def setup_low_rank_optimizer(
             optimizer_name: str,
-            optimizer_mapping: Dict[str, Any],
-            optim_kwargs: Dict[str, Any],
+            optimizer_mapping: dict[str, Any],
+            optim_kwargs: dict[str, Any],
             is_layerwise_supported: bool = True,
-        ) -> Tuple[Any, Any]:
+        ) -> tuple[Any, Any]:
             """
             Helper function to set up low-rank optimizers like GaLore and Apollo.

@@ -1783,7 +1782,7 @@ class Trainer:
                 logger.warning("Cannot get num_tokens from dataloader")
         return train_tokens

-    def _hp_search_setup(self, trial: Union["optuna.Trial", Dict[str, Any]]):
+    def _hp_search_setup(self, trial: Union["optuna.Trial", dict[str, Any]]):
         """HP search setup code"""
         self._trial = trial

@@ -1839,7 +1838,7 @@ class Trainer:

         self.create_accelerator_and_postprocess()

-    def _report_to_hp_search(self, trial: Union["optuna.Trial", Dict[str, Any]], step: int, metrics: Dict[str, float]):
+    def _report_to_hp_search(self, trial: Union["optuna.Trial", dict[str, Any]], step: int, metrics: dict[str, float]):
         if self.hp_search_backend is None or trial is None:
             return
         metrics = metrics.copy()
@@ -2140,8 +2139,8 @@ class Trainer:
     def train(
         self,
         resume_from_checkpoint: Optional[Union[str, bool]] = None,
-        trial: Union["optuna.Trial", Dict[str, Any]] = None,
-        ignore_keys_for_eval: Optional[List[str]] = None,
+        trial: Union["optuna.Trial", dict[str, Any]] = None,
+        ignore_keys_for_eval: Optional[list[str]] = None,
         **kwargs,
     ):
         """
@@ -3070,7 +3069,7 @@ class Trainer:
             if is_torch_xla_available():
                 xm.mark_step()

-            logs: Dict[str, float] = {}
+            logs: dict[str, float] = {}

             # all_gather + mean() to get average loss over all processes
             tr_loss_scalar = self._nested_gather(tr_loss).mean().item()
@@ -3529,14 +3528,14 @@ class Trainer:

     def hyperparameter_search(
         self,
-        hp_space: Optional[Callable[["optuna.Trial"], Dict[str, float]]] = None,
-        compute_objective: Optional[Callable[[Dict[str, float]], float]] = None,
+        hp_space: Optional[Callable[["optuna.Trial"], dict[str, float]]] = None,
+        compute_objective: Optional[Callable[[dict[str, float]], float]] = None,
         n_trials: int = 20,
-        direction: Union[str, List[str]] = "minimize",
+        direction: Union[str, list[str]] = "minimize",
         backend: Optional[Union["str", HPSearchBackend]] = None,
         hp_name: Optional[Callable[["optuna.Trial"], str]] = None,
         **kwargs,
-    ) -> Union[BestRun, List[BestRun]]:
+    ) -> Union[BestRun, list[BestRun]]:
         """
         Launch an hyperparameter search using `optuna` or `Ray Tune` or `SigOpt`. The optimized quantity is determined
         by `compute_objective`, which defaults to a function returning the evaluation loss when no metric is provided,
@@ -3611,7 +3610,7 @@ class Trainer:
         self.hp_search_backend = None
         return best_run

-    def log(self, logs: Dict[str, float], start_time: Optional[float] = None) -> None:
+    def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None:
         """
         Log `logs` on the various objects watching training.

@@ -3652,7 +3651,7 @@ class Trainer:
             return data.to(**kwargs)
         return data

-    def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]]) -> Dict[str, Union[torch.Tensor, Any]]:
+    def _prepare_inputs(self, inputs: dict[str, Union[torch.Tensor, Any]]) -> dict[str, Union[torch.Tensor, Any]]:
         """
         Prepare `inputs` before feeding them to the model, converting them to tensors if they are not already and
         handling potential state.
@@ -3687,7 +3686,7 @@ class Trainer:
         return ctx_manager

     def training_step(
-        self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], num_items_in_batch=None
+        self, model: nn.Module, inputs: dict[str, Union[torch.Tensor, Any]], num_items_in_batch=None
     ) -> torch.Tensor:
         """
         Perform a training step on a batch of inputs.
@@ -4016,7 +4015,7 @@ class Trainer:

     def _sorted_checkpoints(
         self, output_dir=None, checkpoint_prefix=PREFIX_CHECKPOINT_DIR, use_mtime=False
-    ) -> List[str]:
+    ) -> list[str]:
         ordering_and_checkpoint_path = []

         glob_checkpoints = [str(x) for x in Path(output_dir).glob(f"{checkpoint_prefix}-*") if os.path.isdir(x)]
@@ -4068,10 +4067,10 @@ class Trainer:

     def evaluate(
         self,
-        eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None,
-        ignore_keys: Optional[List[str]] = None,
+        eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None,
+        ignore_keys: Optional[list[str]] = None,
         metric_key_prefix: str = "eval",
-    ) -> Dict[str, float]:
+    ) -> dict[str, float]:
         """
         Run evaluation and returns metrics.

@@ -4171,7 +4170,7 @@ class Trainer:
         return output.metrics

     def predict(
-        self, test_dataset: Dataset, ignore_keys: Optional[List[str]] = None, metric_key_prefix: str = "test"
+        self, test_dataset: Dataset, ignore_keys: Optional[list[str]] = None, metric_key_prefix: str = "test"
     ) -> PredictionOutput:
         """
         Run prediction and returns predictions and potential metrics.
@@ -4239,7 +4238,7 @@ class Trainer:
         dataloader: DataLoader,
         description: str,
         prediction_loss_only: Optional[bool] = None,
-        ignore_keys: Optional[List[str]] = None,
+        ignore_keys: Optional[list[str]] = None,
         metric_key_prefix: str = "eval",
     ) -> EvalLoopOutput:
         """
@@ -4339,11 +4338,11 @@ class Trainer:

             # Update containers
             if losses is not None:
-                losses = self.gather_function((losses.repeat(batch_size)))
+                losses = self.gather_function(losses.repeat(batch_size))
                 all_losses.add(losses)
             if inputs_decode is not None:
                 inputs_decode = self.accelerator.pad_across_processes(inputs_decode, dim=1, pad_index=-100)
-                inputs_decode = self.gather_function((inputs_decode))
+                inputs_decode = self.gather_function(inputs_decode)
                 if not self.args.batch_eval_metrics or description == "Prediction":
                     all_inputs.add(inputs_decode)
             if labels is not None:
@@ -4353,11 +4352,11 @@ class Trainer:
                 logits = self.accelerator.pad_across_processes(logits, dim=1, pad_index=-100)
                 if self.preprocess_logits_for_metrics is not None:
                     logits = self.preprocess_logits_for_metrics(logits, labels)
-                logits = self.gather_function((logits))
+                logits = self.gather_function(logits)
                 if not self.args.batch_eval_metrics or description == "Prediction":
                     all_preds.add(logits)
             if labels is not None:
-                labels = self.gather_function((labels))
+                labels = self.gather_function(labels)
                 if not self.args.batch_eval_metrics or description == "Prediction":
                     all_labels.add(labels)

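The `gather_function` fixes in these two hunks are a small extra cleanup bundled into the commit: `f((x))` passes exactly the same single argument as `f(x)`, since parentheses alone never create a tuple; only a comma does. A minimal sketch:

def gather(x):
    return x


assert gather((42)) == gather(42) == 42  # (42) is just 42, not a tuple
assert gather((42,)) == (42,)  # only the trailing comma makes a tuple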
@@ -4470,10 +4469,10 @@ class Trainer:
     def prediction_step(
         self,
         model: nn.Module,
-        inputs: Dict[str, Union[torch.Tensor, Any]],
+        inputs: dict[str, Union[torch.Tensor, Any]],
         prediction_loss_only: bool,
-        ignore_keys: Optional[List[str]] = None,
-    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+        ignore_keys: Optional[list[str]] = None,
+    ) -> tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
         """
         Perform an evaluation step on `model` using `inputs`.

@@ -4572,7 +4571,7 @@ class Trainer:

         return (loss, logits, labels)

-    def floating_point_ops(self, inputs: Dict[str, Union[torch.Tensor, Any]]):
+    def floating_point_ops(self, inputs: dict[str, Union[torch.Tensor, Any]]):
         """
         For models that inherit from [`PreTrainedModel`], uses that method to compute the number of floating point
         operations for every backward + forward pass. If using another model, either implement such a method in the
@@ -4612,13 +4611,13 @@ class Trainer:
         self,
         language: Optional[str] = None,
         license: Optional[str] = None,
-        tags: Union[str, List[str], None] = None,
+        tags: Union[str, list[str], None] = None,
         model_name: Optional[str] = None,
         finetuned_from: Optional[str] = None,
-        tasks: Union[str, List[str], None] = None,
-        dataset_tags: Union[str, List[str], None] = None,
-        dataset: Union[str, List[str], None] = None,
-        dataset_args: Union[str, List[str], None] = None,
+        tasks: Union[str, list[str], None] = None,
+        dataset_tags: Union[str, list[str], None] = None,
+        dataset: Union[str, list[str], None] = None,
+        dataset_args: Union[str, list[str], None] = None,
     ):
         """
         Creates a draft of a model card using the information available to the `Trainer`.
@@ -4840,7 +4839,7 @@ class Trainer:
         dataloader: DataLoader,
         description: str,
         prediction_loss_only: Optional[bool] = None,
-        ignore_keys: Optional[List[str]] = None,
+        ignore_keys: Optional[list[str]] = None,
         metric_key_prefix: str = "eval",
     ) -> EvalLoopOutput:
         """
@@ -4904,9 +4903,9 @@ class Trainer:
         logger.info(f" Batch size = {batch_size}")

         losses_host: torch.Tensor = None
-        preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
-        labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
-        inputs_host: Union[torch.Tensor, List[torch.Tensor]] = None
+        preds_host: Union[torch.Tensor, list[torch.Tensor]] = None
+        labels_host: Union[torch.Tensor, list[torch.Tensor]] = None
+        inputs_host: Union[torch.Tensor, list[torch.Tensor]] = None
         metrics: Optional[dict] = None
         eval_set_kwargs: dict = {}

@@ -5047,7 +5046,7 @@ class Trainer:

         # Get current .gitignore content
         if os.path.exists(os.path.join(self.repo.local_dir, ".gitignore")):
-            with open(os.path.join(self.repo.local_dir, ".gitignore"), "r") as f:
+            with open(os.path.join(self.repo.local_dir, ".gitignore")) as f:
                 current_content = f.read()
         else:
             current_content = ""
|
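Across these Trainer hunks the commit swaps the deprecated typing.Dict/List/Tuple aliases for the builtin generics that became subscriptable in Python 3.9 (PEP 585), and it also drops redundant inner parentheses in calls such as self.gather_function((logits)). A minimal illustrative sketch of the new-style annotations — the function and values below are invented for the example, not taken from the commit:

    from typing import Optional  # Optional (and Union) still live in typing on 3.9

    def bucket_lengths(lengths: Optional[list[int]] = None) -> dict[str, tuple[int, int]]:
        # Return a named (min, max) range for the given sample lengths.
        lengths = lengths if lengths is not None else [3, 7, 12]
        return {"all": (min(lengths), max(lengths))}

    print(bucket_lengths())  # {'all': (3, 12)}

The next hunks appear to cover TrainerState (presumably trainer_callback.py).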
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020-present the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import dataclasses
 import json
 import math
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union

 import numpy as np
 from tqdm.auto import tqdm
@@ -104,7 +103,7 @@ class TrainerState:
 num_train_epochs: int = 0
 num_input_tokens_seen: int = 0
 total_flos: float = 0
-log_history: List[Dict[str, float]] = None
+log_history: list[dict[str, float]] = None
 best_metric: Optional[float] = None
 best_global_step: Optional[int] = None
 best_model_checkpoint: Optional[str] = None
@@ -112,8 +111,8 @@ class TrainerState:
 is_world_process_zero: bool = True
 is_hyper_param_search: bool = False
 trial_name: str = None
-trial_params: Dict[str, Union[str, float, int, bool]] = None
+trial_params: dict[str, Union[str, float, int, bool]] = None
-stateful_callbacks: List["TrainerCallback"] = None
+stateful_callbacks: list["TrainerCallback"] = None

 def __post_init__(self):
 if self.log_history is None:
@@ -151,7 +150,7 @@ class TrainerState:
 @classmethod
 def load_from_json(cls, json_path: str):
 """Create an instance from the content of `json_path`."""
-with open(json_path, "r", encoding="utf-8") as f:
+with open(json_path, encoding="utf-8") as f:
 text = f.read()
 return cls(**json.loads(text))
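The open(json_path, "r", encoding="utf-8") → open(json_path, encoding="utf-8") rewrite in TrainerState.load_from_json is safe because "r" is already open()'s default mode. A small sketch using a throwaway temp file — nothing here is from the commit:

    import json
    import tempfile

    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False, encoding="utf-8") as f:
        json.dump({"global_step": 10}, f)
        path = f.name

    # Equivalent to open(path, "r", encoding="utf-8"); only the redundant mode is gone.
    with open(path, encoding="utf-8") as f:
        print(json.load(f))  # {'global_step': 10}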
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020-present the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,12 +23,12 @@ import math
 import os
 import sys
 import warnings
-from collections.abc import Mapping
+from collections.abc import Iterator, Mapping
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from itertools import chain
 from logging import StreamHandler
-from typing import Any, Dict, Iterator, List, Optional, Union
+from typing import Any, Optional, Union

 import numpy as np
 import torch
@@ -221,7 +220,7 @@ def distributed_concat(tensor: Any, num_total_examples: Optional[int] = None) ->


 def distributed_broadcast_scalars(
-scalars: List[Union[int, float]],
+scalars: list[Union[int, float]],
 num_total_examples: Optional[int] = None,
 device: Optional[torch.device] = torch.device("cuda"),
 ) -> torch.Tensor:
@@ -624,7 +623,7 @@ class LengthGroupedSampler(Sampler):
 self,
 batch_size: int,
 dataset: Optional[Dataset] = None,
-lengths: Optional[List[int]] = None,
+lengths: Optional[list[int]] = None,
 model_input_name: Optional[str] = None,
 generator=None,
 ):
@@ -675,7 +674,7 @@ class DistributedLengthGroupedSampler(DistributedSampler):
 rank: Optional[int] = None,
 seed: int = 0,
 drop_last: bool = False,
-lengths: Optional[List[int]] = None,
+lengths: Optional[list[int]] = None,
 model_input_name: Optional[str] = None,
 ):
 if dataset is None and lengths is None:
@@ -936,7 +935,7 @@ def _secs2timedelta(secs):
 return f"{datetime.timedelta(seconds=int(secs))}.{msec:02d}"


-def metrics_format(self, metrics: Dict[str, float]) -> Dict[str, float]:
+def metrics_format(self, metrics: dict[str, float]) -> dict[str, float]:
 """
 Reformat Trainer metrics values to a human-readable format

@@ -1080,7 +1079,7 @@ def save_metrics(self, split, metrics, combined=True):
 if combined:
 path = os.path.join(self.args.output_dir, "all_results.json")
 if os.path.exists(path):
-with open(path, "r") as f:
+with open(path) as f:
 all_metrics = json.load(f)
 else:
 all_metrics = {}
@@ -1300,7 +1299,7 @@ class AcceleratorConfig:
 },
 )

-gradient_accumulation_kwargs: Optional[Dict] = field(
+gradient_accumulation_kwargs: Optional[dict] = field(
 default=None,
 metadata={
 "help": "Additional kwargs to configure gradient accumulation, see [`accelerate.utils.GradientAccumulationPlugin`]. "
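One change above is not a plain alias swap: Iterator moves from the typing import into the existing collections.abc import, because the typing aliases for the abc container types are likewise deprecated in favor of the real classes from Python 3.9 on. An illustrative sketch with invented names:

    from collections.abc import Iterator

    def count_up(limit: int) -> Iterator[int]:
        # A generator function satisfies the Iterator protocol.
        yield from range(limit)

    print(list(count_up(3)))  # [0, 1, 2]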
@@ -16,7 +16,7 @@ import contextlib
 import warnings
 from copy import deepcopy
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union

 import torch
 from torch import nn
@@ -59,15 +59,15 @@ class Seq2SeqTrainer(Trainer):
 args: "TrainingArguments" = None,
 data_collator: Optional["DataCollator"] = None,
 train_dataset: Optional[Union[Dataset, "IterableDataset", "datasets.Dataset"]] = None,
-eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None,
+eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None,
 processing_class: Optional[
 Union["PreTrainedTokenizerBase", "BaseImageProcessor", "FeatureExtractionMixin", "ProcessorMixin"]
 ] = None,
 model_init: Optional[Callable[[], "PreTrainedModel"]] = None,
 compute_loss_func: Optional[Callable] = None,
-compute_metrics: Optional[Callable[["EvalPrediction"], Dict]] = None,
+compute_metrics: Optional[Callable[["EvalPrediction"], dict]] = None,
-callbacks: Optional[List["TrainerCallback"]] = None,
+callbacks: Optional[list["TrainerCallback"]] = None,
-optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
 preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
 ):
 super().__init__(
@@ -143,10 +143,10 @@ class Seq2SeqTrainer(Trainer):
 def evaluate(
 self,
 eval_dataset: Optional[Dataset] = None,
-ignore_keys: Optional[List[str]] = None,
+ignore_keys: Optional[list[str]] = None,
 metric_key_prefix: str = "eval",
 **gen_kwargs,
-) -> Dict[str, float]:
+) -> dict[str, float]:
 """
 Run evaluation and returns metrics.

@@ -199,7 +199,7 @@ class Seq2SeqTrainer(Trainer):
 def predict(
 self,
 test_dataset: Dataset,
-ignore_keys: Optional[List[str]] = None,
+ignore_keys: Optional[list[str]] = None,
 metric_key_prefix: str = "test",
 **gen_kwargs,
 ) -> "PredictionOutput":
@@ -263,11 +263,11 @@ class Seq2SeqTrainer(Trainer):
 def prediction_step(
 self,
 model: nn.Module,
-inputs: Dict[str, Union[torch.Tensor, Any]],
+inputs: dict[str, Union[torch.Tensor, Any]],
 prediction_loss_only: bool,
-ignore_keys: Optional[List[str]] = None,
+ignore_keys: Optional[list[str]] = None,
 **gen_kwargs,
-) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
+) -> tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
 """
 Perform an evaluation step on `model` using `inputs`.
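The next file's hunks again start by deleting the # coding=utf-8 cookie: since PEP 3120, Python 3 source is decoded as UTF-8 by default, so the declaration is dead weight. A two-line illustration (the literal is invented):

    # No coding declaration needed: this source file is read as UTF-8 by default.
    greeting = "héllo, wörld"
    print(greeting)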
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020-present the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +24,7 @@ import random
 import re
 import threading
 import time
-from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
+from typing import Any, NamedTuple, Optional, Union

 import numpy as np

@@ -165,10 +164,10 @@ class EvalPrediction:

 def __init__(
 self,
-predictions: Union[np.ndarray, Tuple[np.ndarray]],
+predictions: Union[np.ndarray, tuple[np.ndarray]],
-label_ids: Union[np.ndarray, Tuple[np.ndarray]],
+label_ids: Union[np.ndarray, tuple[np.ndarray]],
-inputs: Optional[Union[np.ndarray, Tuple[np.ndarray]]] = None,
+inputs: Optional[Union[np.ndarray, tuple[np.ndarray]]] = None,
-losses: Optional[Union[np.ndarray, Tuple[np.ndarray]]] = None,
+losses: Optional[Union[np.ndarray, tuple[np.ndarray]]] = None,
 ):
 self.predictions = predictions
 self.label_ids = label_ids
@@ -190,22 +189,22 @@ class EvalPrediction:


 class EvalLoopOutput(NamedTuple):
-predictions: Union[np.ndarray, Tuple[np.ndarray]]
+predictions: Union[np.ndarray, tuple[np.ndarray]]
-label_ids: Optional[Union[np.ndarray, Tuple[np.ndarray]]]
+label_ids: Optional[Union[np.ndarray, tuple[np.ndarray]]]
-metrics: Optional[Dict[str, float]]
+metrics: Optional[dict[str, float]]
 num_samples: Optional[int]


 class PredictionOutput(NamedTuple):
-predictions: Union[np.ndarray, Tuple[np.ndarray]]
+predictions: Union[np.ndarray, tuple[np.ndarray]]
-label_ids: Optional[Union[np.ndarray, Tuple[np.ndarray]]]
+label_ids: Optional[Union[np.ndarray, tuple[np.ndarray]]]
-metrics: Optional[Dict[str, float]]
+metrics: Optional[dict[str, float]]


 class TrainOutput(NamedTuple):
 global_step: int
 training_loss: float
-metrics: Dict[str, float]
+metrics: dict[str, float]


 PREFIX_CHECKPOINT_DIR = "checkpoint"
@@ -267,12 +266,12 @@ class BestRun(NamedTuple):
 """

 run_id: str
-objective: Union[float, List[float]]
+objective: Union[float, list[float]]
-hyperparameters: Dict[str, Any]
+hyperparameters: dict[str, Any]
 run_summary: Optional[Any] = None


-def default_compute_objective(metrics: Dict[str, float]) -> float:
+def default_compute_objective(metrics: dict[str, float]) -> float:
 """
 The default objective to maximize/minimize when doing an hyperparameter search. It is the evaluation loss if no
 metrics are provided to the [`Trainer`], the sum of all metrics otherwise.
@@ -297,7 +296,7 @@ def default_compute_objective(metrics: Dict[str, float]) -> float:
 return loss if len(metrics) == 0 else sum(metrics.values())


-def default_hp_space_optuna(trial) -> Dict[str, float]:
+def default_hp_space_optuna(trial) -> dict[str, float]:
 from .integrations import is_optuna_available

 assert is_optuna_available(), "This function needs Optuna installed: `pip install optuna`"
@@ -309,7 +308,7 @@ def default_hp_space_optuna(trial) -> Dict[str, float]:
 }


-def default_hp_space_ray(trial) -> Dict[str, float]:
+def default_hp_space_ray(trial) -> dict[str, float]:
 from .integrations import is_ray_tune_available

 assert is_ray_tune_available(), "This function needs ray installed: `pip install ray[tune]`"
@@ -336,7 +335,7 @@ def default_hp_space_sigopt(trial):
 ]


-def default_hp_space_wandb(trial) -> Dict[str, float]:
+def default_hp_space_wandb(trial) -> dict[str, float]:
 from .integrations import is_wandb_available

 if not is_wandb_available():
@@ -867,7 +866,7 @@ class RemoveColumnsCollator:
 self.message_logged = True
 return {k: v for k, v in feature.items() if k in self.signature_columns}

-def __call__(self, features: List[dict]):
+def __call__(self, features: list[dict]):
 features = [self._remove_columns(feature) for feature in features]
 return self.data_collator(features)
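In the hunk below, note that builtin generics accept string forward references just as typing.Tuple did — the subscript in tuple["tf.distribute.Strategy", int] is never evaluated against the string at runtime, which matters when TensorFlow may not be importable. A sketch with an invented placeholder class:

    def make_pair() -> tuple["LazyThing", int]:
        # The "LazyThing" string is only resolved by type checkers, not at runtime.
        return (LazyThing(), 1)

    class LazyThing:
        pass

    print(make_pair()[1])  # 1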
@@ -14,7 +14,7 @@

 import warnings
 from dataclasses import dataclass, field
-from typing import Optional, Tuple
+from typing import Optional

 from .training_args import TrainingArguments
 from .utils import cached_property, is_tf_available, logging, requires_backends
@@ -189,7 +189,7 @@ class TFTrainingArguments(TrainingArguments):
 xla: bool = field(default=False, metadata={"help": "Whether to activate the XLA compilation or not"})

 @cached_property
-def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]:
+def _setup_strategy(self) -> tuple["tf.distribute.Strategy", int]:
 requires_backends(self, ["tf"])
 logger.info("Tensorflow: setting up strategy")
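A pass like this commit's can be reproduced with pyupgrade itself. The exact invocation the authors used is not recorded on this page, so the driver below is an assumed, illustrative one (it requires git and pyupgrade on PATH, and naively splits filenames on whitespace):

    import subprocess

    # Collect the repository's tracked Python files.
    files = subprocess.run(
        ["git", "ls-files", "*.py"], capture_output=True, text=True, check=True
    ).stdout.split()

    # pyupgrade rewrites files in place and exits non-zero when it changed
    # something, so check=True is deliberately omitted here.
    subprocess.run(["pyupgrade", "--py39-plus", *files])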