Use Python 3.9 syntax in examples (#37279)

Signed-off-by: cyy <cyyever@outlook.com>
cyyever authored on 2025-04-07 19:52:21 +08:00 (committed by GitHub)
parent 08f36771b3
commit 0fb8d49e88
123 changed files with 358 additions and 451 deletions
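The same mechanical modernizations recur throughout: PEP 585 builtin generics (list/dict/tuple) replace their typing aliases, typing.Iterator/Iterable move to collections.abc, %-interpolation and str.format become f-strings, redundant `# coding=utf-8` headers (UTF-8 is the default source encoding in Python 3, PEP 3120) and explicit "r" open modes are dropped, EnvironmentError becomes its OSError alias, and `(object)` base classes go away. A minimal before/after sketch of the typing change (hypothetical function, not taken from the diff):

    # before: typing aliases (required on Python 3.8 and earlier)
    from typing import Dict, List, Tuple

    def count_pairs_old(items: List[str]) -> Dict[str, Tuple[int, int]]:
        return {s: (len(s), items.count(s)) for s in items}

    # after: builtin generics (Python 3.9+, PEP 585)
    def count_pairs_new(items: list[str]) -> dict[str, tuple[int, int]]:
        return {s: (len(s), items.count(s)) for s in items}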

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -833,8 +832,7 @@ def main():
         # No need to shuffle here
         loader = data_loader(rng, _ds, batch_size=batch_size, shuffle=False)
-        for batch in loader:
-            yield batch
+        yield from loader

     # Metric
     metric = evaluate.load("rouge", cache_dir=model_args.cache_dir)
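The manual loop is replaced by `yield from` (PEP 380, Python 3.3+), which delegates directly to the sub-iterator with identical behavior for plain pass-through iteration. A sketch with hypothetical names:

    def iter_batches_old(loader):
        for batch in loader:  # manual delegation
            yield batch

    def iter_batches_new(loader):
        yield from loader  # equivalent for simple pass-through iteration

    assert list(iter_batches_old(range(3))) == list(iter_batches_new(range(3)))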

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,7 +29,7 @@ from dataclasses import asdict, dataclass, field
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Optional

 import flax
 import jax
@@ -294,7 +293,7 @@ class FlaxDataCollatorForBartDenoisingLM:
                 " language modeling. "
             )

-    def __call__(self, examples: List[Dict[str, List[int]]]) -> BatchEncoding:
+    def __call__(self, examples: list[dict[str, list[int]]]) -> BatchEncoding:
        # convert list to dict and tensorize input
        batch = BatchEncoding(
            {k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}
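Note that `Optional` keeps its `typing` import throughout the commit: the `X | None` union syntax is PEP 604 and requires Python 3.10, so a 3.9 floor mixes builtin generics with `Optional`. A sketch (hypothetical function):

    from typing import Optional

    # valid on 3.9: builtin generics plus typing.Optional
    def head(xs: list[int]) -> Optional[int]:
        return xs[0] if xs else None

    # needs 3.10+ (PEP 604), so it is avoided here:
    # def head(xs: list[int]) -> int | None: ...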

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -33,7 +32,7 @@ from itertools import chain
 # You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Optional

 import flax
 import jax
@@ -302,7 +301,7 @@ class FlaxDataCollatorForLanguageModeling:
                 "You should pass `mlm=False` to train on causal language modeling instead."
             )

-    def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multiple_of: int) -> Dict[str, np.ndarray]:
+    def __call__(self, examples: list[dict[str, np.ndarray]], pad_to_multiple_of: int) -> dict[str, np.ndarray]:
        # Handle dict or lists with proper padding and conversion to tensor.
        batch = self.tokenizer.pad(examples, pad_to_multiple_of=pad_to_multiple_of, return_tensors=TensorType.NUMPY)
@@ -316,7 +315,7 @@ class FlaxDataCollatorForLanguageModeling:
     def mask_tokens(
         self, inputs: np.ndarray, special_tokens_mask: Optional[np.ndarray]
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.
        """

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -32,7 +31,7 @@ from dataclasses import asdict, dataclass, field
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Optional

 import flax
 import jax
@@ -338,7 +337,7 @@ class FlaxDataCollatorForT5MLM:
     pad_token_id: int
     decoder_start_token_id: int

-    def __call__(self, examples: List[Dict[str, np.ndarray]]) -> BatchEncoding:
+    def __call__(self, examples: list[dict[str, np.ndarray]]) -> BatchEncoding:
        # convert list to dict and tensorize input
        batch = BatchEncoding(
            {k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}

View File

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import json
-from typing import Iterator, List, Union
+from collections.abc import Iterator
+from typing import Union

 from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, trainers
 from tokenizers.implementations.base_tokenizer import BaseTokenizer
@@ -72,7 +73,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
     def train(
         self,
-        files: Union[str, List[str]],
+        files: Union[str, list[str]],
         vocab_size: int = 8000,
         show_progress: bool = True,
     ):
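PEP 585 also deprecated the ABC re-exports in `typing` (Iterator, Iterable, and friends); from 3.9 on the canonical home is `collections.abc`, and those classes are subscriptable at runtime. A sketch (hypothetical function):

    from collections.abc import Iterator

    def windows(xs: list[int], n: int) -> Iterator[list[int]]:
        # collections.abc.Iterator is generic at runtime since 3.9
        for i in range(len(xs) - n + 1):
            yield xs[i : i + n]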

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,7 +27,7 @@ import time
 from dataclasses import asdict, dataclass, field
 from enum import Enum
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Optional

 import datasets
 import evaluate
@@ -908,8 +907,8 @@ def main():
     # region Define train step functions
     def train_step(
-        state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
-    ) -> Tuple[train_state.TrainState, float]:
+        state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
+    ) -> tuple[train_state.TrainState, float]:
        """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
        dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
        start_positions = batch.pop("start_positions")

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import collections
 import json
 import logging
 import os
-from typing import Optional, Tuple
+from typing import Optional

 import numpy as np
 from tqdm.auto import tqdm
@@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
 def postprocess_qa_predictions(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -223,7 +222,7 @@ def postprocess_qa_predictions(
     # If we have an output_dir, let's save all those dicts.
     if output_dir is not None:
         if not os.path.isdir(output_dir):
-            raise EnvironmentError(f"{output_dir} is not a directory.")
+            raise OSError(f"{output_dir} is not a directory.")

         prediction_file = os.path.join(
             output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@@ -253,7 +252,7 @@
 def postprocess_qa_predictions_with_beam_search(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
     # If we have an output_dir, let's save all those dicts.
     if output_dir is not None:
         if not os.path.isdir(output_dir):
-            raise EnvironmentError(f"{output_dir} is not a directory.")
+            raise OSError(f"{output_dir} is not a directory.")

         prediction_file = os.path.join(
             output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
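`EnvironmentError`, like `IOError`, has been a plain alias of `OSError` since Python 3.3 (PEP 3151), so this rename changes nothing at runtime; it just uses the canonical name. A sketch (hypothetical helper):

    import os

    assert EnvironmentError is OSError and IOError is OSError  # aliases since 3.3

    def require_dir(path: str) -> None:
        # raising OSError is exactly equivalent to the old EnvironmentError
        if not os.path.isdir(path):
            raise OSError(f"{path} is not a directory.")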

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +24,7 @@ import time
 from dataclasses import field
 from functools import partial
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Optional, Union

 import datasets
 import evaluate
@@ -303,7 +302,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
     pad_input_to_multiple_of: Optional[int] = None
     pad_target_to_multiple_of: Optional[int] = None

-    def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
+    def __call__(self, features: list[dict[str, Union[list[int], np.ndarray]]]) -> dict[str, np.ndarray]:
        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        model_input_name = self.processor.model_input_names[0]

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 HuggingFace Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -64,7 +63,7 @@ def get_setup_file():
 def get_results(output_dir, split="eval"):
     path = os.path.join(output_dir, f"{split}_results.json")
     if os.path.exists(path):
-        with open(path, "r") as f:
+        with open(path) as f:
             return json.load(f)
     raise ValueError(f"can't find {path}")
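`"r"` (read, text mode) is already `open()`'s default, so dropping it is behavior-preserving; where an explicit `encoding=` keyword was present, only the mode string is removed. A sketch (hypothetical path):

    # these two calls are identical; the mode argument defaults to "r"
    with open("eval_results.json") as f:
        data = f.read()
    with open("eval_results.json", "r") as f:
        data = f.read()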

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +24,7 @@ import time
 import warnings
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Optional

 import datasets
 import evaluate
@@ -572,8 +571,8 @@ def main():
     # define step functions
     def train_step(
-        state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
-    ) -> Tuple[train_state.TrainState, float]:
+        state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
+    ) -> tuple[train_state.TrainState, float]:
        """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
        dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
        targets = batch.pop("labels")

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -27,7 +26,7 @@ from dataclasses import asdict, dataclass, field
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Optional

 import datasets
 import evaluate
@@ -651,8 +650,8 @@ def main():
     # define step functions
     def train_step(
-        state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
-    ) -> Tuple[train_state.TrainState, float]:
+        state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
+    ) -> tuple[train_state.TrainState, float]:
        """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
        dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
        targets = batch.pop("labels")

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -15,7 +15,7 @@
 import csv
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import Optional

 import matplotlib.pyplot as plt
 import numpy as np
@@ -59,7 +59,7 @@ class PlotArguments:
         default=None,
         metadata={"help": "Filename under which the plot will be saved. If unused no plot is saved."},
     )
-    short_model_names: Optional[List[str]] = list_field(
+    short_model_names: Optional[list[str]] = list_field(
        default=None, metadata={"help": "List of model names that are used instead of the ones in the csv file."}
    )

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -18,7 +17,7 @@
 import logging
 import os
 from dataclasses import dataclass, field
-from typing import Dict, Optional
+from typing import Optional

 import numpy as np
 from utils_multiple_choice import MultipleChoiceDataset, Split, processors
@@ -187,7 +186,7 @@ def main():
         else None
     )

-    def compute_metrics(p: EvalPrediction) -> Dict:
+    def compute_metrics(p: EvalPrediction) -> dict:
        preds = np.argmax(p.predictions, axis=1)
        return {"acc": simple_accuracy(preds, p.label_ids)}
@@ -228,7 +227,7 @@ def main():
         logger.info("***** Eval results *****")
         for key, value in result.items():
             logger.info("  %s = %s", key, value)
-            writer.write("%s = %s\n" % (key, value))
+            writer.write("{} = {}\n".format(key, value))
         results.update(result)
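Printf-style `%` interpolation is rewritten with `str.format` here, and as f-strings elsewhere in the commit when the substitutions are simple expressions; the three forms produce identical output for cases like this. A sketch (hypothetical values):

    key, value = "acc", 0.91
    printf_style = "%s = %s\n" % (key, value)
    format_style = "{} = {}\n".format(key, value)
    fstring_style = f"{key} = {value}\n"
    assert printf_style == format_style == fstring_style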

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -22,7 +21,7 @@ import logging
 import os
 from dataclasses import dataclass
 from enum import Enum
-from typing import List, Optional
+from typing import Optional

 import tqdm
 from filelock import FileLock
@@ -49,8 +48,8 @@ class InputExample:
     example_id: str
     question: str
-    contexts: List[str]
-    endings: List[str]
+    contexts: list[str]
+    endings: list[str]
     label: Optional[str]
@@ -62,9 +61,9 @@
     """

     example_id: str
-    input_ids: List[List[int]]
-    attention_mask: Optional[List[List[int]]]
-    token_type_ids: Optional[List[List[int]]]
+    input_ids: list[list[int]]
+    attention_mask: Optional[list[list[int]]]
+    token_type_ids: Optional[list[list[int]]]
     label: Optional[int]
@@ -84,7 +83,7 @@ if is_torch_available():
         soon.
         """

-        features: List[InputFeatures]
+        features: list[InputFeatures]

        def __init__(
            self,
@@ -149,7 +148,7 @@ if is_tf_available():
         soon.
         """

-        features: List[InputFeatures]
+        features: list[InputFeatures]

        def __init__(
            self,
@@ -253,7 +252,7 @@ class RaceProcessor(DataProcessor):
     def get_train_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} train".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} train")
        high = os.path.join(data_dir, "train/high")
        middle = os.path.join(data_dir, "train/middle")
        high = self._read_txt(high)
@@ -262,7 +261,7 @@ class RaceProcessor(DataProcessor):
     def get_dev_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} dev".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} dev")
        high = os.path.join(data_dir, "dev/high")
        middle = os.path.join(data_dir, "dev/middle")
        high = self._read_txt(high)
@@ -271,7 +270,7 @@ class RaceProcessor(DataProcessor):
     def get_test_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} test".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} test")
        high = os.path.join(data_dir, "test/high")
        middle = os.path.join(data_dir, "test/middle")
        high = self._read_txt(high)
@@ -286,7 +285,7 @@ class RaceProcessor(DataProcessor):
         lines = []
         files = glob.glob(input_dir + "/*txt")
         for file in tqdm.tqdm(files, desc="read files"):
-            with open(file, "r", encoding="utf-8") as fin:
+            with open(file, encoding="utf-8") as fin:
                data_raw = json.load(fin)
                data_raw["race_id"] = file
                lines.append(data_raw)
@@ -296,7 +295,7 @@ class RaceProcessor(DataProcessor):
         """Creates examples for the training and dev sets."""
         examples = []
         for _, data_raw in enumerate(lines):
-            race_id = "%s-%s" % (set_type, data_raw["race_id"])
+            race_id = "{}-{}".format(set_type, data_raw["race_id"])
            article = data_raw["article"]
            for i in range(len(data_raw["answers"])):
                truth = str(ord(data_raw["answers"][i]) - ord("A"))
@@ -320,17 +319,17 @@ class SynonymProcessor(DataProcessor):
     def get_train_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} train".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} train")
        return self._create_examples(self._read_csv(os.path.join(data_dir, "mctrain.csv")), "train")

     def get_dev_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} dev".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} dev")
        return self._create_examples(self._read_csv(os.path.join(data_dir, "mchp.csv")), "dev")

     def get_test_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} dev".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} dev")
        return self._create_examples(self._read_csv(os.path.join(data_dir, "mctest.csv")), "test")
@@ -339,10 +338,10 @@
         return ["0", "1", "2", "3", "4"]

     def _read_csv(self, input_file):
-        with open(input_file, "r", encoding="utf-8") as f:
+        with open(input_file, encoding="utf-8") as f:
            return list(csv.reader(f))

-    def _create_examples(self, lines: List[List[str]], type: str):
+    def _create_examples(self, lines: list[list[str]], type: str):
        """Creates examples for the training and dev sets."""

        examples = [
@@ -366,17 +365,17 @@ class SwagProcessor(DataProcessor):
     def get_train_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} train".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} train")
        return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train")

     def get_dev_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} dev".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} dev")
        return self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev")

     def get_test_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} dev".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} dev")
        raise ValueError(
            "For swag testing, the input file does not contain a label column. It can not be tested in current code "
            "setting!"
@@ -388,10 +387,10 @@ class SwagProcessor(DataProcessor):
         return ["0", "1", "2", "3"]

     def _read_csv(self, input_file):
-        with open(input_file, "r", encoding="utf-8") as f:
+        with open(input_file, encoding="utf-8") as f:
            return list(csv.reader(f))

-    def _create_examples(self, lines: List[List[str]], type: str):
+    def _create_examples(self, lines: list[list[str]], type: str):
        """Creates examples for the training and dev sets."""
        if type == "train" and lines[0][-1] != "label":
            raise ValueError("For training, the input file must contain a label column.")
@@ -417,16 +416,16 @@ class ArcProcessor(DataProcessor):
     def get_train_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} train".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} train")
        return self._create_examples(self._read_json(os.path.join(data_dir, "train.jsonl")), "train")

     def get_dev_examples(self, data_dir):
         """See base class."""
-        logger.info("LOOKING AT {} dev".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} dev")
        return self._create_examples(self._read_json(os.path.join(data_dir, "dev.jsonl")), "dev")

     def get_test_examples(self, data_dir):
-        logger.info("LOOKING AT {} test".format(data_dir))
+        logger.info(f"LOOKING AT {data_dir} test")
        return self._create_examples(self._read_json(os.path.join(data_dir, "test.jsonl")), "test")

     def get_labels(self):
@@ -434,7 +433,7 @@ class ArcProcessor(DataProcessor):
         return ["0", "1", "2", "3"]

     def _read_json(self, input_file):
-        with open(input_file, "r", encoding="utf-8") as fin:
+        with open(input_file, encoding="utf-8") as fin:
            lines = fin.readlines()
            return lines
@@ -504,11 +503,11 @@ class ArcProcessor(DataProcessor):
 def convert_examples_to_features(
-    examples: List[InputExample],
-    label_list: List[str],
+    examples: list[InputExample],
+    label_list: list[str],
     max_length: int,
     tokenizer: PreTrainedTokenizer,
-) -> List[InputFeatures]:
+) -> list[InputFeatures]:
    """
    Loads a data file into a list of `InputFeatures`
    """

View File

@@ -2,7 +2,7 @@ import argparse
 import logging
 import os
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any

 import pytorch_lightning as pl
 from pytorch_lightning.utilities import rank_zero_info
@@ -201,7 +201,7 @@ class BaseTransformer(pl.LightningModule):
         )

     @pl.utilities.rank_zero_only
-    def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
+    def on_save_checkpoint(self, checkpoint: dict[str, Any]) -> None:
        save_path = self.output_dir.joinpath("best_tfmr")
        self.model.config.save_step = self.step_count
        self.model.save_pretrained(save_path)
@@ -282,7 +282,7 @@ class LoggingCallback(pl.Callback):
         # Log results
         for key in sorted(metrics):
             if key not in ["log", "progress_bar"]:
-                rank_zero_info("{} = {}\n".format(key, str(metrics[key])))
+                rank_zero_info(f"{key} = {str(metrics[key])}\n")

     def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule):
         rank_zero_info("***** Test results *****")
@@ -292,8 +292,8 @@
         with open(output_test_results_file, "w") as writer:
             for key in sorted(metrics):
                 if key not in ["log", "progress_bar"]:
-                    rank_zero_info("{} = {}\n".format(key, str(metrics[key])))
-                    writer.write("{} = {}\n".format(key, str(metrics[key])))
+                    rank_zero_info(f"{key} = {str(metrics[key])}\n")
+                    writer.write(f"{key} = {str(metrics[key])}\n")

 def add_generic_args(parser, root_dir) -> None:

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -231,14 +230,14 @@ def train(args, train_dataset, model, tokenizer):
                 if args.local_rank == -1 and args.evaluate_during_training:
                     results = evaluate(args, model, tokenizer)
                     for key, value in results.items():
-                        tb_writer.add_scalar("eval_{}".format(key), value, global_step)
+                        tb_writer.add_scalar(f"eval_{key}", value, global_step)
                 tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                 tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
                 logging_loss = tr_loss

             # Save model checkpoint
             if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
-                output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
+                output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}")
                # Take care of distributed/parallel training
                model_to_save = model.module if hasattr(model, "module") else model
                model_to_save.save_pretrained(output_dir)
@@ -281,7 +280,7 @@ def evaluate(args, model, tokenizer, prefix=""):
         model = torch.nn.DataParallel(model)

     # Eval!
-    logger.info("***** Running evaluation {} *****".format(prefix))
+    logger.info(f"***** Running evaluation {prefix} *****")
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
@@ -348,11 +347,11 @@ def evaluate(args, model, tokenizer, prefix=""):
     logger.info("  Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(dataset))

     # Compute predictions
-    output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix))
-    output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix))
+    output_prediction_file = os.path.join(args.output_dir, f"predictions_{prefix}.json")
+    output_nbest_file = os.path.join(args.output_dir, f"nbest_predictions_{prefix}.json")

     if args.version_2_with_negative:
-        output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix))
+        output_null_log_odds_file = os.path.join(args.output_dir, f"null_odds_{prefix}.json")
     else:
         output_null_log_odds_file = None
@@ -828,10 +827,10 @@ def main():
             # Evaluate
             result = evaluate(args, model, tokenizer, prefix=global_step)

-            result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
+            result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()}
            results.update(result)

-    logger.info("Results: {}".format(results))
+    logger.info(f"Results: {results}")

    return results
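Note the asymmetry in this file: eagerly built strings (`str.format`, `%`) become f-strings, but calls like `logger.info("  Num examples = %d", len(dataset))` are left alone, because logging's %-style arguments are only interpolated if the record actually passes the level filter. A sketch (hypothetical path):

    import logging

    logger = logging.getLogger(__name__)
    prefix = "step-500"  # hypothetical
    logger.info(f"***** Running evaluation {prefix} *****")  # eager: string built before the call
    logger.info("***** Running evaluation %s *****", prefix)  # lazy: formatted only if emitted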

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #

View File

@@ -20,10 +20,10 @@ def fill_mask(masked_input, model, tokenizer, topk=5):
     topk_filled_outputs = []
     for index, predicted_token_bpe in enumerate(topk_predicted_token_bpe.split(" ")):
         predicted_token = predicted_token_bpe.replace("\u2581", " ")
-        if " {0}".format(masked_token) in masked_input:
+        if f" {masked_token}" in masked_input:
            topk_filled_outputs.append(
                (
-                    masked_input.replace(" {0}".format(masked_token), predicted_token),
+                    masked_input.replace(f" {masked_token}", predicted_token),
                    values[index].item(),
                    predicted_token,
                )

View File

@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import argparse
 import json
-from typing import List

 from ltp import LTP
@@ -42,7 +41,7 @@ def is_chinese(word: str):
     return 1

-def get_chinese_word(tokens: List[str]):
+def get_chinese_word(tokens: list[str]):
    word_set = set()

    for token in tokens:
@@ -53,7 +52,7 @@ def get_chinese_word(tokens: List[str]):
     return word_list

-def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
+def add_sub_symbol(bert_tokens: list[str], chinese_word_set: set()):
    if not chinese_word_set:
        return bert_tokens
    max_word_len = max([len(w) for w in chinese_word_set])
@@ -77,7 +76,7 @@ def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
     return bert_word

-def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
+def prepare_ref(lines: list[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
    ltp_res = []

    for i in range(0, len(lines), 100):
@@ -117,7 +116,7 @@ def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
 def main(args):
     # For Chinese (Ro)Bert, the best result is from : RoBERTa-wwm-ext (https://github.com/ymcui/Chinese-BERT-wwm)
     # If we want to fine-tune these model, we have to use same tokenizer : LTP (https://github.com/HIT-SCIR/ltp)
-    with open(args.file_name, "r", encoding="utf-8") as f:
+    with open(args.file_name, encoding="utf-8") as f:
        data = f.readlines()
    data = [line.strip() for line in data if len(line) > 0 and not line.isspace()]  # avoid delimiter like '\u2029'
    ltp_tokenizer = LTP(args.ltp)  # faster in GPU device

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -358,7 +357,7 @@ def main():
             logger.info("***** Eval results *****")
             for key in sorted(result.keys()):
                 logger.info("  %s = %s", key, str(result[key]))
-                writer.write("%s = %s\n" % (key, str(result[key])))
+                writer.write("{} = {}\n".format(key, str(result[key])))
             results.update(result)

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -163,7 +162,7 @@ def main():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     n_gpu = torch.cuda.device_count()
-    logger.info("device: {}, n_gpu {}".format(device, n_gpu))
+    logger.info(f"device: {device}, n_gpu {n_gpu}")

     if not args.do_train and not args.do_eval:
         raise ValueError("At least one of `do_train` or `do_eval` must be True.")
@@ -261,7 +260,7 @@ def main():
                 loss.item() if exp_average_loss is None else 0.7 * exp_average_loss + 0.3 * loss.item()
             )
             nb_tr_steps += 1
-            tqdm_bar.desc = "Training loss: {:.2e} lr: {:.2e}".format(exp_average_loss, scheduler.get_lr()[0])
+            tqdm_bar.desc = f"Training loss: {exp_average_loss:.2e} lr: {scheduler.get_lr()[0]:.2e}"

     # Save a trained model
     if args.do_train:
@@ -313,7 +312,7 @@ def main():
             logger.info("***** Eval results *****")
             for key in sorted(result.keys()):
                 logger.info("  %s = %s", key, str(result[key]))
-                writer.write("%s = %s\n" % (key, str(result[key])))
+                writer.write("{} = {}\n".format(key, str(result[key])))

 if __name__ == "__main__":
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -51,7 +50,7 @@ except ImportError:
 logger = logging.getLogger(__name__)

-class SwagExample(object):
+class SwagExample:
    """A single training/test example for the SWAG dataset."""

    def __init__(self, swag_id, context_sentence, start_ending, ending_0, ending_1, ending_2, ending_3, label=None):
@@ -71,22 +70,22 @@ class SwagExample(object):
     def __repr__(self):
         attributes = [
-            "swag_id: {}".format(self.swag_id),
-            "context_sentence: {}".format(self.context_sentence),
-            "start_ending: {}".format(self.start_ending),
-            "ending_0: {}".format(self.endings[0]),
-            "ending_1: {}".format(self.endings[1]),
-            "ending_2: {}".format(self.endings[2]),
-            "ending_3: {}".format(self.endings[3]),
+            f"swag_id: {self.swag_id}",
+            f"context_sentence: {self.context_sentence}",
+            f"start_ending: {self.start_ending}",
+            f"ending_0: {self.endings[0]}",
+            f"ending_1: {self.endings[1]}",
+            f"ending_2: {self.endings[2]}",
+            f"ending_3: {self.endings[3]}",
        ]
        if self.label is not None:
-            attributes.append("label: {}".format(self.label))
+            attributes.append(f"label: {self.label}")
        return ", ".join(attributes)

-class InputFeatures(object):
+class InputFeatures:
    def __init__(self, example_id, choices_features, label):
        self.example_id = example_id
        self.choices_features = [
@@ -97,7 +96,7 @@ class InputFeatures(object):
 def read_swag_examples(input_file, is_training=True):
-    with open(input_file, "r", encoding="utf-8") as f:
+    with open(input_file, encoding="utf-8") as f:
        lines = list(csv.reader(f))

    if is_training and lines[0][-1] != "label":
@@ -179,15 +178,15 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, is_training):
         label = example.label
         if example_index < 5:
             logger.info("*** Example ***")
-            logger.info("swag_id: {}".format(example.swag_id))
+            logger.info(f"swag_id: {example.swag_id}")
            for choice_idx, (tokens, input_ids, input_mask, segment_ids) in enumerate(choices_features):
-                logger.info("choice: {}".format(choice_idx))
+                logger.info(f"choice: {choice_idx}")
                logger.info("tokens: {}".format(" ".join(tokens)))
                logger.info("input_ids: {}".format(" ".join(map(str, input_ids))))
                logger.info("input_mask: {}".format(" ".join(map(str, input_mask))))
                logger.info("segment_ids: {}".format(" ".join(map(str, segment_ids))))
            if is_training:
-                logger.info("label: {}".format(label))
+                logger.info(f"label: {label}")

        features.append(InputFeatures(example_id=example.swag_id, choices_features=choices_features, label=label))
@@ -382,14 +381,14 @@ def train(args, train_dataset, model, tokenizer):
                 ):  # Only evaluate when single GPU otherwise metrics may not average well
                     results = evaluate(args, model, tokenizer)
                     for key, value in results.items():
-                        tb_writer.add_scalar("eval_{}".format(key), value, global_step)
+                        tb_writer.add_scalar(f"eval_{key}", value, global_step)
                 tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                 tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
                 logging_loss = tr_loss

             if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                 # Save model checkpoint
-                output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
+                output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}")
                model_to_save = (
                    model.module if hasattr(model, "module") else model
                )  # Take care of distributed/parallel training
@@ -423,7 +422,7 @@ def evaluate(args, model, tokenizer, prefix=""):
     eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

     # Eval!
-    logger.info("***** Running evaluation {} *****".format(prefix))
+    logger.info(f"***** Running evaluation {prefix} *****")
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
@@ -466,7 +465,7 @@ def evaluate(args, model, tokenizer, prefix=""):
         logger.info("***** Eval results *****")
         for key in sorted(result.keys()):
             logger.info("%s = %s", key, str(result[key]))
-            writer.write("%s = %s\n" % (key, str(result[key])))
+            writer.write("{} = {}\n".format(key, str(result[key])))

     return result
@@ -710,10 +709,10 @@ def main():
             # Evaluate
             result = evaluate(args, model, tokenizer, prefix=global_step)

-            result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
+            result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()}
            results.update(result)

-    logger.info("Results: {}".format(results))
+    logger.info(f"Results: {results}")

    return results
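In Python 3 every class is new-style, so the explicit `object` base is redundant and dropping it is purely cosmetic. A sketch:

    class SwagExampleOld(object):  # Python 2 idiom for new-style classes
        pass

    class SwagExampleNew:  # equivalent in Python 3; every class inherits from object
        pass

    assert SwagExampleNew.__mro__ == (SwagExampleNew, object)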

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -66,7 +65,7 @@ def main():
         ptvsd.wait_for_attach()

     device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
-    logger.info("device: {}".format(device))
+    logger.info(f"device: {device}")

     # Load a pre-processed dataset
     # You can also build the corpus yourself using TransfoXLCorpus methods
@@ -111,7 +110,7 @@ def main():
             total_loss += seq_len * loss.item()
             total_len += seq_len
         total_time = time.time() - start_time
-        logger.info("Time : {:.2f}s, {:.2f}ms/segment".format(total_time, 1000 * total_time / (idx + 1)))
+        logger.info(f"Time : {total_time:.2f}s, {1000 * total_time / (idx + 1):.2f}ms/segment")
        return total_loss / total_len

    # Run on test data.

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 Huggingface
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import io
 import json
 import unittest
@@ -25,7 +23,7 @@ from utils import calculate_bleu
 filename = get_tests_dir() + "/test_data/fsmt/fsmt_val_data.json"
-with io.open(filename, "r", encoding="utf-8") as f:
+with open(filename, encoding="utf-8") as f:
    bleu_data = json.load(f)
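`io.open` has been the same object as the builtin `open` throughout Python 3, so the import and prefix were dead weight. A sketch:

    import io

    # builtins.open is io.open in Python 3; the explicit io import adds nothing
    assert io.open is open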

View File

@@ -19,7 +19,6 @@ import time
 from json import JSONDecodeError
 from logging import getLogger
 from pathlib import Path
-from typing import Dict, List

 import torch
 from torch.utils.data import DataLoader
@@ -55,10 +54,10 @@ def eval_data_dir(
     task="summarization",
     local_rank=None,
     num_return_sequences=1,
-    dataset_kwargs: Dict = None,
+    dataset_kwargs: dict = None,
     prefix="",
     **generate_kwargs,
-) -> Dict:
+) -> dict:
    """Run evaluation on part of the data for one gpu and save to {save_dir}/rank_{rank}_output.json"""
    model_name = str(model_name)
    assert local_rank is not None
@@ -211,7 +210,7 @@ def run_generate():
     calc_bleu = "translation" in args.task
     score_fn = calculate_bleu if calc_bleu else calculate_rouge
     metric_name = "bleu" if calc_bleu else "rouge"
-    metrics: Dict = score_fn(preds, labels)
+    metrics: dict = score_fn(preds, labels)
     metrics["n_obs"] = len(preds)
     runtime = time.time() - start_time
     metrics["seconds_per_sample"] = round(runtime / metrics["n_obs"], 4)
@@ -227,7 +226,7 @@ def run_generate():
         shutil.rmtree(json_save_dir)

-def combine_partial_results(partial_results) -> List:
+def combine_partial_results(partial_results) -> list:
    """Concatenate partial results into one file, then sort it by id."""
    records = []
    for partial_result in partial_results:
@@ -237,7 +236,7 @@ def combine_partial_results(partial_results) -> List:
     return preds

-def gather_results_from_each_node(num_replicas, save_dir, timeout) -> List[Dict[str, List]]:
+def gather_results_from_each_node(num_replicas, save_dir, timeout) -> list[dict[str, list]]:
    # WAIT FOR lots of .json files
    start_wait = time.time()
    logger.info("waiting for all nodes to finish")

View File

@@ -20,7 +20,6 @@ import time
 import warnings
 from logging import getLogger
 from pathlib import Path
-from typing import Dict, List

 import torch
 from tqdm import tqdm
@@ -36,7 +35,7 @@ DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 def generate_summaries_or_translations(
-    examples: List[str],
+    examples: list[str],
     out_file: str,
     model_name: str,
     batch_size: int = 8,
@@ -45,7 +44,7 @@ def generate_summaries_or_translations(
     task="summarization",
     prefix=None,
     **generate_kwargs,
-) -> Dict:
+) -> dict:
    """Save model.generate results to <out_file>, and return how long it took."""
    fout = Path(out_file).open("w", encoding="utf-8")
    model_name = str(model_name)

View File

@@ -34,7 +34,7 @@ task_score_names = {
 def parse_search_arg(search):
     groups = search.split()
-    entries = dict((g.split("=") for g in groups))
+    entries = dict(g.split("=") for g in groups)
     entry_names = list(entries.keys())
     sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()]
     matrix = [list(x) for x in itertools.product(*sets)]
@@ -105,7 +105,7 @@ def run_search():
     col_widths = {col: len(str(col)) for col in col_names}
     results = []
     for r in matrix:
-        hparams = dict((x.replace("--", "").split() for x in r))
+        hparams = dict(x.replace("--", "").split() for x in r)
        args_exp = " ".join(r).split()
        args_exp.extend(["--bs", str(args.bs)])  # in case we need to reduce its size due to CUDA OOM
        sys.argv = args_normal + args_exp
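A generator expression that is a call's sole argument needs no parentheses of its own; the call's parentheses suffice, so the doubled parens were redundant. A sketch (hypothetical "key=value" entries):

    groups = ["lr=3e-5", "bs=16"]
    # sole-argument generator expression: no extra parentheses required
    entries = dict(g.split("=") for g in groups)
    assert entries == {"lr": "3e-5", "bs": "16"}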

View File

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union

 import torch
 from torch import nn
@@ -172,10 +172,10 @@ class Seq2SeqTrainer(Trainer):
     def prediction_step(
         self,
         model: nn.Module,
-        inputs: Dict[str, Union[torch.Tensor, Any]],
+        inputs: dict[str, Union[torch.Tensor, Any]],
         prediction_loss_only: bool,
-        ignore_keys: Optional[List[str]] = None,
-    ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
+        ignore_keys: Optional[list[str]] = None,
+    ) -> tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
        """
        Perform an evaluation step on :obj:`model` using obj:`inputs`.


@@ -1,6 +1,5 @@
 #!/usr/bin/env python
-import io
 import json
 import subprocess
@@ -29,5 +28,5 @@ def get_all_data(pairs, n_objs):
 text = get_all_data(pairs, n_objs)
 filename = "./fsmt_val_data.json"
-with io.open(filename, "w", encoding="utf-8") as f:
+with open(filename, "w", encoding="utf-8") as f:
 bleu_data = json.dump(text, f, indent=2, ensure_ascii=False)
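`io.open` is simply an alias of the built-in `open` on Python 3, so dropping the `io.` prefix and the import changes nothing at runtime; the explicit `encoding="utf-8"` is kept, which still matters for portability. A quick sketch (the filename is hypothetical):

import io

assert open is io.open  # the two names refer to the same function on Python 3
with open("example.json", "w", encoding="utf-8") as f:
    f.write("{}")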


@@ -19,9 +19,10 @@ import math
 import os
 import pickle
 import socket
+from collections.abc import Iterable
 from logging import getLogger
 from pathlib import Path
-from typing import Callable, Dict, Iterable, List, Tuple, Union
+from typing import Callable, Union
 import git
 import numpy as np
@@ -67,7 +68,7 @@ def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=-100):
 return loss, nll_loss
-def lmap(f: Callable, x: Iterable) -> List:
+def lmap(f: Callable, x: Iterable) -> list:
 """list(map(f, x))"""
 return list(map(f, x))
@@ -77,11 +78,11 @@ def calculate_bleu(output_lns, refs_lns, **kwargs) -> dict:
 return {"bleu": round(corpus_bleu(output_lns, [refs_lns], **kwargs).score, 4)}
-def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], Dict]:
+def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], dict]:
 def non_pad_len(tokens: np.ndarray) -> int:
 return np.count_nonzero(tokens != tokenizer.pad_token_id)
-def decode_pred(pred: EvalPrediction) -> Tuple[List[str], List[str]]:
+def decode_pred(pred: EvalPrediction) -> tuple[list[str], list[str]]:
 pred_ids = pred.predictions
 label_ids = pred.label_ids
 pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
@@ -91,16 +92,16 @@ def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) ->
 label_str = lmap(str.strip, label_str)
 return pred_str, label_str
-def summarization_metrics(pred: EvalPrediction) -> Dict:
+def summarization_metrics(pred: EvalPrediction) -> dict:
 pred_str, label_str = decode_pred(pred)
-rouge: Dict = calculate_rouge(pred_str, label_str)
+rouge: dict = calculate_rouge(pred_str, label_str)
 summ_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1)
 rouge.update({"gen_len": summ_len})
 return rouge
-def translation_metrics(pred: EvalPrediction) -> Dict:
+def translation_metrics(pred: EvalPrediction) -> dict:
 pred_str, label_str = decode_pred(pred)
-bleu: Dict = calculate_bleu(pred_str, label_str)
+bleu: dict = calculate_bleu(pred_str, label_str)
 gen_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1)
 bleu.update({"gen_len": gen_len})
 return bleu
@@ -183,7 +184,7 @@ class AbstractSeq2SeqDataset(Dataset):
 return min(self.src_lens[i], self.max_target_length)
 # call fairseq cython function
-batch_sampler: List[List[int]] = batch_by_size(
+batch_sampler: list[list[int]] = batch_by_size(
 sorted_indices,
 num_tokens_fn=num_tokens_in_example,
 max_tokens=max_tokens_per_batch,
@@ -207,7 +208,7 @@ class AbstractSeq2SeqDataset(Dataset):
 class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
-def __getitem__(self, index) -> Dict[str, torch.Tensor]:
+def __getitem__(self, index) -> dict[str, torch.Tensor]:
 """Call tokenizer on src and tgt_lines"""
 index = index + 1  # linecache starts at 1
 source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n")
@@ -237,7 +238,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
 **self.dataset_kwargs,
 )
-def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
+def collate_fn(self, batch) -> dict[str, torch.Tensor]:
 input_ids = torch.stack([x["input_ids"] for x in batch])
 masks = torch.stack([x["attention_mask"] for x in batch])
 target_ids = torch.stack([x["labels"] for x in batch])
@@ -255,7 +256,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
 class Seq2SeqDataset(AbstractSeq2SeqDataset):
 """A dataset that calls prepare_seq2seq_batch."""
-def __getitem__(self, index) -> Dict[str, str]:
+def __getitem__(self, index) -> dict[str, str]:
 index = index + 1  # linecache starts at 1
 source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n")
 tgt_line = linecache.getline(str(self.tgt_file), index).rstrip("\n")
@@ -263,9 +264,9 @@ class Seq2SeqDataset(AbstractSeq2SeqDataset):
 assert tgt_line, f"empty tgt line for index {index}"
 return {"tgt_texts": tgt_line, "src_texts": source_line, "id": index - 1}
-def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
+def collate_fn(self, batch) -> dict[str, torch.Tensor]:
 """Call prepare_seq2seq_batch."""
-batch_encoding: Dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch(
+batch_encoding: dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch(
 [x["src_texts"] for x in batch],
 tgt_texts=[x["tgt_texts"] for x in batch],
 max_length=self.max_source_length,
@@ -293,7 +294,7 @@ class Seq2SeqDataCollator:
 if data_args.tgt_lang is not None:
 self.dataset_kwargs["tgt_lang"] = data_args.tgt_lang
-def __call__(self, batch) -> Dict[str, torch.Tensor]:
+def __call__(self, batch) -> dict[str, torch.Tensor]:
 if hasattr(self.tokenizer, "prepare_seq2seq_batch"):
 batch = self._encode(batch)
 input_ids, attention_mask, labels = (
@@ -329,7 +330,7 @@ class Seq2SeqDataCollator:
 shifted_input_ids[..., 0] = self.pad_token_id
 return shifted_input_ids
-def _encode(self, batch) -> Dict[str, torch.Tensor]:
+def _encode(self, batch) -> dict[str, torch.Tensor]:
 batch_encoding = self.tokenizer.prepare_seq2seq_batch(
 [x["src_texts"] for x in batch],
 tgt_texts=[x["tgt_texts"] for x in batch],
@@ -355,7 +356,7 @@ class SortishSampler(Sampler):
 return iter(sortish_sampler_indices(self.data, self.bs, shuffle=self.shuffle))
-def sortish_sampler_indices(data: List, bs: int, shuffle=True) -> np.array:
+def sortish_sampler_indices(data: list, bs: int, shuffle=True) -> np.array:
 "Go through the text data by order of src length with a bit of randomness. From fastai repo."
 if not shuffle:
 return np.argsort(np.array(data) * -1)
@@ -455,7 +456,7 @@ def pickle_save(obj, path):
 return pickle.dump(obj, f)
-def flatten_list(summary_ids: List[List]):
+def flatten_list(summary_ids: list[list]):
 return list(itertools.chain.from_iterable(summary_ids))
@@ -506,14 +507,14 @@ def extract_rouge_mid_statistics(dct):
 def calculate_rouge(
-pred_lns: List[str],
-tgt_lns: List[str],
+pred_lns: list[str],
+tgt_lns: list[str],
 use_stemmer=True,
 rouge_keys=ROUGE_KEYS,
 return_precision_and_recall=False,
 bootstrap_aggregation=True,
 newline_sep=True,
-) -> Dict:
+) -> dict:
 """Calculate rouge using rouge_scorer package.
 Args:
@@ -590,19 +591,19 @@ def any_requires_grad(model: nn.Module) -> bool:
 def assert_all_frozen(model):
-model_grads: List[bool] = list(grad_status(model))
+model_grads: list[bool] = list(grad_status(model))
 n_require_grad = sum(lmap(int, model_grads))
 npars = len(model_grads)
 assert not any(model_grads), f"{n_require_grad / npars:.1%} of {npars} weights require grad"
 def assert_not_all_frozen(model):
-model_grads: List[bool] = list(grad_status(model))
+model_grads: list[bool] = list(grad_status(model))
 npars = len(model_grads)
 assert any(model_grads), f"none of {npars} weights require grad"
-def parse_numeric_n_bool_cl_kwargs(unparsed_args: List[str]) -> Dict[str, Union[int, float, bool]]:
+def parse_numeric_n_bool_cl_kwargs(unparsed_args: list[str]) -> dict[str, Union[int, float, bool]]:
 """
 Parse an argv list of unspecified command line args to a dict.
 Assumes all values are either numeric or boolean in the form of true/false.
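This file shows the second half of the PEP 585 cleanup: abstract collection types such as `Iterable` now come from `collections.abc`, which has been subscriptable since 3.9, while `Callable` and `Union` stay in `typing` here. A sketch of the 3.9 idiom with hypothetical names:

from collections.abc import Iterable
from typing import Callable

def lengths(f: Callable[[str], int], xs: Iterable[str]) -> list[int]:
    # collections.abc.Iterable[str] parses at runtime on 3.9+
    return [f(x) for x in xs]

assert lengths(len, ["a", "bb"]) == [1, 2]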


@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -20,7 +19,7 @@ import os
 import sys
 from dataclasses import dataclass, field
 from importlib import import_module
-from typing import Dict, List, Optional, Tuple
+from typing import Optional
 import numpy as np
 from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
@@ -159,7 +158,7 @@ def main():
 # Prepare CONLL-2003 task
 labels = token_classification_task.get_labels(data_args.labels)
-label_map: Dict[int, str] = dict(enumerate(labels))
+label_map: dict[int, str] = dict(enumerate(labels))
 num_labels = len(labels)
 # Load pretrained model and tokenizer
@@ -217,7 +216,7 @@ def main():
 else None
 )
-def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> Tuple[List[int], List[int]]:
+def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> tuple[list[int], list[int]]:
 preds = np.argmax(predictions, axis=2)
 batch_size, seq_len = preds.shape
@@ -233,7 +232,7 @@ def main():
 return preds_list, out_label_list
-def compute_metrics(p: EvalPrediction) -> Dict:
+def compute_metrics(p: EvalPrediction) -> dict:
 preds_list, out_label_list = align_predictions(p.predictions, p.label_ids)
 return {
 "accuracy_score": accuracy_score(out_label_list, preds_list),
@@ -279,7 +278,7 @@ def main():
 logger.info("***** Eval results *****")
 for key, value in result.items():
 logger.info(" %s = %s", key, value)
-writer.write("%s = %s\n" % (key, value))
+writer.write("{} = {}\n".format(key, value))
 results.update(result)
@@ -304,13 +303,13 @@ def main():
 with open(output_test_results_file, "w") as writer:
 for key, value in metrics.items():
 logger.info(" %s = %s", key, value)
-writer.write("%s = %s\n" % (key, value))
+writer.write("{} = {}\n".format(key, value))
 # Save predictions
 output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
 if trainer.is_world_process_zero():
 with open(output_test_predictions_file, "w") as writer:
-with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
+with open(os.path.join(data_args.data_dir, "test.txt")) as f:
 token_classification_task.write_predictions_to_file(writer, f, preds_list)
 return results
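Worth noting in these hunks: the `logger.info(" %s = %s", key, value)` calls keep printf-style placeholders on purpose, since `logging` defers interpolation until a record is actually emitted; only the eagerly built `writer.write` strings are modernized. A sketch of the three equivalent spellings, with hypothetical values:

key, value = "f1", 0.87
old = "%s = %s\n" % (key, value)        # printf-style
new = "{} = {}\n".format(key, value)    # what this commit switches to
fst = f"{key} = {value}\n"              # the usual 3.6+ spelling
assert old == new == fst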


@@ -12,7 +12,7 @@ subword_len_counter = 0
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
 max_len -= tokenizer.num_special_tokens_to_add()
-with open(dataset, "rt") as f_p:
+with open(dataset) as f_p:
 for line in f_p:
 line = line.rstrip()


@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import List, TextIO, Union
+from typing import TextIO, Union
 from conllu import parse_incr
 from utils_ner import InputExample, Split, TokenClassificationTask
@@ -14,7 +14,7 @@ class NER(TokenClassificationTask):
 # in NER datasets, the last column is usually reserved for NER label
 self.label_idx = label_idx
-def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]:
+def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]:
 if isinstance(mode, Split):
 mode = mode.value
 file_path = os.path.join(data_dir, f"{mode}.txt")
@@ -42,7 +42,7 @@ class NER(TokenClassificationTask):
 examples.append(InputExample(guid=f"{mode}-{guid_index}", words=words, labels=labels))
 return examples
-def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List):
+def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list):
 example_id = 0
 for line in test_input_reader:
 if line.startswith("-DOCSTART-") or line == "" or line == "\n":
@@ -55,9 +55,9 @@ class NER(TokenClassificationTask):
 else:
 logger.warning("Maximum sequence length exceeded: No prediction for '%s'.", line.split()[0])
-def get_labels(self, path: str) -> List[str]:
+def get_labels(self, path: str) -> list[str]:
 if path:
-with open(path, "r") as f:
+with open(path) as f:
 labels = f.read().splitlines()
 if "O" not in labels:
 labels = ["O"] + labels
@@ -71,9 +71,9 @@ class Chunk(NER):
 # in CONLL2003 dataset chunk column is second-to-last
 super().__init__(label_idx=-2)
-def get_labels(self, path: str) -> List[str]:
+def get_labels(self, path: str) -> list[str]:
 if path:
-with open(path, "r") as f:
+with open(path) as f:
 labels = f.read().splitlines()
 if "O" not in labels:
 labels = ["O"] + labels
@@ -105,7 +105,7 @@ class Chunk(NER):
 class POS(TokenClassificationTask):
-def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]:
+def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]:
 if isinstance(mode, Split):
 mode = mode.value
 file_path = os.path.join(data_dir, f"{mode}.txt")
@@ -125,7 +125,7 @@ class POS(TokenClassificationTask):
 guid_index += 1
 return examples
-def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List):
+def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list):
 example_id = 0
 for sentence in parse_incr(test_input_reader):
 s_p = preds_list[example_id]
@@ -136,9 +136,9 @@ class POS(TokenClassificationTask):
 writer.write(out)
 example_id += 1
-def get_labels(self, path: str) -> List[str]:
+def get_labels(self, path: str) -> list[str]:
 if path:
-with open(path, "r") as f:
+with open(path) as f:
 return f.read().splitlines()
 else:
 return [
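The `open(path, "r")` to `open(path)` rewrites in this file (and `open(dataset, "rt")` above) rely on `"r"`, text read mode, equivalent to `"rt"`, being the default, so the calls are behaviorally identical; the encoding stays platform-dependent either way unless passed explicitly. A tiny runnable sketch with a hypothetical path:

path = "labels.txt"  # hypothetical
with open(path, "w") as f:   # create something to read back
    f.write("O\nB-LOC\n")
with open(path) as f:        # mode defaults to "r" (text), i.e. "rt"
    labels = f.read().splitlines()
assert labels == ["O", "B-LOC"]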


@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -19,7 +18,7 @@ import logging
 import os
 from dataclasses import dataclass
 from enum import Enum
-from typing import List, Optional, Union
+from typing import Optional, Union
 from filelock import FileLock
@@ -42,8 +41,8 @@ class InputExample:
 """
 guid: str
-words: List[str]
-labels: Optional[List[str]]
+words: list[str]
+labels: Optional[list[str]]
 @dataclass
@@ -53,10 +52,10 @@ class InputFeatures:
 Property names are the same names as the corresponding inputs to a model.
 """
-input_ids: List[int]
-attention_mask: List[int]
-token_type_ids: Optional[List[int]] = None
-label_ids: Optional[List[int]] = None
+input_ids: list[int]
+attention_mask: list[int]
+token_type_ids: Optional[list[int]] = None
+label_ids: Optional[list[int]] = None
 class Split(Enum):
@@ -67,17 +66,17 @@ class Split(Enum):
 class TokenClassificationTask:
 @staticmethod
-def read_examples_from_file(data_dir, mode: Union[Split, str]) -> List[InputExample]:
+def read_examples_from_file(data_dir, mode: Union[Split, str]) -> list[InputExample]:
 raise NotImplementedError
 @staticmethod
-def get_labels(path: str) -> List[str]:
+def get_labels(path: str) -> list[str]:
 raise NotImplementedError
 @staticmethod
 def convert_examples_to_features(
-examples: List[InputExample],
-label_list: List[str],
+examples: list[InputExample],
+label_list: list[str],
 max_seq_length: int,
 tokenizer: PreTrainedTokenizer,
 cls_token_at_end=False,
@@ -91,7 +90,7 @@ class TokenClassificationTask:
 pad_token_label_id=-100,
 sequence_a_segment_id=0,
 mask_padding_with_zero=True,
-) -> List[InputFeatures]:
+) -> list[InputFeatures]:
 """Loads a data file into a list of `InputFeatures`
 `cls_token_at_end` define the location of the CLS token:
 - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
@@ -214,7 +213,7 @@ if is_torch_available():
 soon.
 """
-features: List[InputFeatures]
+features: list[InputFeatures]
 pad_token_label_id: int = nn.CrossEntropyLoss().ignore_index
 # Use cross entropy ignore_index as padding label id so that only
 # real label ids contribute to the loss later.
@@ -224,7 +223,7 @@ if is_torch_available():
 token_classification_task: TokenClassificationTask,
 data_dir: str,
 tokenizer: PreTrainedTokenizer,
-labels: List[str],
+labels: list[str],
 model_type: str,
 max_seq_length: Optional[int] = None,
 overwrite_cache=False,
@@ -233,7 +232,7 @@ if is_torch_available():
 # Load data features from cache or dataset file
 cached_features_file = os.path.join(
 data_dir,
-"cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
+f"cached_{mode.value}_{tokenizer.__class__.__name__}_{str(max_seq_length)}",
 )
 # Make sure only the first process in distributed training processes the dataset,
@@ -283,7 +282,7 @@ if is_tf_available():
 soon.
 """
-features: List[InputFeatures]
+features: list[InputFeatures]
 pad_token_label_id: int = -100
 # Use cross entropy ignore_index as padding label id so that only
 # real label ids contribute to the loss later.
@@ -293,7 +292,7 @@ if is_tf_available():
 token_classification_task: TokenClassificationTask,
 data_dir: str,
 tokenizer: PreTrainedTokenizer,
-labels: List[str],
+labels: list[str],
 model_type: str,
 max_seq_length: Optional[int] = None,
 overwrite_cache=False,
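One hunk above goes straight from `str.format` to an f-string. The mechanical conversion keeps `str(max_seq_length)` inside the braces, which is harmless but redundant: f-string substitution already stringifies the value. A sketch with hypothetical values:

mode_value, tok_name, max_seq_length = "train", "BertTokenizer", 128
a = "cached_{}_{}_{}".format(mode_value, tok_name, str(max_seq_length))
b = f"cached_{mode_value}_{tok_name}_{str(max_seq_length)}"  # as in the commit
c = f"cached_{mode_value}_{tok_name}_{max_seq_length}"       # str() is implicit
assert a == b == c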


@@ -4,7 +4,7 @@
 # the file from the modular. If any change should be done, please apply the change to the
 # modular_new_imgproc_model.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 import numpy as np
 import torch
@@ -74,13 +74,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
 def __init__(
 self,
 do_resize: bool = True,
-size: Dict[str, int] = None,
+size: dict[str, int] = None,
 resample: PILImageResampling = PILImageResampling.BICUBIC,
 do_rescale: bool = True,
 rescale_factor: Union[int, float] = 1 / 255,
 do_normalize: bool = True,
-image_mean: Optional[Union[float, List[float]]] = None,
-image_std: Optional[Union[float, List[float]]] = None,
+image_mean: Optional[Union[float, list[float]]] = None,
+image_std: Optional[Union[float, list[float]]] = None,
 do_convert_rgb: bool = True,
 **kwargs,
 ) -> None:
@@ -101,7 +101,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
 def resize(
 self,
 image: np.ndarray,
-size: Dict[str, int],
+size: dict[str, int],
 resample: PILImageResampling = PILImageResampling.BICUBIC,
 data_format: Optional[Union[str, ChannelDimension]] = None,
 input_data_format: Optional[Union[str, ChannelDimension]] = None,
@@ -151,13 +151,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
 self,
 images: ImageInput,
 do_resize: Optional[bool] = None,
-size: Optional[Dict[str, int]] = None,
+size: Optional[dict[str, int]] = None,
 resample: PILImageResampling = None,
 do_rescale: Optional[bool] = None,
 rescale_factor: Optional[float] = None,
 do_normalize: Optional[bool] = None,
-image_mean: Optional[Union[float, List[float]]] = None,
-image_std: Optional[Union[float, List[float]]] = None,
+image_mean: Optional[Union[float, list[float]]] = None,
+image_std: Optional[Union[float, list[float]]] = None,
 return_tensors: Optional[Union[str, TensorType]] = None,
 do_convert_rgb: bool = None,
 data_format: ChannelDimension = ChannelDimension.FIRST,


@@ -5,7 +5,7 @@
 # modular_add_function.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
 # Note that zamba does not have the `apply_rotary_pos_emb` function!
-from typing import Optional, Tuple
+from typing import Optional
 import torch
 from torch import nn
@@ -62,5 +62,5 @@ class TestAttention(nn.Module):
 def __init__(self):
 pass
-def forward(self) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+def forward(self) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
 _ = apply_rotary_pos_emb(1, 1, 1, 1)


@@ -5,7 +5,7 @@
 # modular_dummy.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
 from functools import partial
-from typing import Callable, Optional, Tuple, Union
+from typing import Callable, Optional, Union
 import torch
 from torch import nn
@@ -223,12 +223,12 @@ class DummyAttention(nn.Module):
 def forward(
 self,
 hidden_states: torch.Tensor,
-position_embeddings: Tuple[torch.Tensor, torch.Tensor],
+position_embeddings: tuple[torch.Tensor, torch.Tensor],
 attention_mask: Optional[torch.Tensor],
 past_key_value: Optional[Cache] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
 input_shape = hidden_states.shape[:-1]
 hidden_shape = (*input_shape, -1, self.head_dim)
@@ -290,9 +290,9 @@ class DummyDecoderLayer(nn.Module):
 output_attentions: Optional[bool] = False,
 use_cache: Optional[bool] = False,
 cache_position: Optional[torch.LongTensor] = None,
-position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,  # necessary, but kept here for BC
+position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,  # necessary, but kept here for BC
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
 residual = hidden_states
 hidden_states = self.input_layernorm(hidden_states)
@@ -494,7 +494,7 @@ class DummyModel(DummyPreTrainedModel):
 return_dict: Optional[bool] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
-) -> Union[Tuple, BaseModelOutputWithPast]:
+) -> Union[tuple, BaseModelOutputWithPast]:
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
 output_hidden_states = (
 output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
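Note what the modernization deliberately leaves alone: `Optional` and `Union` are still imported from `typing` in every one of these files, because the `X | Y` union syntax (PEP 604) only arrives in Python 3.10, and this commit appears to target a 3.9 floor. A sketch with a hypothetical function:

from typing import Optional

def head(xs: list[int]) -> Optional[int]:   # valid on 3.9
    return xs[0] if xs else None

# def head(xs: list[int]) -> int | None:    # would require Python 3.10+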


@@ -6,7 +6,7 @@
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
 import math
 import os
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 import torch
 from packaging import version
@@ -136,9 +136,9 @@ class DummyBertSelfAttention(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 mixed_query_layer = self.query(hidden_states)
 # If this is instantiated as a cross-attention module, the keys
@@ -245,9 +245,9 @@ class DummyBertSdpaSelfAttention(DummyBertSelfAttention):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None:
 # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented.
 logger.warning_once(
@@ -386,9 +386,9 @@ class DummyBertAttention(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 self_outputs = self.self(
 hidden_states,
 attention_mask,
@@ -454,9 +454,9 @@ class DummyBertLayer(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
 self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
 self_attention_outputs = self.attention(
@@ -532,12 +532,12 @@ class DummyBertEncoder(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = False,
 output_hidden_states: Optional[bool] = False,
 return_dict: Optional[bool] = True,
-) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
+) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
 all_hidden_states = () if output_hidden_states else None
 all_self_attentions = () if output_attentions else None
 all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@@ -858,12 +858,12 @@ class DummyBertModel(DummyBertPreTrainedModel):
 inputs_embeds: Optional[torch.Tensor] = None,
 encoder_hidden_states: Optional[torch.Tensor] = None,
 encoder_attention_mask: Optional[torch.Tensor] = None,
-past_key_values: Optional[List[torch.FloatTensor]] = None,
+past_key_values: Optional[list[torch.FloatTensor]] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
-) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
+) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
 r"""
 encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
 Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if


@@ -4,7 +4,7 @@
 # the file from the modular. If any change should be done, please apply the change to the
 # modular_from_uppercase_model.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-from typing import Optional, Tuple
+from typing import Optional
 import torch
 from torch import nn
@@ -53,7 +53,7 @@ class FromUppercaseModelAttention(nn.Module):
 attention_mask: Optional[torch.Tensor] = None,
 causal_attention_mask: Optional[torch.Tensor] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
 """Input shape: Batch x Time x Channel"""
 bsz, tgt_len, embed_dim = hidden_states.size()
@@ -148,7 +148,7 @@ class FromUppercaseModelFlashAttention2(FromUppercaseModelAttention):
 attention_mask: Optional[torch.Tensor] = None,
 causal_attention_mask: Optional[torch.Tensor] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
 output_attentions = False
 batch_size, q_len, _ = hidden_states.size()
@@ -226,7 +226,7 @@ class FromUppercaseModelSdpaAttention(FromUppercaseModelAttention):
 attention_mask: Optional[torch.Tensor] = None,
 causal_attention_mask: Optional[torch.Tensor] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
 if output_attentions:
 # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
 logger.warning_once(
@@ -322,7 +322,7 @@ class FromUppercaseModelEncoderLayer(nn.Module):
 attention_mask: torch.Tensor,
 causal_attention_mask: torch.Tensor,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.FloatTensor]:
+) -> tuple[torch.FloatTensor]:
 """
 Args:
 hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`


@@ -5,7 +5,7 @@
 # modular_multimodal1.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
 from functools import partial
-from typing import Callable, Optional, Tuple, Union
+from typing import Callable, Optional, Union
 import torch
 from torch import nn
@@ -223,12 +223,12 @@ class Multimodal1TextAttention(nn.Module):
 def forward(
 self,
 hidden_states: torch.Tensor,
-position_embeddings: Tuple[torch.Tensor, torch.Tensor],
+position_embeddings: tuple[torch.Tensor, torch.Tensor],
 attention_mask: Optional[torch.Tensor],
 past_key_value: Optional[Cache] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
 input_shape = hidden_states.shape[:-1]
 hidden_shape = (*input_shape, -1, self.head_dim)
@@ -290,9 +290,9 @@ class Multimodal1TextDecoderLayer(nn.Module):
 output_attentions: Optional[bool] = False,
 use_cache: Optional[bool] = False,
 cache_position: Optional[torch.LongTensor] = None,
-position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,  # necessary, but kept here for BC
+position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,  # necessary, but kept here for BC
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
 residual = hidden_states
 hidden_states = self.input_layernorm(hidden_states)
@@ -494,7 +494,7 @@ class Multimodal1TextModel(Multimodal1TextPreTrainedModel):
 return_dict: Optional[bool] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
-) -> Union[Tuple, BaseModelOutputWithPast]:
+) -> Union[tuple, BaseModelOutputWithPast]:
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
 output_hidden_states = (
 output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states


@@ -5,7 +5,7 @@
 # modular_multimodal2.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-from typing import Optional, Tuple, Union
+from typing import Optional, Union
 import torch
 from torch import nn
@@ -65,7 +65,7 @@ class Multimodal2VisionAttention(nn.Module):
 attention_mask: Optional[torch.Tensor] = None,
 causal_attention_mask: Optional[torch.Tensor] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
 """Input shape: Batch x Time x Channel"""
 bsz, tgt_len, embed_dim = hidden_states.size()
@@ -152,7 +152,7 @@ class Multimodal2VisionSdpaAttention(Multimodal2VisionAttention):
 attention_mask: Optional[torch.Tensor] = None,
 causal_attention_mask: Optional[torch.Tensor] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
 if output_attentions:
 # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
 logger.warning_once(
@@ -233,7 +233,7 @@ class Multimodal2VisionFlashAttention2(Multimodal2VisionAttention):
 attention_mask: Optional[torch.Tensor] = None,
 causal_attention_mask: Optional[torch.Tensor] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
 output_attentions = False
 batch_size, q_len, _ = hidden_states.size()
@@ -334,7 +334,7 @@ class Multimodal2VisionEncoderLayer(nn.Module):
 attention_mask: torch.Tensor,
 causal_attention_mask: torch.Tensor,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.FloatTensor]:
+) -> tuple[torch.FloatTensor]:
 """
 Args:
 hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@@ -392,7 +392,7 @@ class Multimodal2VisionEncoder(nn.Module):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
-) -> Union[Tuple, BaseModelOutput]:
+) -> Union[tuple, BaseModelOutput]:
 r"""
 Args:
 inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
@@ -587,7 +587,7 @@ class Multimodal2VisionTransformer(nn.Module):
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
 interpolate_pos_encoding: Optional[bool] = False,
-) -> Union[Tuple, BaseModelOutputWithPooling]:
+) -> Union[tuple, BaseModelOutputWithPooling]:
 r"""
 Returns:
@@ -671,7 +671,7 @@ class Multimodal2VisionModel(Multimodal2VisionPreTrainedModel):
 output_hidden_states: Optional[bool] = None,
 interpolate_pos_encoding: bool = False,
 return_dict: Optional[bool] = None,
-) -> Union[Tuple, BaseModelOutputWithPooling]:
+) -> Union[tuple, BaseModelOutputWithPooling]:
 r"""
 Returns:


@@ -4,7 +4,7 @@
 # the file from the modular. If any change should be done, please apply the change to the
 # modular_my_new_model2.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, Optional, Union
 import torch
 from torch import nn
@@ -222,12 +222,12 @@ class MyNewModel2Attention(nn.Module):
 def forward(
 self,
 hidden_states: torch.Tensor,
-position_embeddings: Tuple[torch.Tensor, torch.Tensor],
+position_embeddings: tuple[torch.Tensor, torch.Tensor],
 attention_mask: Optional[torch.Tensor],
 past_key_value: Optional[Cache] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
 input_shape = hidden_states.shape[:-1]
 hidden_shape = (*input_shape, -1, self.head_dim)
@@ -289,9 +289,9 @@ class MyNewModel2DecoderLayer(nn.Module):
 output_attentions: Optional[bool] = False,
 use_cache: Optional[bool] = False,
 cache_position: Optional[torch.LongTensor] = None,
-position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,  # necessary, but kept here for BC
+position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,  # necessary, but kept here for BC
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
 residual = hidden_states
 hidden_states = self.input_layernorm(hidden_states)
@@ -485,7 +485,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
 input_ids: torch.LongTensor = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_ids: Optional[torch.LongTensor] = None,
-past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
+past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
 inputs_embeds: Optional[torch.FloatTensor] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = None,
@@ -493,7 +493,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
 return_dict: Optional[bool] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **kwargs,  # NOOP kwarg for now
-) -> Union[Tuple, BaseModelOutputWithPast]:
+) -> Union[tuple, BaseModelOutputWithPast]:
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
 output_hidden_states = (
 output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -753,14 +753,14 @@ class MyNewModel2ForSequenceClassification(MyNewModel2PreTrainedModel):
 input_ids: Optional[torch.LongTensor] = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_ids: Optional[torch.LongTensor] = None,
-past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
+past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
 inputs_embeds: Optional[torch.FloatTensor] = None,
 labels: Optional[torch.LongTensor] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
-) -> Union[Tuple, SequenceClassifierOutputWithPast]:
+) -> Union[tuple, SequenceClassifierOutputWithPast]:
 r"""
 labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
 Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,


@@ -5,7 +5,7 @@
 # modular_new_task_model.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
 from dataclasses import dataclass
-from typing import ClassVar, List, Optional, Tuple, Union
+from typing import ClassVar, Optional, Union
 import torch
 from torch import nn
@@ -61,9 +61,9 @@ class NewTaskModelCausalLMOutputWithPast(ModelOutput):
 loss: Optional[torch.FloatTensor] = None
 logits: torch.FloatTensor = None
-past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None
-hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-attentions: Optional[Tuple[torch.FloatTensor]] = None
+past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None
+hidden_states: Optional[tuple[torch.FloatTensor]] = None
+attentions: Optional[tuple[torch.FloatTensor]] = None
 image_hidden_states: Optional[torch.FloatTensor] = None
@@ -337,7 +337,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
 pixel_values: torch.FloatTensor = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_ids: Optional[torch.LongTensor] = None,
-past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None,
+past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None,
 token_type_ids: Optional[torch.LongTensor] = None,
 cache_position: Optional[torch.LongTensor] = None,
 inputs_embeds: Optional[torch.FloatTensor] = None,
@@ -347,7 +347,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
 num_logits_to_keep: int = 0,
-) -> Union[Tuple, NewTaskModelCausalLMOutputWithPast]:
+) -> Union[tuple, NewTaskModelCausalLMOutputWithPast]:
 r"""
 labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@@ -6,7 +6,7 @@
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
 import math
 import os
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 import torch
 import torch.nn as nn
@@ -139,9 +139,9 @@ class RobertaSelfAttention(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 mixed_query_layer = self.query(hidden_states)
 # If this is instantiated as a cross-attention module, the keys
@@ -248,9 +248,9 @@ class RobertaSdpaSelfAttention(RobertaSelfAttention):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None:
 # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented.
 logger.warning_once(
@@ -389,9 +389,9 @@ class RobertaAttention(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 self_outputs = self.self(
 hidden_states,
 attention_mask,
@@ -457,9 +457,9 @@ class RobertaLayer(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 output_attentions: Optional[bool] = False,
-) -> Tuple[torch.Tensor]:
+) -> tuple[torch.Tensor]:
 # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
 self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
 self_attention_outputs = self.attention(
@@ -535,12 +535,12 @@ class RobertaEncoder(nn.Module):
 head_mask: Optional[torch.FloatTensor] = None,
 encoder_hidden_states: Optional[torch.FloatTensor] = None,
 encoder_attention_mask: Optional[torch.FloatTensor] = None,
-past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = False,
 output_hidden_states: Optional[bool] = False,
 return_dict: Optional[bool] = True,
-) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
+) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
 all_hidden_states = () if output_hidden_states else None
 all_self_attentions = () if output_attentions else None
 all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@@ -861,12 +861,12 @@ class RobertaModel(RobertaPreTrainedModel):
 inputs_embeds: Optional[torch.Tensor] = None,
 encoder_hidden_states: Optional[torch.Tensor] = None,
 encoder_attention_mask: Optional[torch.Tensor] = None,
-past_key_values: Optional[List[torch.FloatTensor]] = None,
+past_key_values: Optional[list[torch.FloatTensor]] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
-) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
+) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
 r"""
 encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
 Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
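Note what the hunks deliberately leave alone: PEP 585 covers the container generics only, so `Optional` and `Union` still come from `typing` on 3.9 (the `X | Y` spelling of PEP 604 needs Python 3.10 at runtime). A small illustrative sketch, not taken from the diff:

```python
from typing import Optional

# Container types use builtin generics; Optional still comes from typing on 3.9.
def head_dim(config: Optional[dict[str, int]], default: int = 64) -> int:
    return default if config is None else config.get("head_dim", default)

print(head_dim(None), head_dim({"head_dim": 128}))  # 64 128
```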

View File

@@ -4,7 +4,7 @@
 # the file from the modular. If any change should be done, please apply the change to the
 # modular_super.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, Optional, Union
 import torch
 from torch import nn
@@ -222,12 +222,12 @@ class SuperAttention(nn.Module):
 def forward(
 self,
 hidden_states: torch.Tensor,
-position_embeddings: Tuple[torch.Tensor, torch.Tensor],
+position_embeddings: tuple[torch.Tensor, torch.Tensor],
 attention_mask: Optional[torch.Tensor],
 past_key_value: Optional[Cache] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
 input_shape = hidden_states.shape[:-1]
 hidden_shape = (*input_shape, -1, self.head_dim)
@@ -289,9 +289,9 @@ class SuperDecoderLayer(nn.Module):
 output_attentions: Optional[bool] = False,
 use_cache: Optional[bool] = False,
 cache_position: Optional[torch.LongTensor] = None,
-position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
+position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
 residual = hidden_states
 hidden_states = self.input_layernorm(hidden_states)
@@ -485,14 +485,14 @@ class SuperModel(SuperPreTrainedModel):
 input_ids: torch.LongTensor = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_ids: Optional[torch.LongTensor] = None,
-past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
+past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
 inputs_embeds: Optional[torch.FloatTensor] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
 cache_position: Optional[torch.LongTensor] = None,
-) -> Union[Tuple, BaseModelOutputWithPast]:
+) -> Union[tuple, BaseModelOutputWithPast]:
 out = super().forward(
 input_ids,
 attention_mask,

View File

@@ -5,7 +5,7 @@
 # modular_switch_function.py file directly. One of our CI enforces this.
 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
 # Note that llama and cohere have different definitions for rotate_half
-from typing import Callable, Optional, Tuple
+from typing import Callable, Optional
 import torch
 from torch import nn
@@ -123,12 +123,12 @@ class SwitchFunctionAttention(nn.Module):
 def forward(
 self,
 hidden_states: torch.Tensor,
-position_embeddings: Tuple[torch.Tensor, torch.Tensor],
+position_embeddings: tuple[torch.Tensor, torch.Tensor],
 attention_mask: Optional[torch.Tensor],
 past_key_value: Optional[Cache] = None,
 cache_position: Optional[torch.LongTensor] = None,
 **kwargs: Unpack[FlashAttentionKwargs],
-) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
 input_shape = hidden_states.shape[:-1]
 hidden_shape = (*input_shape, -1, self.head_dim)

View File

@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 import torch
@@ -18,10 +18,10 @@ class DummyBertModel(BertModel):
 inputs_embeds: Optional[torch.Tensor] = None,
 encoder_hidden_states: Optional[torch.Tensor] = None,
 encoder_attention_mask: Optional[torch.Tensor] = None,
-past_key_values: Optional[List[torch.FloatTensor]] = None,
+past_key_values: Optional[list[torch.FloatTensor]] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
-) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
+) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
 return super().forward(input_ids)

View File

@@ -1,4 +1,4 @@
-from typing import ClassVar, List, Optional, Union
+from typing import ClassVar, Optional, Union
 import torch
 import torch.utils.checkpoint
@@ -29,7 +29,7 @@ class NewTaskModelForNewTask(PaliGemmaForConditionalGeneration):
 pixel_values: torch.FloatTensor = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_ids: Optional[torch.LongTensor] = None,
-past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None,
+past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None,
 token_type_ids: Optional[torch.LongTensor] = None,
 cache_position: Optional[torch.LongTensor] = None,
 inputs_embeds: Optional[torch.FloatTensor] = None,

View File

@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 import torch
@@ -15,14 +15,14 @@ class SuperModel(LlamaModel):
 input_ids: torch.LongTensor = None,
 attention_mask: Optional[torch.Tensor] = None,
 position_ids: Optional[torch.LongTensor] = None,
-past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
+past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
 inputs_embeds: Optional[torch.FloatTensor] = None,
 use_cache: Optional[bool] = None,
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
 cache_position: Optional[torch.LongTensor] = None,
-) -> Union[Tuple, CausalLMOutputWithPast]:
+) -> Union[tuple, CausalLMOutputWithPast]:
 out = super().forward(
 input_ids,
 attention_mask,

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
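The recurring `# coding=utf-8` deletions rest on PEP 3120: Python 3 source files default to UTF-8, so the encoding cookie is dead weight. A trivial sketch showing non-ASCII literals working without it:

```python
#!/usr/bin/env python
# No `# coding=utf-8` cookie needed: UTF-8 is the default source
# encoding in Python 3 (PEP 3120), even for literals like this one.
greeting = "héllo, 🤗"
print(greeting.encode("utf-8"))
```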

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,9 +17,10 @@
 import logging
 import os
 import sys
+from collections.abc import Mapping
 from dataclasses import dataclass, field
 from functools import partial
-from typing import Any, Dict, List, Mapping, Optional
+from typing import Any, Optional
 import albumentations as A
 import numpy as np
@@ -200,7 +200,7 @@ class Evaluator:
 def reset_metric(self):
 self.metric.reset()
-def postprocess_target_batch(self, target_batch) -> List[Dict[str, torch.Tensor]]:
+def postprocess_target_batch(self, target_batch) -> list[dict[str, torch.Tensor]]:
 """Collect targets in a form of list of dictionaries with keys "masks", "labels"."""
 batch_masks = target_batch[0]
 batch_labels = target_batch[1]
@@ -214,13 +214,13 @@
 )
 return post_processed_targets
-def get_target_sizes(self, post_processed_targets) -> List[List[int]]:
+def get_target_sizes(self, post_processed_targets) -> list[list[int]]:
 target_sizes = []
 for target in post_processed_targets:
 target_sizes.append(target["masks"].shape[-2:])
 return target_sizes
-def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> List[Dict[str, torch.Tensor]]:
+def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> list[dict[str, torch.Tensor]]:
 """Collect predictions in a form of list of dictionaries with keys "masks", "labels", "scores"."""
 model_output = ModelOutput(class_queries_logits=prediction_batch[0], masks_queries_logits=prediction_batch[1])
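The import shuffle here reflects a second deprecation: the ABCs in `typing` (such as `typing.Mapping`) are aliases deprecated since 3.9, and the `collections.abc` originals are themselves subscriptable. A small illustrative sketch (the helper is mine, not from the script):

```python
from collections.abc import Mapping  # canonical home; typing.Mapping is a deprecated alias

def flatten_config(cfg: Mapping[str, object], prefix: str = "") -> dict[str, object]:
    """Recursively flatten a nested mapping into dotted keys."""
    flat: dict[str, object] = {}
    for key, value in cfg.items():
        name = f"{prefix}{key}"
        if isinstance(value, Mapping):  # abc classes also work for isinstance checks
            flat.update(flatten_config(value, f"{name}."))
        else:
            flat[name] = value
    return flat

print(flatten_config({"optim": {"lr": 3e-4}, "seed": 42}))
# {'optim.lr': 0.0003, 'seed': 42}
```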

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,9 +20,10 @@ import logging
 import math
 import os
 import sys
+from collections.abc import Mapping
 from functools import partial
 from pathlib import Path
-from typing import Any, Mapping
+from typing import Any
 import albumentations as A
 import datasets

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -551,7 +550,7 @@ def main():
 covariance_matrix=1e-5 * sigma,
 )
 new_token_embeddings = torch.stack(
-tuple((dist.sample() for _ in range(len(special_tokens)))),
+tuple(dist.sample() for _ in range(len(special_tokens))),
 dim=0,
 )
 else:
@@ -571,7 +570,7 @@ def main():
 covariance_matrix=1e-5 * sigma,
 )
 new_token_embeddings = torch.stack(
-tuple((dist.sample() for _ in range(len(special_tokens)))),
+tuple(dist.sample() for _ in range(len(special_tokens))),
 dim=0,
 )
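The `tuple((... for ...))` fix is purely stylistic: when a generator expression is the sole argument of a call, the extra parentheses are redundant. A runnable sketch of the same pattern (the distribution and shapes are illustrative, not the script's):

```python
import torch

# tuple((f(x) for x in xs)) and tuple(f(x) for x in xs) build the same tuple;
# the inner parentheses around a sole-argument generator add nothing.
dist = torch.distributions.Normal(loc=torch.zeros(4), scale=torch.ones(4))
new_token_embeddings = torch.stack(
    tuple(dist.sample() for _ in range(3)),
    dim=0,
)
print(new_token_embeddings.shape)  # torch.Size([3, 4])
```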

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -518,7 +517,7 @@ def main():
 covariance_matrix=1e-5 * sigma,
 )
 new_token_embeddings = torch.stack(
-tuple((dist.sample() for _ in range(len(special_tokens)))),
+tuple(dist.sample() for _ in range(len(special_tokens))),
 dim=0,
 )
 else:
@@ -538,7 +537,7 @@ def main():
 covariance_matrix=1e-5 * sigma,
 )
 new_token_embeddings = torch.stack(
-tuple((dist.sample() for _ in range(len(special_tokens)))),
+tuple(dist.sample() for _ in range(len(special_tokens))),
 dim=0,
 )

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,9 +17,10 @@
 import logging
 import os
 import sys
+from collections.abc import Mapping
 from dataclasses import dataclass, field
 from functools import partial
-from typing import Any, List, Mapping, Optional, Tuple, Union
+from typing import Any, Optional, Union
 import albumentations as A
 import numpy as np
@@ -60,7 +60,7 @@ class ModelOutput:
 def format_image_annotations_as_coco(
-image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]]
+image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]]
 ) -> dict:
 """Format one set of image annotations to the COCO format
@@ -94,7 +94,7 @@ def format_image_annotations_as_coco(
 }
-def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
+def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
 """
 Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
 to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.
@@ -148,7 +148,7 @@ def augment_and_transform_batch(
 return result
-def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]:
+def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]:
 data = {}
 data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch])
 data["labels"] = [x["labels"] for x in batch]

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -19,9 +18,10 @@ import json
 import logging
 import math
 import os
+from collections.abc import Mapping
 from functools import partial
 from pathlib import Path
-from typing import Any, List, Mapping, Tuple, Union
+from typing import Any, Union
 import albumentations as A
 import datasets
@@ -61,7 +61,7 @@ require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/sema
 # Copied from examples/pytorch/object-detection/run_object_detection.format_image_annotations_as_coco
 def format_image_annotations_as_coco(
-image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]]
+image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]]
 ) -> dict:
 """Format one set of image annotations to the COCO format
@@ -96,7 +96,7 @@ def format_image_annotations_as_coco(
 # Copied from examples/pytorch/object-detection/run_object_detection.convert_bbox_yolo_to_pascal
-def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
+def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
 """
 Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
 to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.
@@ -152,7 +152,7 @@ def augment_and_transform_batch(
 # Copied from examples/pytorch/object-detection/run_object_detection.collate_fn
-def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]:
+def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]:
 data = {}
 data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch])
 data["labels"] = [x["labels"] for x in batch]

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 HuggingFace Inc..
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -33,7 +32,7 @@ def get_results(output_dir):
 results = {}
 path = os.path.join(output_dir, "all_results.json")
 if os.path.exists(path):
-with open(path, "r") as f:
+with open(path) as f:
 results = json.load(f)
 else:
 raise ValueError(f"can't find {path}")
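The `open(path, "r")` → `open(path)` edits lean on the fact that `"r"` (text read) is `open()`'s default mode. A self-contained sketch mirroring the test helper (the file name and payload are illustrative):

```python
import json
import os
import tempfile

# "r" is open()'s default mode, so open(path) == open(path, "r").
with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "all_results.json")
    with open(path, "w") as f:
        json.dump({"eval_accuracy": 0.9}, f)
    with open(path) as f:  # no explicit "r" needed
        print(json.load(f))  # {'eval_accuracy': 0.9}
```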

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -22,7 +21,7 @@ import logging
 import os
 import sys
 from dataclasses import dataclass, field
-from typing import List, Optional, Tuple
+from typing import Optional
 import datasets
 import evaluate
@@ -469,7 +468,7 @@ def main():
 question_column: str,
 context_column: str,
 answer_column: str,
-) -> Tuple[List[str], List[str]]:
+) -> tuple[list[str], list[str]]:
 questions = examples[question_column]
 contexts = examples[context_column]
 answers = examples[answer_column]

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,7 +17,7 @@ A subclass of `Trainer` specific to Question-Answering tasks
 import math
 import time
-from typing import Dict, List, Optional
+from typing import Optional
 from torch.utils.data import Dataset
@@ -42,10 +41,10 @@ class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
 self,
 eval_dataset: Optional[Dataset] = None,
 eval_examples=None,
-ignore_keys: Optional[List[str]] = None,
+ignore_keys: Optional[list[str]] = None,
 metric_key_prefix: str = "eval",
 **gen_kwargs,
-) -> Dict[str, float]:
+) -> dict[str, float]:
 gen_kwargs = gen_kwargs.copy()
 # Use legacy argument setting if a) the option is not explicitly passed; and b) the argument is set in the

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import collections
 import json
 import logging
 import os
-from typing import Optional, Tuple
+from typing import Optional
 import numpy as np
 from tqdm.auto import tqdm
@@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
 def postprocess_qa_predictions(
 examples,
 features,
-predictions: Tuple[np.ndarray, np.ndarray],
+predictions: tuple[np.ndarray, np.ndarray],
 version_2_with_negative: bool = False,
 n_best_size: int = 20,
 max_answer_length: int = 30,
@@ -223,7 +222,7 @@ def postprocess_qa_predictions(
 # If we have an output_dir, let's save all those dicts.
 if output_dir is not None:
 if not os.path.isdir(output_dir):
-raise EnvironmentError(f"{output_dir} is not a directory.")
+raise OSError(f"{output_dir} is not a directory.")
 prediction_file = os.path.join(
 output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@@ -253,7 +252,7 @@ def postprocess_qa_predictions(
 def postprocess_qa_predictions_with_beam_search(
 examples,
 features,
-predictions: Tuple[np.ndarray, np.ndarray],
+predictions: tuple[np.ndarray, np.ndarray],
 version_2_with_negative: bool = False,
 n_best_size: int = 20,
 max_answer_length: int = 30,
@@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
 # If we have an output_dir, let's save all those dicts.
 if output_dir is not None:
 if not os.path.isdir(output_dir):
-raise EnvironmentError(f"{output_dir} is not a directory.")
+raise OSError(f"{output_dir} is not a directory.")
 prediction_file = os.path.join(
 output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -258,7 +257,7 @@ def main():
 else:
 repo_id = data_args.dataset_name
 filename = "id2label.json"
-id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
+id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
 id2label = {int(k): v for k, v in id2label.items()}
 label2id = {v: str(k) for k, v in id2label.items()}

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -316,7 +315,7 @@ def main():
 else:
 repo_id = args.dataset_name
 filename = "id2label.json"
-id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
+id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
 id2label = {int(k): v for k, v in id2label.items()}
 label2id = {v: k for k, v in id2label.items()}

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import math
 import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 import datasets
 import torch
@@ -328,7 +327,7 @@ class DataCollatorForWav2Vec2Pretraining:
 mask_time_prob: Optional[float] = 0.65
 mask_time_length: Optional[int] = 10
-def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
 # reformat list to dict and set to pytorch format
 batch = self.feature_extractor.pad(
 features,
@@ -716,7 +715,7 @@ def main():
 }
 log_str = ""
 for k, v in train_logs.items():
-log_str += "| {}: {:.3e}".format(k, v.item())
+log_str += f"| {k}: {v.item():.3e}"
 if accelerator.is_local_main_process:
 progress_bar.write(log_str)
@@ -773,7 +772,7 @@ def main():
 log_str = ""
 for k, v in val_logs.items():
-log_str += "| {}: {:.3e}".format(k, v.item())
+log_str += f"| {k}: {v.item():.3e}"
 if accelerator.is_local_main_process:
 progress_bar.write(log_str)
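The logging hunks swap `str.format` for f-strings (PEP 498), which keep format specs such as `:.3e` intact inside the braces. A runnable equivalence check modeled on the hunk (plain floats stand in for the script's tensor `.item()` calls):

```python
train_logs = {"loss": 0.0123456, "lr": 3e-4}  # illustrative values

log_str = ""
for k, v in train_logs.items():
    log_str += f"| {k}: {v:.3e}"  # was: "| {}: {:.3e}".format(k, v)

assert log_str == "".join("| {}: {:.3e}".format(k, v) for k, v in train_logs.items())
print(log_str)  # | loss: 1.235e-02| lr: 3.000e-04
```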

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,7 +23,7 @@ import re
 import sys
 import warnings
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 import datasets
 import evaluate
@@ -211,11 +210,11 @@ class DataTrainingArguments:
 )
 },
 )
-chars_to_ignore: Optional[List[str]] = list_field(
+chars_to_ignore: Optional[list[str]] = list_field(
 default=None,
 metadata={"help": "A list of characters to remove from the transcripts."},
 )
-eval_metrics: List[str] = list_field(
+eval_metrics: list[str] = list_field(
 default=["wer"],
 metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
 )
@@ -318,7 +317,7 @@ class DataCollatorCTCWithPadding:
 pad_to_multiple_of_labels: Optional[int] = None
 feature_extractor_input_name: Optional[str] = "input_values"
-def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
 # split inputs and labels since they have to be of different lengths and need
 # different padding methods
 input_features = [

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,7 +23,7 @@ import re
 import sys
 import warnings
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 import datasets
 import evaluate
@@ -201,11 +200,11 @@ class DataTrainingArguments:
 )
 },
 )
-chars_to_ignore: Optional[List[str]] = list_field(
+chars_to_ignore: Optional[list[str]] = list_field(
 default=None,
 metadata={"help": "A list of characters to remove from the transcripts."},
 )
-eval_metrics: List[str] = list_field(
+eval_metrics: list[str] = list_field(
 default=["wer"],
 metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
 )
@@ -300,7 +299,7 @@ class DataCollatorCTCWithPadding:
 pad_to_multiple_of: Optional[int] = None
 pad_to_multiple_of_labels: Optional[int] = None
-def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
 # split inputs and labels since they have to be of different lengths and need
 # different padding methods
 input_features = [{"input_values": feature["input_values"]} for feature in features]

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -23,7 +22,7 @@ import logging
 import os
 import sys
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 import datasets
 import evaluate
@@ -110,11 +109,11 @@ class ModelArguments:
 freeze_encoder: bool = field(
 default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
 )
-forced_decoder_ids: List[List[int]] = field(
+forced_decoder_ids: list[list[int]] = field(
 default=None,
 metadata={"help": "Deprecated. Please use the `language` and `task` arguments instead."},
 )
-suppress_tokens: List[int] = field(
+suppress_tokens: list[int] = field(
 default=None,
 metadata={
 "help": (
@@ -247,7 +246,7 @@ class DataCollatorSpeechSeq2SeqWithPadding:
 decoder_start_token_id: int
 forward_attention_mask: bool
-def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
 # split inputs and labels since they have to be of different lengths and need
 # different padding methods
 model_input_name = self.processor.model_input_names[0]
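All of these collators now annotate `__call__` with builtin generics: features arrive as `list[dict[...]]` and a `dict[str, torch.Tensor]` goes out. A minimal stand-in collator with the same shape of signature (the padding logic is my sketch, not the script's):

```python
import torch

def collate(features: list[dict[str, list[int]]]) -> dict[str, torch.Tensor]:
    """Right-pad 'input_ids' across the batch and return a dict of tensors."""
    width = max(len(f["input_ids"]) for f in features)
    rows = [f["input_ids"] + [0] * (width - len(f["input_ids"])) for f in features]
    return {"input_ids": torch.tensor(rows)}

print(collate([{"input_ids": [1, 2, 3]}, {"input_ids": [4]}])["input_ids"])
# tensor([[1, 2, 3],
#         [4, 0, 0]])
```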

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 HuggingFace Inc..
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -51,7 +50,7 @@ def get_results(output_dir):
 results = {}
 path = os.path.join(output_dir, "all_results.json")
 if os.path.exists(path):
-with open(path, "r") as f:
+with open(path) as f:
 results = json.load(f)
 else:
 raise ValueError(f"can't find {path}")

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 HuggingFace Inc..
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -87,7 +86,7 @@ def get_results(output_dir):
 results = {}
 path = os.path.join(output_dir, "all_results.json")
 if os.path.exists(path):
-with open(path, "r") as f:
+with open(path) as f:
 results = json.load(f)
 else:
 raise ValueError(f"can't find {path}")

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,7 +20,7 @@ import os
 import random
 import sys
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import Optional
 import datasets
 import evaluate
@@ -256,7 +255,7 @@ class ModelArguments:
 )
-def get_label_list(raw_dataset, split="train") -> List[str]:
+def get_label_list(raw_dataset, split="train") -> list[str]:
 """Get the list of labels from a multi-label dataset"""
 if isinstance(raw_dataset[split]["label"][0], list):
@@ -537,7 +536,7 @@ def main():
 model.config.id2label = {id: label for label, id in label_to_id.items()}
 elif not is_regression: # classification, but not training
 logger.info("using label infos in the model config")
-logger.info("label2id: {}".format(model.config.label2id))
+logger.info(f"label2id: {model.config.label2id}")
 label_to_id = model.config.label2id
 else: # regression
 label_to_id = None
@@ -549,7 +548,7 @@ def main():
 )
 max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
-def multi_labels_to_ids(labels: List[str]) -> List[float]:
+def multi_labels_to_ids(labels: list[str]) -> list[float]:
 ids = [0.0] * len(label_to_id) # BCELoss requires float as target type
 for label in labels:
 ids[label_to_id[label]] = 1.0
@@ -735,7 +734,7 @@ def main():
 else:
 item = label_list[item]
 writer.write(f"{index}\t{item}\n")
-logger.info("Predict results saved at {}".format(output_predict_file))
+logger.info(f"Predict results saved at {output_predict_file}")
 kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"}
 if training_args.push_to_hub:
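The `multi_labels_to_ids` hunk is a compact showcase of the new annotations; here it is as a runnable standalone (the label vocabulary is invented for the demo):

```python
label_to_id: dict[str, int] = {"politics": 0, "sports": 1, "tech": 2}  # demo vocabulary

def multi_labels_to_ids(labels: list[str]) -> list[float]:
    ids = [0.0] * len(label_to_id)  # BCELoss requires float as target type
    for label in labels:
        ids[label_to_id[label]] = 1.0
    return ids

print(multi_labels_to_ids(["tech", "sports"]))  # [0.0, 1.0, 1.0]
```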

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -19,7 +18,6 @@
 import argparse
 import inspect
 import logging
-from typing import Tuple
 import torch
 from accelerate import PartialState
@@ -271,8 +269,8 @@ class _ModelFallbackWrapper(GenerationMixin):
 )
 def _reorder_cache(
-self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
-) -> Tuple[Tuple[torch.Tensor]]:
+self, past_key_values: tuple[tuple[torch.Tensor]], beam_idx: torch.Tensor
+) -> tuple[tuple[torch.Tensor]]:
 """
 This function is used to re-order the `past_key_values` cache if [`~PretrainedModel.beam_search`] or
 [`~PretrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
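For context, a hedged sketch of what a `_reorder_cache` like this typically does: select the beam dimension of every cached tensor by `beam_idx`. The layout and shapes below are assumptions for illustration, not taken from the file:

```python
import torch

def reorder_cache(
    past_key_values: tuple[tuple[torch.Tensor, ...], ...],
    beam_idx: torch.Tensor,
) -> tuple[tuple[torch.Tensor, ...], ...]:
    # Reorder each layer's cached states along the (assumed) batch/beam axis 0.
    return tuple(
        tuple(state.index_select(0, beam_idx) for state in layer)
        for layer in past_key_values
    )

layer = (torch.randn(4, 2, 8), torch.randn(4, 2, 8))  # illustrative shapes
reordered = reorder_cache((layer,), torch.tensor([3, 2, 1, 0]))
print(reordered[0][0].shape)  # torch.Size([4, 2, 8])
```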

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 University of Cambridge, Tencent AI Lab, DeepMind and The University of Hong Kong Authors and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

Some files were not shown because too many files have changed in this diff.