Use Python 3.9 syntax in examples (#37279)

Signed-off-by: cyy <cyyever@outlook.com>
cyyever 2025-04-07 19:52:21 +08:00 committed by GitHub
parent 08f36771b3
commit 0fb8d49e88
123 changed files with 358 additions and 451 deletions
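
For orientation, the recurring patterns across the files below are sketched here in a minimal, hypothetical example (not code from the diff): PEP 585 builtin generics replace typing.List/Dict/Tuple, f-strings replace %-formatting and str.format for literal templates, and redundant headers and arguments are dropped.

    # PEP 585 (Python 3.9+): builtin containers are subscriptable in annotations,
    # so `from typing import Dict, List` is no longer needed for these.
    def token_lengths(lines: list[str]) -> dict[str, list[int]]:
        return {line: [len(word) for word in line.split()] for line in lines}

    # f-strings replace printf-style and str.format for literal templates.
    split = "eval"
    assert f"{split} results" == "%s results" % split == "{} results".format(split)

    print(token_lengths(["hello world"]))  # {'hello world': [5, 5]}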

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -833,8 +832,7 @@ def main():
# No need to shuffle here
loader = data_loader(rng, _ds, batch_size=batch_size, shuffle=False)
for batch in loader:
yield batch
yield from loader
# Metric
metric = evaluate.load("rouge", cache_dir=model_args.cache_dir)
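
The hunk above is behavior-preserving: `yield from` delegates iteration to the loader and yields each batch exactly as the explicit loop did. A minimal sketch of the equivalence:

    def with_loop(loader):
        for batch in loader:
            yield batch

    def with_delegation(loader):
        yield from loader  # identical for plain re-yielding

    assert list(with_loop(range(3))) == list(with_delegation(range(3))) == [0, 1, 2]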

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -30,7 +29,7 @@ from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Dict, List, Optional
from typing import Optional
import flax
import jax
@ -294,7 +293,7 @@ class FlaxDataCollatorForBartDenoisingLM:
" language modeling. "
)
def __call__(self, examples: List[Dict[str, List[int]]]) -> BatchEncoding:
def __call__(self, examples: list[dict[str, list[int]]]) -> BatchEncoding:
# convert list to dict and tensorize input
batch = BatchEncoding(
{k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}
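
The annotation rewrite in this collator is PEP 585: since Python 3.9, list and dict themselves accept type subscripts, so the typing aliases (deprecated since 3.9) can go. A sketch with the same shape of data the collator handles:

    # No typing imports needed on Python 3.9+
    def transpose(examples: list[dict[str, list[int]]]) -> dict[str, list[list[int]]]:
        # turn a list of per-example dicts into one dict of stacked columns
        return {key: [ex[key] for ex in examples] for key in examples[0]}

    print(transpose([{"input_ids": [1, 2]}, {"input_ids": [3, 4]}]))
    # {'input_ids': [[1, 2], [3, 4]]}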

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -33,7 +32,7 @@ from itertools import chain
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from typing import Optional
import flax
import jax
@ -302,7 +301,7 @@ class FlaxDataCollatorForLanguageModeling:
"You should pass `mlm=False` to train on causal language modeling instead."
)
def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multiple_of: int) -> Dict[str, np.ndarray]:
def __call__(self, examples: list[dict[str, np.ndarray]], pad_to_multiple_of: int) -> dict[str, np.ndarray]:
# Handle dict or lists with proper padding and conversion to tensor.
batch = self.tokenizer.pad(examples, pad_to_multiple_of=pad_to_multiple_of, return_tensors=TensorType.NUMPY)
@ -316,7 +315,7 @@ class FlaxDataCollatorForLanguageModeling:
def mask_tokens(
self, inputs: np.ndarray, special_tokens_mask: Optional[np.ndarray]
) -> Tuple[np.ndarray, np.ndarray]:
) -> tuple[np.ndarray, np.ndarray]:
"""
Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.
"""

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -32,7 +31,7 @@ from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Dict, List, Optional
from typing import Optional
import flax
import jax
@ -338,7 +337,7 @@ class FlaxDataCollatorForT5MLM:
pad_token_id: int
decoder_start_token_id: int
def __call__(self, examples: List[Dict[str, np.ndarray]]) -> BatchEncoding:
def __call__(self, examples: list[dict[str, np.ndarray]]) -> BatchEncoding:
# convert list to dict and tensorize input
batch = BatchEncoding(
{k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import json
from typing import Iterator, List, Union
from collections.abc import Iterator
from typing import Union
from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, trainers
from tokenizers.implementations.base_tokenizer import BaseTokenizer
@ -72,7 +73,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
def train(
self,
files: Union[str, List[str]],
files: Union[str, list[str]],
vocab_size: int = 8000,
show_progress: bool = True,
):
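
The import split above is the companion rule: abstract container types like Iterator live in collections.abc, and their typing re-exports were deprecated by PEP 585, while Union still has no home outside typing before 3.10. A sketch:

    from collections.abc import Iterator

    def evens(limit: int) -> Iterator[int]:
        # collections.abc classes are subscriptable on 3.9+; no typing alias needed
        return (n for n in range(limit) if n % 2 == 0)

    assert list(evens(6)) == [0, 2, 4]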

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -28,7 +27,7 @@ import time
from dataclasses import asdict, dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple
from typing import Any, Callable, Optional
import datasets
import evaluate
@ -908,8 +907,8 @@ def main():
# region Define train step functions
def train_step(
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
) -> Tuple[train_state.TrainState, float]:
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
) -> tuple[train_state.TrainState, float]:
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
start_positions = batch.pop("start_positions")

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -20,7 +19,7 @@ import collections
import json
import logging
import os
from typing import Optional, Tuple
from typing import Optional
import numpy as np
from tqdm.auto import tqdm
@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
def postprocess_qa_predictions(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@ -223,7 +222,7 @@ def postprocess_qa_predictions(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")
prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@ -253,7 +252,7 @@ def postprocess_qa_predictions(
def postprocess_qa_predictions_with_beam_search(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")
prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -25,7 +24,7 @@ import time
from dataclasses import field
from functools import partial
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from typing import Any, Callable, Optional, Union
import datasets
import evaluate
@ -303,7 +302,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
pad_input_to_multiple_of: Optional[int] = None
pad_target_to_multiple_of: Optional[int] = None
def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
def __call__(self, features: list[dict[str, Union[list[int], np.ndarray]]]) -> dict[str, np.ndarray]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
model_input_name = self.processor.model_input_names[0]

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2021 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -64,7 +63,7 @@ def get_setup_file():
def get_results(output_dir, split="eval"):
path = os.path.join(output_dir, f"{split}_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
return json.load(f)
raise ValueError(f"can't find {path}")
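
The open() change above leans on the default mode being "r" (text read), so the explicit "r" was dead weight. A self-contained sketch mirroring the results-file pattern (filename and metrics are made up):

    import json
    import tempfile

    with tempfile.NamedTemporaryFile("w", suffix="_results.json", delete=False) as f:
        json.dump({"eval_loss": 0.5}, f)  # made-up metrics for illustration
        path = f.name

    with open(path) as f:  # mode defaults to "r"
        print(json.load(f))  # {'eval_loss': 0.5}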

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -25,7 +24,7 @@ import time
import warnings
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple
from typing import Any, Callable, Optional
import datasets
import evaluate
@ -572,8 +571,8 @@ def main():
# define step functions
def train_step(
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
) -> Tuple[train_state.TrainState, float]:
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
) -> tuple[train_state.TrainState, float]:
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
targets = batch.pop("labels")

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -27,7 +26,7 @@ from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple
from typing import Any, Callable, Optional
import datasets
import evaluate
@ -651,8 +650,8 @@ def main():
# define step functions
def train_step(
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
) -> Tuple[train_state.TrainState, float]:
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
) -> tuple[train_state.TrainState, float]:
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
targets = batch.pop("labels")

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -15,7 +15,7 @@
import csv
from collections import defaultdict
from dataclasses import dataclass, field
from typing import List, Optional
from typing import Optional
import matplotlib.pyplot as plt
import numpy as np
@ -59,7 +59,7 @@ class PlotArguments:
default=None,
metadata={"help": "Filename under which the plot will be saved. If unused no plot is saved."},
)
short_model_names: Optional[List[str]] = list_field(
short_model_names: Optional[list[str]] = list_field(
default=None, metadata={"help": "List of model names that are used instead of the ones in the csv file."}
)

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -18,7 +17,7 @@
import logging
import os
from dataclasses import dataclass, field
from typing import Dict, Optional
from typing import Optional
import numpy as np
from utils_multiple_choice import MultipleChoiceDataset, Split, processors
@ -187,7 +186,7 @@ def main():
else None
)
def compute_metrics(p: EvalPrediction) -> Dict:
def compute_metrics(p: EvalPrediction) -> dict:
preds = np.argmax(p.predictions, axis=1)
return {"acc": simple_accuracy(preds, p.label_ids)}
@ -228,7 +227,7 @@ def main():
logger.info("***** Eval results *****")
for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
writer.write("{} = {}\n".format(key, value))
results.update(result)
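
This file shows both string modernizations the commit uses: printf-style "%s" templates become str.format here and f-strings elsewhere. For simple substitutions all three produce identical text, so the choice is purely about readability:

    key, value = "acc", 0.912  # hypothetical metric
    assert (
        "%s = %s\n" % (key, value)
        == "{} = {}\n".format(key, value)
        == f"{key} = {value}\n"
    )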

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -22,7 +21,7 @@ import logging
import os
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional
from typing import Optional
import tqdm
from filelock import FileLock
@ -49,8 +48,8 @@ class InputExample:
example_id: str
question: str
contexts: List[str]
endings: List[str]
contexts: list[str]
endings: list[str]
label: Optional[str]
@ -62,9 +61,9 @@ class InputFeatures:
"""
example_id: str
input_ids: List[List[int]]
attention_mask: Optional[List[List[int]]]
token_type_ids: Optional[List[List[int]]]
input_ids: list[list[int]]
attention_mask: Optional[list[list[int]]]
token_type_ids: Optional[list[list[int]]]
label: Optional[int]
@ -84,7 +83,7 @@ if is_torch_available():
soon.
"""
features: List[InputFeatures]
features: list[InputFeatures]
def __init__(
self,
@ -149,7 +148,7 @@ if is_tf_available():
soon.
"""
features: List[InputFeatures]
features: list[InputFeatures]
def __init__(
self,
@ -253,7 +252,7 @@ class RaceProcessor(DataProcessor):
def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
high = os.path.join(data_dir, "train/high")
middle = os.path.join(data_dir, "train/middle")
high = self._read_txt(high)
@ -262,7 +261,7 @@ class RaceProcessor(DataProcessor):
def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
high = os.path.join(data_dir, "dev/high")
middle = os.path.join(data_dir, "dev/middle")
high = self._read_txt(high)
@ -271,7 +270,7 @@ class RaceProcessor(DataProcessor):
def get_test_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} test".format(data_dir))
logger.info(f"LOOKING AT {data_dir} test")
high = os.path.join(data_dir, "test/high")
middle = os.path.join(data_dir, "test/middle")
high = self._read_txt(high)
@ -286,7 +285,7 @@ class RaceProcessor(DataProcessor):
lines = []
files = glob.glob(input_dir + "/*txt")
for file in tqdm.tqdm(files, desc="read files"):
with open(file, "r", encoding="utf-8") as fin:
with open(file, encoding="utf-8") as fin:
data_raw = json.load(fin)
data_raw["race_id"] = file
lines.append(data_raw)
@ -296,7 +295,7 @@ class RaceProcessor(DataProcessor):
"""Creates examples for the training and dev sets."""
examples = []
for _, data_raw in enumerate(lines):
race_id = "%s-%s" % (set_type, data_raw["race_id"])
race_id = "{}-{}".format(set_type, data_raw["race_id"])
article = data_raw["article"]
for i in range(len(data_raw["answers"])):
truth = str(ord(data_raw["answers"][i]) - ord("A"))
@ -320,17 +319,17 @@ class SynonymProcessor(DataProcessor):
def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
return self._create_examples(self._read_csv(os.path.join(data_dir, "mctrain.csv")), "train")
def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
return self._create_examples(self._read_csv(os.path.join(data_dir, "mchp.csv")), "dev")
def get_test_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
return self._create_examples(self._read_csv(os.path.join(data_dir, "mctest.csv")), "test")
@ -339,10 +338,10 @@ class SynonymProcessor(DataProcessor):
return ["0", "1", "2", "3", "4"]
def _read_csv(self, input_file):
with open(input_file, "r", encoding="utf-8") as f:
with open(input_file, encoding="utf-8") as f:
return list(csv.reader(f))
def _create_examples(self, lines: List[List[str]], type: str):
def _create_examples(self, lines: list[list[str]], type: str):
"""Creates examples for the training and dev sets."""
examples = [
@ -366,17 +365,17 @@ class SwagProcessor(DataProcessor):
def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train")
def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
return self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev")
def get_test_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
raise ValueError(
"For swag testing, the input file does not contain a label column. It can not be tested in current code "
"setting!"
@ -388,10 +387,10 @@ class SwagProcessor(DataProcessor):
return ["0", "1", "2", "3"]
def _read_csv(self, input_file):
with open(input_file, "r", encoding="utf-8") as f:
with open(input_file, encoding="utf-8") as f:
return list(csv.reader(f))
def _create_examples(self, lines: List[List[str]], type: str):
def _create_examples(self, lines: list[list[str]], type: str):
"""Creates examples for the training and dev sets."""
if type == "train" and lines[0][-1] != "label":
raise ValueError("For training, the input file must contain a label column.")
@ -417,16 +416,16 @@ class ArcProcessor(DataProcessor):
def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
return self._create_examples(self._read_json(os.path.join(data_dir, "train.jsonl")), "train")
def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
return self._create_examples(self._read_json(os.path.join(data_dir, "dev.jsonl")), "dev")
def get_test_examples(self, data_dir):
logger.info("LOOKING AT {} test".format(data_dir))
logger.info(f"LOOKING AT {data_dir} test")
return self._create_examples(self._read_json(os.path.join(data_dir, "test.jsonl")), "test")
def get_labels(self):
@ -434,7 +433,7 @@ class ArcProcessor(DataProcessor):
return ["0", "1", "2", "3"]
def _read_json(self, input_file):
with open(input_file, "r", encoding="utf-8") as fin:
with open(input_file, encoding="utf-8") as fin:
lines = fin.readlines()
return lines
@ -504,11 +503,11 @@ class ArcProcessor(DataProcessor):
def convert_examples_to_features(
examples: List[InputExample],
label_list: List[str],
examples: list[InputExample],
label_list: list[str],
max_length: int,
tokenizer: PreTrainedTokenizer,
) -> List[InputFeatures]:
) -> list[InputFeatures]:
"""
Loads a data file into a list of `InputFeatures`
"""

View File

@ -2,7 +2,7 @@ import argparse
import logging
import os
from pathlib import Path
from typing import Any, Dict
from typing import Any
import pytorch_lightning as pl
from pytorch_lightning.utilities import rank_zero_info
@ -201,7 +201,7 @@ class BaseTransformer(pl.LightningModule):
)
@pl.utilities.rank_zero_only
def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
def on_save_checkpoint(self, checkpoint: dict[str, Any]) -> None:
save_path = self.output_dir.joinpath("best_tfmr")
self.model.config.save_step = self.step_count
self.model.save_pretrained(save_path)
@ -282,7 +282,7 @@ class LoggingCallback(pl.Callback):
# Log results
for key in sorted(metrics):
if key not in ["log", "progress_bar"]:
rank_zero_info("{} = {}\n".format(key, str(metrics[key])))
rank_zero_info(f"{key} = {str(metrics[key])}\n")
def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule):
rank_zero_info("***** Test results *****")
@ -292,8 +292,8 @@ class LoggingCallback(pl.Callback):
with open(output_test_results_file, "w") as writer:
for key in sorted(metrics):
if key not in ["log", "progress_bar"]:
rank_zero_info("{} = {}\n".format(key, str(metrics[key])))
writer.write("{} = {}\n".format(key, str(metrics[key])))
rank_zero_info(f"{key} = {str(metrics[key])}\n")
writer.write(f"{key} = {str(metrics[key])}\n")
def add_generic_args(parser, root_dir) -> None:

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -231,14 +230,14 @@ def train(args, train_dataset, model, tokenizer):
if args.local_rank == -1 and args.evaluate_during_training:
results = evaluate(args, model, tokenizer)
for key, value in results.items():
tb_writer.add_scalar("eval_{}".format(key), value, global_step)
tb_writer.add_scalar(f"eval_{key}", value, global_step)
tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
logging_loss = tr_loss
# Save model checkpoint
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}")
# Take care of distributed/parallel training
model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir)
@ -281,7 +280,7 @@ def evaluate(args, model, tokenizer, prefix=""):
model = torch.nn.DataParallel(model)
# Eval!
logger.info("***** Running evaluation {} *****".format(prefix))
logger.info(f"***** Running evaluation {prefix} *****")
logger.info(" Num examples = %d", len(dataset))
logger.info(" Batch size = %d", args.eval_batch_size)
@ -348,11 +347,11 @@ def evaluate(args, model, tokenizer, prefix=""):
logger.info(" Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(dataset))
# Compute predictions
output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix))
output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix))
output_prediction_file = os.path.join(args.output_dir, f"predictions_{prefix}.json")
output_nbest_file = os.path.join(args.output_dir, f"nbest_predictions_{prefix}.json")
if args.version_2_with_negative:
output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix))
output_null_log_odds_file = os.path.join(args.output_dir, f"null_odds_{prefix}.json")
else:
output_null_log_odds_file = None
@ -828,10 +827,10 @@ def main():
# Evaluate
result = evaluate(args, model, tokenizer, prefix=global_step)
result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()}
results.update(result)
logger.info("Results: {}".format(results))
logger.info(f"Results: {results}")
return results

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#

View File

@ -20,10 +20,10 @@ def fill_mask(masked_input, model, tokenizer, topk=5):
topk_filled_outputs = []
for index, predicted_token_bpe in enumerate(topk_predicted_token_bpe.split(" ")):
predicted_token = predicted_token_bpe.replace("\u2581", " ")
if " {0}".format(masked_token) in masked_input:
if f" {masked_token}" in masked_input:
topk_filled_outputs.append(
(
masked_input.replace(" {0}".format(masked_token), predicted_token),
masked_input.replace(f" {masked_token}", predicted_token),
values[index].item(),
predicted_token,
)

View File

@ -1,7 +1,6 @@
#!/usr/bin/env python
import argparse
import json
from typing import List
from ltp import LTP
@ -42,7 +41,7 @@ def is_chinese(word: str):
return 1
def get_chinese_word(tokens: List[str]):
def get_chinese_word(tokens: list[str]):
word_set = set()
for token in tokens:
@ -53,7 +52,7 @@ def get_chinese_word(tokens: List[str]):
return word_list
def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
def add_sub_symbol(bert_tokens: list[str], chinese_word_set: set()):
if not chinese_word_set:
return bert_tokens
max_word_len = max([len(w) for w in chinese_word_set])
@ -77,7 +76,7 @@ def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
return bert_word
def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
def prepare_ref(lines: list[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
ltp_res = []
for i in range(0, len(lines), 100):
@ -117,7 +116,7 @@ def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokeni
def main(args):
# For Chinese (Ro)Bert, the best result is from : RoBERTa-wwm-ext (https://github.com/ymcui/Chinese-BERT-wwm)
# If we want to fine-tune these model, we have to use same tokenizer : LTP (https://github.com/HIT-SCIR/ltp)
with open(args.file_name, "r", encoding="utf-8") as f:
with open(args.file_name, encoding="utf-8") as f:
data = f.readlines()
data = [line.strip() for line in data if len(line) > 0 and not line.isspace()] # avoid delimiter like '\u2029'
ltp_tokenizer = LTP(args.ltp) # faster in GPU device

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -358,7 +357,7 @@ def main():
logger.info("***** Eval results *****")
for key in sorted(result.keys()):
logger.info(" %s = %s", key, str(result[key]))
writer.write("%s = %s\n" % (key, str(result[key])))
writer.write("{} = {}\n".format(key, str(result[key])))
results.update(result)

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -163,7 +162,7 @@ def main():
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
logger.info("device: {}, n_gpu {}".format(device, n_gpu))
logger.info(f"device: {device}, n_gpu {n_gpu}")
if not args.do_train and not args.do_eval:
raise ValueError("At least one of `do_train` or `do_eval` must be True.")
@ -261,7 +260,7 @@ def main():
loss.item() if exp_average_loss is None else 0.7 * exp_average_loss + 0.3 * loss.item()
)
nb_tr_steps += 1
tqdm_bar.desc = "Training loss: {:.2e} lr: {:.2e}".format(exp_average_loss, scheduler.get_lr()[0])
tqdm_bar.desc = f"Training loss: {exp_average_loss:.2e} lr: {scheduler.get_lr()[0]:.2e}"
# Save a trained model
if args.do_train:
@ -313,7 +312,7 @@ def main():
logger.info("***** Eval results *****")
for key in sorted(result.keys()):
logger.info(" %s = %s", key, str(result[key]))
writer.write("%s = %s\n" % (key, str(result[key])))
writer.write("{} = {}\n".format(key, str(result[key])))
if __name__ == "__main__":

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -51,7 +50,7 @@ except ImportError:
logger = logging.getLogger(__name__)
class SwagExample(object):
class SwagExample:
"""A single training/test example for the SWAG dataset."""
def __init__(self, swag_id, context_sentence, start_ending, ending_0, ending_1, ending_2, ending_3, label=None):
@ -71,22 +70,22 @@ class SwagExample(object):
def __repr__(self):
attributes = [
"swag_id: {}".format(self.swag_id),
"context_sentence: {}".format(self.context_sentence),
"start_ending: {}".format(self.start_ending),
"ending_0: {}".format(self.endings[0]),
"ending_1: {}".format(self.endings[1]),
"ending_2: {}".format(self.endings[2]),
"ending_3: {}".format(self.endings[3]),
f"swag_id: {self.swag_id}",
f"context_sentence: {self.context_sentence}",
f"start_ending: {self.start_ending}",
f"ending_0: {self.endings[0]}",
f"ending_1: {self.endings[1]}",
f"ending_2: {self.endings[2]}",
f"ending_3: {self.endings[3]}",
]
if self.label is not None:
attributes.append("label: {}".format(self.label))
attributes.append(f"label: {self.label}")
return ", ".join(attributes)
class InputFeatures(object):
class InputFeatures:
def __init__(self, example_id, choices_features, label):
self.example_id = example_id
self.choices_features = [
@ -97,7 +96,7 @@ class InputFeatures(object):
def read_swag_examples(input_file, is_training=True):
with open(input_file, "r", encoding="utf-8") as f:
with open(input_file, encoding="utf-8") as f:
lines = list(csv.reader(f))
if is_training and lines[0][-1] != "label":
@ -179,15 +178,15 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, is_trainin
label = example.label
if example_index < 5:
logger.info("*** Example ***")
logger.info("swag_id: {}".format(example.swag_id))
logger.info(f"swag_id: {example.swag_id}")
for choice_idx, (tokens, input_ids, input_mask, segment_ids) in enumerate(choices_features):
logger.info("choice: {}".format(choice_idx))
logger.info(f"choice: {choice_idx}")
logger.info("tokens: {}".format(" ".join(tokens)))
logger.info("input_ids: {}".format(" ".join(map(str, input_ids))))
logger.info("input_mask: {}".format(" ".join(map(str, input_mask))))
logger.info("segment_ids: {}".format(" ".join(map(str, segment_ids))))
if is_training:
logger.info("label: {}".format(label))
logger.info(f"label: {label}")
features.append(InputFeatures(example_id=example.swag_id, choices_features=choices_features, label=label))
@ -382,14 +381,14 @@ def train(args, train_dataset, model, tokenizer):
): # Only evaluate when single GPU otherwise metrics may not average well
results = evaluate(args, model, tokenizer)
for key, value in results.items():
tb_writer.add_scalar("eval_{}".format(key), value, global_step)
tb_writer.add_scalar(f"eval_{key}", value, global_step)
tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
logging_loss = tr_loss
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
# Save model checkpoint
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}")
model_to_save = (
model.module if hasattr(model, "module") else model
) # Take care of distributed/parallel training
@ -423,7 +422,7 @@ def evaluate(args, model, tokenizer, prefix=""):
eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)
# Eval!
logger.info("***** Running evaluation {} *****".format(prefix))
logger.info(f"***** Running evaluation {prefix} *****")
logger.info(" Num examples = %d", len(dataset))
logger.info(" Batch size = %d", args.eval_batch_size)
@ -466,7 +465,7 @@ def evaluate(args, model, tokenizer, prefix=""):
logger.info("***** Eval results *****")
for key in sorted(result.keys()):
logger.info("%s = %s", key, str(result[key]))
writer.write("%s = %s\n" % (key, str(result[key])))
writer.write("{} = {}\n".format(key, str(result[key])))
return result
@ -710,10 +709,10 @@ def main():
# Evaluate
result = evaluate(args, model, tokenizer, prefix=global_step)
result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()}
results.update(result)
logger.info("Results: {}".format(results))
logger.info(f"Results: {results}")
return results
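
Besides the f-string conversions, this file drops the explicit object base class: in Python 3 every class is new-style and inherits from object implicitly, so `class SwagExample(object):` and `class SwagExample:` define identical classes. A quick check:

    class Explicit(object):
        pass

    class Implicit:
        pass

    assert Explicit.__bases__ == Implicit.__bases__ == (object,)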

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -66,7 +65,7 @@ def main():
ptvsd.wait_for_attach()
device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
logger.info("device: {}".format(device))
logger.info(f"device: {device}")
# Load a pre-processed dataset
# You can also build the corpus yourself using TransfoXLCorpus methods
@ -111,7 +110,7 @@ def main():
total_loss += seq_len * loss.item()
total_len += seq_len
total_time = time.time() - start_time
logger.info("Time : {:.2f}s, {:.2f}ms/segment".format(total_time, 1000 * total_time / (idx + 1)))
logger.info(f"Time : {total_time:.2f}s, {1000 * total_time / (idx + 1):.2f}ms/segment")
return total_loss / total_len
# Run on test data.

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 Huggingface
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -13,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import json
import unittest
@ -25,7 +23,7 @@ from utils import calculate_bleu
filename = get_tests_dir() + "/test_data/fsmt/fsmt_val_data.json"
with io.open(filename, "r", encoding="utf-8") as f:
with open(filename, encoding="utf-8") as f:
bleu_data = json.load(f)

View File

@ -19,7 +19,6 @@ import time
from json import JSONDecodeError
from logging import getLogger
from pathlib import Path
from typing import Dict, List
import torch
from torch.utils.data import DataLoader
@ -55,10 +54,10 @@ def eval_data_dir(
task="summarization",
local_rank=None,
num_return_sequences=1,
dataset_kwargs: Dict = None,
dataset_kwargs: dict = None,
prefix="",
**generate_kwargs,
) -> Dict:
) -> dict:
"""Run evaluation on part of the data for one gpu and save to {save_dir}/rank_{rank}_output.json"""
model_name = str(model_name)
assert local_rank is not None
@ -211,7 +210,7 @@ def run_generate():
calc_bleu = "translation" in args.task
score_fn = calculate_bleu if calc_bleu else calculate_rouge
metric_name = "bleu" if calc_bleu else "rouge"
metrics: Dict = score_fn(preds, labels)
metrics: dict = score_fn(preds, labels)
metrics["n_obs"] = len(preds)
runtime = time.time() - start_time
metrics["seconds_per_sample"] = round(runtime / metrics["n_obs"], 4)
@ -227,7 +226,7 @@ def run_generate():
shutil.rmtree(json_save_dir)
def combine_partial_results(partial_results) -> List:
def combine_partial_results(partial_results) -> list:
"""Concatenate partial results into one file, then sort it by id."""
records = []
for partial_result in partial_results:
@ -237,7 +236,7 @@ def combine_partial_results(partial_results) -> List:
return preds
def gather_results_from_each_node(num_replicas, save_dir, timeout) -> List[Dict[str, List]]:
def gather_results_from_each_node(num_replicas, save_dir, timeout) -> list[dict[str, list]]:
# WAIT FOR lots of .json files
start_wait = time.time()
logger.info("waiting for all nodes to finish")

View File

@ -20,7 +20,6 @@ import time
import warnings
from logging import getLogger
from pathlib import Path
from typing import Dict, List
import torch
from tqdm import tqdm
@ -36,7 +35,7 @@ DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
def generate_summaries_or_translations(
examples: List[str],
examples: list[str],
out_file: str,
model_name: str,
batch_size: int = 8,
@ -45,7 +44,7 @@ def generate_summaries_or_translations(
task="summarization",
prefix=None,
**generate_kwargs,
) -> Dict:
) -> dict:
"""Save model.generate results to <out_file>, and return how long it took."""
fout = Path(out_file).open("w", encoding="utf-8")
model_name = str(model_name)

View File

@ -34,7 +34,7 @@ task_score_names = {
def parse_search_arg(search):
groups = search.split()
entries = dict((g.split("=") for g in groups))
entries = dict(g.split("=") for g in groups)
entry_names = list(entries.keys())
sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()]
matrix = [list(x) for x in itertools.product(*sets)]
@ -105,7 +105,7 @@ def run_search():
col_widths = {col: len(str(col)) for col in col_names}
results = []
for r in matrix:
hparams = dict((x.replace("--", "").split() for x in r))
hparams = dict(x.replace("--", "").split() for x in r)
args_exp = " ".join(r).split()
args_exp.extend(["--bs", str(args.bs)]) # in case we need to reduce its size due to CUDA OOM
sys.argv = args_normal + args_exp
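
The parenthesis changes above work because a generator expression passed as the sole argument to a call needs no wrapping parentheses of its own; dict() consumes the key/value pairs either way. A sketch with a made-up search string:

    search = "bs=16 lr=3e-5"
    entries = dict(g.split("=") for g in search.split())  # bare generator argument
    assert entries == {"bs": "16", "lr": "3e-5"}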

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Optional, Union
import torch
from torch import nn
@ -172,10 +172,10 @@ class Seq2SeqTrainer(Trainer):
def prediction_step(
self,
model: nn.Module,
inputs: Dict[str, Union[torch.Tensor, Any]],
inputs: dict[str, Union[torch.Tensor, Any]],
prediction_loss_only: bool,
ignore_keys: Optional[List[str]] = None,
) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
ignore_keys: Optional[list[str]] = None,
) -> tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
"""
Perform an evaluation step on :obj:`model` using obj:`inputs`.

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python
import io
import json
import subprocess
@ -29,5 +28,5 @@ def get_all_data(pairs, n_objs):
text = get_all_data(pairs, n_objs)
filename = "./fsmt_val_data.json"
with io.open(filename, "w", encoding="utf-8") as f:
with open(filename, "w", encoding="utf-8") as f:
bleu_data = json.dump(text, f, indent=2, ensure_ascii=False)
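
The io.open change is a pure simplification: in Python 3, io.open is the very object the builtin open names, so nothing about modes or encodings shifts. A one-line check:

    import io
    assert io.open is open  # same function object in Python 3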

View File

@ -19,9 +19,10 @@ import math
import os
import pickle
import socket
from collections.abc import Iterable
from logging import getLogger
from pathlib import Path
from typing import Callable, Dict, Iterable, List, Tuple, Union
from typing import Callable, Union
import git
import numpy as np
@ -67,7 +68,7 @@ def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=-100):
return loss, nll_loss
def lmap(f: Callable, x: Iterable) -> List:
def lmap(f: Callable, x: Iterable) -> list:
"""list(map(f, x))"""
return list(map(f, x))
@ -77,11 +78,11 @@ def calculate_bleu(output_lns, refs_lns, **kwargs) -> dict:
return {"bleu": round(corpus_bleu(output_lns, [refs_lns], **kwargs).score, 4)}
def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], Dict]:
def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], dict]:
def non_pad_len(tokens: np.ndarray) -> int:
return np.count_nonzero(tokens != tokenizer.pad_token_id)
def decode_pred(pred: EvalPrediction) -> Tuple[List[str], List[str]]:
def decode_pred(pred: EvalPrediction) -> tuple[list[str], list[str]]:
pred_ids = pred.predictions
label_ids = pred.label_ids
pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
@ -91,16 +92,16 @@ def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) ->
label_str = lmap(str.strip, label_str)
return pred_str, label_str
def summarization_metrics(pred: EvalPrediction) -> Dict:
def summarization_metrics(pred: EvalPrediction) -> dict:
pred_str, label_str = decode_pred(pred)
rouge: Dict = calculate_rouge(pred_str, label_str)
rouge: dict = calculate_rouge(pred_str, label_str)
summ_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1)
rouge.update({"gen_len": summ_len})
return rouge
def translation_metrics(pred: EvalPrediction) -> Dict:
def translation_metrics(pred: EvalPrediction) -> dict:
pred_str, label_str = decode_pred(pred)
bleu: Dict = calculate_bleu(pred_str, label_str)
bleu: dict = calculate_bleu(pred_str, label_str)
gen_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1)
bleu.update({"gen_len": gen_len})
return bleu
@ -183,7 +184,7 @@ class AbstractSeq2SeqDataset(Dataset):
return min(self.src_lens[i], self.max_target_length)
# call fairseq cython function
batch_sampler: List[List[int]] = batch_by_size(
batch_sampler: list[list[int]] = batch_by_size(
sorted_indices,
num_tokens_fn=num_tokens_in_example,
max_tokens=max_tokens_per_batch,
@ -207,7 +208,7 @@ class AbstractSeq2SeqDataset(Dataset):
class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
def __getitem__(self, index) -> Dict[str, torch.Tensor]:
def __getitem__(self, index) -> dict[str, torch.Tensor]:
"""Call tokenizer on src and tgt_lines"""
index = index + 1 # linecache starts at 1
source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n")
@ -237,7 +238,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
**self.dataset_kwargs,
)
def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
def collate_fn(self, batch) -> dict[str, torch.Tensor]:
input_ids = torch.stack([x["input_ids"] for x in batch])
masks = torch.stack([x["attention_mask"] for x in batch])
target_ids = torch.stack([x["labels"] for x in batch])
@ -255,7 +256,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
class Seq2SeqDataset(AbstractSeq2SeqDataset):
"""A dataset that calls prepare_seq2seq_batch."""
def __getitem__(self, index) -> Dict[str, str]:
def __getitem__(self, index) -> dict[str, str]:
index = index + 1 # linecache starts at 1
source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n")
tgt_line = linecache.getline(str(self.tgt_file), index).rstrip("\n")
@ -263,9 +264,9 @@ class Seq2SeqDataset(AbstractSeq2SeqDataset):
assert tgt_line, f"empty tgt line for index {index}"
return {"tgt_texts": tgt_line, "src_texts": source_line, "id": index - 1}
def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
def collate_fn(self, batch) -> dict[str, torch.Tensor]:
"""Call prepare_seq2seq_batch."""
batch_encoding: Dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch(
batch_encoding: dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch(
[x["src_texts"] for x in batch],
tgt_texts=[x["tgt_texts"] for x in batch],
max_length=self.max_source_length,
@ -293,7 +294,7 @@ class Seq2SeqDataCollator:
if data_args.tgt_lang is not None:
self.dataset_kwargs["tgt_lang"] = data_args.tgt_lang
def __call__(self, batch) -> Dict[str, torch.Tensor]:
def __call__(self, batch) -> dict[str, torch.Tensor]:
if hasattr(self.tokenizer, "prepare_seq2seq_batch"):
batch = self._encode(batch)
input_ids, attention_mask, labels = (
@ -329,7 +330,7 @@ class Seq2SeqDataCollator:
shifted_input_ids[..., 0] = self.pad_token_id
return shifted_input_ids
def _encode(self, batch) -> Dict[str, torch.Tensor]:
def _encode(self, batch) -> dict[str, torch.Tensor]:
batch_encoding = self.tokenizer.prepare_seq2seq_batch(
[x["src_texts"] for x in batch],
tgt_texts=[x["tgt_texts"] for x in batch],
@ -355,7 +356,7 @@ class SortishSampler(Sampler):
return iter(sortish_sampler_indices(self.data, self.bs, shuffle=self.shuffle))
def sortish_sampler_indices(data: List, bs: int, shuffle=True) -> np.array:
def sortish_sampler_indices(data: list, bs: int, shuffle=True) -> np.array:
"Go through the text data by order of src length with a bit of randomness. From fastai repo."
if not shuffle:
return np.argsort(np.array(data) * -1)
@ -455,7 +456,7 @@ def pickle_save(obj, path):
return pickle.dump(obj, f)
def flatten_list(summary_ids: List[List]):
def flatten_list(summary_ids: list[list]):
return list(itertools.chain.from_iterable(summary_ids))
@ -506,14 +507,14 @@ def extract_rouge_mid_statistics(dct):
def calculate_rouge(
pred_lns: List[str],
tgt_lns: List[str],
pred_lns: list[str],
tgt_lns: list[str],
use_stemmer=True,
rouge_keys=ROUGE_KEYS,
return_precision_and_recall=False,
bootstrap_aggregation=True,
newline_sep=True,
) -> Dict:
) -> dict:
"""Calculate rouge using rouge_scorer package.
Args:
@ -590,19 +591,19 @@ def any_requires_grad(model: nn.Module) -> bool:
def assert_all_frozen(model):
model_grads: List[bool] = list(grad_status(model))
model_grads: list[bool] = list(grad_status(model))
n_require_grad = sum(lmap(int, model_grads))
npars = len(model_grads)
assert not any(model_grads), f"{n_require_grad / npars:.1%} of {npars} weights require grad"
def assert_not_all_frozen(model):
model_grads: List[bool] = list(grad_status(model))
model_grads: list[bool] = list(grad_status(model))
npars = len(model_grads)
assert any(model_grads), f"none of {npars} weights require grad"
def parse_numeric_n_bool_cl_kwargs(unparsed_args: List[str]) -> Dict[str, Union[int, float, bool]]:
def parse_numeric_n_bool_cl_kwargs(unparsed_args: list[str]) -> dict[str, Union[int, float, bool]]:
"""
Parse an argv list of unspecified command line args to a dict.
Assumes all values are either numeric or boolean in the form of true/false.

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -20,7 +19,7 @@ import os
import sys
from dataclasses import dataclass, field
from importlib import import_module
from typing import Dict, List, Optional, Tuple
from typing import Optional
import numpy as np
from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
@ -159,7 +158,7 @@ def main():
# Prepare CONLL-2003 task
labels = token_classification_task.get_labels(data_args.labels)
label_map: Dict[int, str] = dict(enumerate(labels))
label_map: dict[int, str] = dict(enumerate(labels))
num_labels = len(labels)
# Load pretrained model and tokenizer
@ -217,7 +216,7 @@ def main():
else None
)
def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> Tuple[List[int], List[int]]:
def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> tuple[list[int], list[int]]:
preds = np.argmax(predictions, axis=2)
batch_size, seq_len = preds.shape
@ -233,7 +232,7 @@ def main():
return preds_list, out_label_list
def compute_metrics(p: EvalPrediction) -> Dict:
def compute_metrics(p: EvalPrediction) -> dict:
preds_list, out_label_list = align_predictions(p.predictions, p.label_ids)
return {
"accuracy_score": accuracy_score(out_label_list, preds_list),
@ -279,7 +278,7 @@ def main():
logger.info("***** Eval results *****")
for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
writer.write("{} = {}\n".format(key, value))
results.update(result)
@ -304,13 +303,13 @@ def main():
with open(output_test_results_file, "w") as writer:
for key, value in metrics.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
writer.write("{} = {}\n".format(key, value))
# Save predictions
output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
if trainer.is_world_process_zero():
with open(output_test_predictions_file, "w") as writer:
with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
with open(os.path.join(data_args.data_dir, "test.txt")) as f:
token_classification_task.write_predictions_to_file(writer, f, preds_list)
return results

View File

@ -12,7 +12,7 @@ subword_len_counter = 0
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
max_len -= tokenizer.num_special_tokens_to_add()
with open(dataset, "rt") as f_p:
with open(dataset) as f_p:
for line in f_p:
line = line.rstrip()

View File

@ -1,6 +1,6 @@
import logging
import os
from typing import List, TextIO, Union
from typing import TextIO, Union
from conllu import parse_incr
from utils_ner import InputExample, Split, TokenClassificationTask
@ -14,7 +14,7 @@ class NER(TokenClassificationTask):
# in NER datasets, the last column is usually reserved for NER label
self.label_idx = label_idx
def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]:
def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]:
if isinstance(mode, Split):
mode = mode.value
file_path = os.path.join(data_dir, f"{mode}.txt")
@ -42,7 +42,7 @@ class NER(TokenClassificationTask):
examples.append(InputExample(guid=f"{mode}-{guid_index}", words=words, labels=labels))
return examples
def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List):
def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list):
example_id = 0
for line in test_input_reader:
if line.startswith("-DOCSTART-") or line == "" or line == "\n":
@ -55,9 +55,9 @@ class NER(TokenClassificationTask):
else:
logger.warning("Maximum sequence length exceeded: No prediction for '%s'.", line.split()[0])
def get_labels(self, path: str) -> List[str]:
def get_labels(self, path: str) -> list[str]:
if path:
with open(path, "r") as f:
with open(path) as f:
labels = f.read().splitlines()
if "O" not in labels:
labels = ["O"] + labels
@ -71,9 +71,9 @@ class Chunk(NER):
# in CONLL2003 dataset chunk column is second-to-last
super().__init__(label_idx=-2)
def get_labels(self, path: str) -> List[str]:
def get_labels(self, path: str) -> list[str]:
if path:
with open(path, "r") as f:
with open(path) as f:
labels = f.read().splitlines()
if "O" not in labels:
labels = ["O"] + labels
@ -105,7 +105,7 @@ class Chunk(NER):
class POS(TokenClassificationTask):
def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]:
def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]:
if isinstance(mode, Split):
mode = mode.value
file_path = os.path.join(data_dir, f"{mode}.txt")
@ -125,7 +125,7 @@ class POS(TokenClassificationTask):
guid_index += 1
return examples
def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List):
def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list):
example_id = 0
for sentence in parse_incr(test_input_reader):
s_p = preds_list[example_id]
@ -136,9 +136,9 @@ class POS(TokenClassificationTask):
writer.write(out)
example_id += 1
def get_labels(self, path: str) -> List[str]:
def get_labels(self, path: str) -> list[str]:
if path:
with open(path, "r") as f:
with open(path) as f:
return f.read().splitlines()
else:
return [

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -19,7 +18,7 @@ import logging
import os
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Union
from typing import Optional, Union
from filelock import FileLock
@ -42,8 +41,8 @@ class InputExample:
"""
guid: str
words: List[str]
labels: Optional[List[str]]
words: list[str]
labels: Optional[list[str]]
@dataclass
@ -53,10 +52,10 @@ class InputFeatures:
Property names are the same names as the corresponding inputs to a model.
"""
input_ids: List[int]
attention_mask: List[int]
token_type_ids: Optional[List[int]] = None
label_ids: Optional[List[int]] = None
input_ids: list[int]
attention_mask: list[int]
token_type_ids: Optional[list[int]] = None
label_ids: Optional[list[int]] = None
class Split(Enum):
@ -67,17 +66,17 @@ class Split(Enum):
class TokenClassificationTask:
@staticmethod
def read_examples_from_file(data_dir, mode: Union[Split, str]) -> List[InputExample]:
def read_examples_from_file(data_dir, mode: Union[Split, str]) -> list[InputExample]:
raise NotImplementedError
@staticmethod
def get_labels(path: str) -> List[str]:
def get_labels(path: str) -> list[str]:
raise NotImplementedError
@staticmethod
def convert_examples_to_features(
examples: List[InputExample],
label_list: List[str],
examples: list[InputExample],
label_list: list[str],
max_seq_length: int,
tokenizer: PreTrainedTokenizer,
cls_token_at_end=False,
@ -91,7 +90,7 @@ class TokenClassificationTask:
pad_token_label_id=-100,
sequence_a_segment_id=0,
mask_padding_with_zero=True,
) -> List[InputFeatures]:
) -> list[InputFeatures]:
"""Loads a data file into a list of `InputFeatures`
`cls_token_at_end` define the location of the CLS token:
- False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
@ -214,7 +213,7 @@ if is_torch_available():
soon.
"""
features: List[InputFeatures]
features: list[InputFeatures]
pad_token_label_id: int = nn.CrossEntropyLoss().ignore_index
# Use cross entropy ignore_index as padding label id so that only
# real label ids contribute to the loss later.
@ -224,7 +223,7 @@ if is_torch_available():
token_classification_task: TokenClassificationTask,
data_dir: str,
tokenizer: PreTrainedTokenizer,
labels: List[str],
labels: list[str],
model_type: str,
max_seq_length: Optional[int] = None,
overwrite_cache=False,
@ -233,7 +232,7 @@ if is_torch_available():
# Load data features from cache or dataset file
cached_features_file = os.path.join(
data_dir,
"cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
f"cached_{mode.value}_{tokenizer.__class__.__name__}_{str(max_seq_length)}",
)
# Make sure only the first process in distributed training processes the dataset,
@ -283,7 +282,7 @@ if is_tf_available():
soon.
"""
features: List[InputFeatures]
features: list[InputFeatures]
pad_token_label_id: int = -100
# Use cross entropy ignore_index as padding label id so that only
# real label ids contribute to the loss later.
@ -293,7 +292,7 @@ if is_tf_available():
token_classification_task: TokenClassificationTask,
data_dir: str,
tokenizer: PreTrainedTokenizer,
labels: List[str],
labels: list[str],
model_type: str,
max_seq_length: Optional[int] = None,
overwrite_cache=False,

View File

@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_new_imgproc_model.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Dict, List, Optional, Union
from typing import Optional, Union
import numpy as np
import torch
@ -74,13 +74,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
def __init__(
self,
do_resize: bool = True,
size: Dict[str, int] = None,
size: dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BICUBIC,
do_rescale: bool = True,
rescale_factor: Union[int, float] = 1 / 255,
do_normalize: bool = True,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
image_mean: Optional[Union[float, list[float]]] = None,
image_std: Optional[Union[float, list[float]]] = None,
do_convert_rgb: bool = True,
**kwargs,
) -> None:
@ -101,7 +101,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
def resize(
self,
image: np.ndarray,
size: Dict[str, int],
size: dict[str, int],
resample: PILImageResampling = PILImageResampling.BICUBIC,
data_format: Optional[Union[str, ChannelDimension]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
@ -151,13 +151,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
self,
images: ImageInput,
do_resize: Optional[bool] = None,
size: Optional[Dict[str, int]] = None,
size: Optional[dict[str, int]] = None,
resample: PILImageResampling = None,
do_rescale: Optional[bool] = None,
rescale_factor: Optional[float] = None,
do_normalize: Optional[bool] = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
image_mean: Optional[Union[float, list[float]]] = None,
image_std: Optional[Union[float, list[float]]] = None,
return_tensors: Optional[Union[str, TensorType]] = None,
do_convert_rgb: bool = None,
data_format: ChannelDimension = ChannelDimension.FIRST,

View File

@ -5,7 +5,7 @@
# modular_add_function.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Note that zamba does not have the `apply_rotary_pos_emb` function!
from typing import Optional, Tuple
from typing import Optional
import torch
from torch import nn
@ -62,5 +62,5 @@ class TestAttention(nn.Module):
def __init__(self):
pass
def forward(self) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
def forward(self) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
_ = apply_rotary_pos_emb(1, 1, 1, 1)

View File

@ -5,7 +5,7 @@
# modular_dummy.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from functools import partial
from typing import Callable, Optional, Tuple, Union
from typing import Callable, Optional, Union
import torch
from torch import nn
@ -223,12 +223,12 @@ class DummyAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)
@ -290,9 +290,9 @@ class DummyDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@ -494,7 +494,7 @@ class DummyModel(DummyPreTrainedModel):
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
**flash_attn_kwargs: Unpack[FlashAttentionKwargs],
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

View File

@ -6,7 +6,7 @@
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
import math
import os
from typing import List, Optional, Tuple, Union
from typing import Optional, Union
import torch
from packaging import version
@ -136,9 +136,9 @@ class DummyBertSelfAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
mixed_query_layer = self.query(hidden_states)
# If this is instantiated as a cross-attention module, the keys
@ -245,9 +245,9 @@ class DummyBertSdpaSelfAttention(DummyBertSelfAttention):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None:
# TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented.
logger.warning_once(
@ -386,9 +386,9 @@ class DummyBertAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
self_outputs = self.self(
hidden_states,
attention_mask,
@ -454,9 +454,9 @@ class DummyBertLayer(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
# decoder uni-directional self-attention cached key/values tuple is at positions 1,2
self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
self_attention_outputs = self.attention(
@ -532,12 +532,12 @@ class DummyBertEncoder(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -858,12 +858,12 @@ class DummyBertModel(DummyBertPreTrainedModel):
inputs_embeds: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[list[torch.FloatTensor]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
r"""
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if

View File

@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_from_uppercase_model.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Optional, Tuple
from typing import Optional
import torch
from torch import nn
@ -53,7 +53,7 @@ class FromUppercaseModelAttention(nn.Module):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""Input shape: Batch x Time x Channel"""
bsz, tgt_len, embed_dim = hidden_states.size()
@ -148,7 +148,7 @@ class FromUppercaseModelFlashAttention2(FromUppercaseModelAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
output_attentions = False
batch_size, q_len, _ = hidden_states.size()
@ -226,7 +226,7 @@ class FromUppercaseModelSdpaAttention(FromUppercaseModelAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
if output_attentions:
# TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
logger.warning_once(
@ -322,7 +322,7 @@ class FromUppercaseModelEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
causal_attention_mask: torch.Tensor,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.FloatTensor]:
) -> tuple[torch.FloatTensor]:
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`

View File

@ -5,7 +5,7 @@
# modular_multimodal1.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from functools import partial
from typing import Callable, Optional, Tuple, Union
from typing import Callable, Optional, Union
import torch
from torch import nn
@ -223,12 +223,12 @@ class Multimodal1TextAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)
@ -290,9 +290,9 @@ class Multimodal1TextDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@ -494,7 +494,7 @@ class Multimodal1TextModel(Multimodal1TextPreTrainedModel):
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
**flash_attn_kwargs: Unpack[FlashAttentionKwargs],
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

View File

@ -5,7 +5,7 @@
# modular_multimodal2.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Optional, Tuple, Union
from typing import Optional, Union
import torch
from torch import nn
@ -65,7 +65,7 @@ class Multimodal2VisionAttention(nn.Module):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""Input shape: Batch x Time x Channel"""
bsz, tgt_len, embed_dim = hidden_states.size()
@ -152,7 +152,7 @@ class Multimodal2VisionSdpaAttention(Multimodal2VisionAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
if output_attentions:
# TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
logger.warning_once(
@ -233,7 +233,7 @@ class Multimodal2VisionFlashAttention2(Multimodal2VisionAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
output_attentions = False
batch_size, q_len, _ = hidden_states.size()
@ -334,7 +334,7 @@ class Multimodal2VisionEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
causal_attention_mask: torch.Tensor,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.FloatTensor]:
) -> tuple[torch.FloatTensor]:
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@ -392,7 +392,7 @@ class Multimodal2VisionEncoder(nn.Module):
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutput]:
) -> Union[tuple, BaseModelOutput]:
r"""
Args:
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
@ -587,7 +587,7 @@ class Multimodal2VisionTransformer(nn.Module):
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = False,
) -> Union[Tuple, BaseModelOutputWithPooling]:
) -> Union[tuple, BaseModelOutputWithPooling]:
r"""
Returns:
@ -671,7 +671,7 @@ class Multimodal2VisionModel(Multimodal2VisionPreTrainedModel):
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: bool = False,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPooling]:
) -> Union[tuple, BaseModelOutputWithPooling]:
r"""
Returns:

View File

@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_my_new_model2.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, Optional, Union
import torch
from torch import nn
@ -222,12 +222,12 @@ class MyNewModel2Attention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)
@ -289,9 +289,9 @@ class MyNewModel2DecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@ -485,7 +485,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
@ -493,7 +493,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs, # NOOP kwarg for now
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -753,14 +753,14 @@ class MyNewModel2ForSequenceClassification(MyNewModel2PreTrainedModel):
input_ids: Optional[torch.LongTensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, SequenceClassifierOutputWithPast]:
) -> Union[tuple, SequenceClassifierOutputWithPast]:
r"""
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,

View File
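Note that `Callable`, `Optional`, and `Union` keep coming from `typing` throughout, even though `collections.abc.Callable` is also subscriptable as of 3.9; presumably the rewrite was limited to the pure container aliases. The equivalent spelling, for reference:

from collections.abc import Callable

def apply(fn: Callable[[int], int], x: int) -> int:
    # collections.abc.Callable is subscriptable since Python 3.9;
    # typing.Callable still works and is what this diff keeps.
    return fn(x)

print(apply(lambda n: n + 1, 41))  # 42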

@ -5,7 +5,7 @@
# modular_new_task_model.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from dataclasses import dataclass
from typing import ClassVar, List, Optional, Tuple, Union
from typing import ClassVar, Optional, Union
import torch
from torch import nn
@ -61,9 +61,9 @@ class NewTaskModelCausalLMOutputWithPast(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None
hidden_states: Optional[tuple[torch.FloatTensor]] = None
attentions: Optional[tuple[torch.FloatTensor]] = None
image_hidden_states: Optional[torch.FloatTensor] = None
@ -337,7 +337,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
pixel_values: torch.FloatTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None,
past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None,
token_type_ids: Optional[torch.LongTensor] = None,
cache_position: Optional[torch.LongTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
@ -347,7 +347,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
num_logits_to_keep: int = 0,
) -> Union[Tuple, NewTaskModelCausalLMOutputWithPast]:
) -> Union[tuple, NewTaskModelCausalLMOutputWithPast]:
r"""
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@ -6,7 +6,7 @@
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
import math
import os
from typing import List, Optional, Tuple, Union
from typing import Optional, Union
import torch
import torch.nn as nn
@ -139,9 +139,9 @@ class RobertaSelfAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
mixed_query_layer = self.query(hidden_states)
# If this is instantiated as a cross-attention module, the keys
@ -248,9 +248,9 @@ class RobertaSdpaSelfAttention(RobertaSelfAttention):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None:
# TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented.
logger.warning_once(
@ -389,9 +389,9 @@ class RobertaAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
self_outputs = self.self(
hidden_states,
attention_mask,
@ -457,9 +457,9 @@ class RobertaLayer(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
# decoder uni-directional self-attention cached key/values tuple is at positions 1,2
self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
self_attention_outputs = self.attention(
@ -535,12 +535,12 @@ class RobertaEncoder(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -861,12 +861,12 @@ class RobertaModel(RobertaPreTrainedModel):
inputs_embeds: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[list[torch.FloatTensor]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
r"""
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if

View File

@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_super.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, Optional, Union
import torch
from torch import nn
@ -222,12 +222,12 @@ class SuperAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)
@ -289,9 +289,9 @@ class SuperDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@ -485,14 +485,14 @@ class SuperModel(SuperPreTrainedModel):
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
out = super().forward(
input_ids,
attention_mask,

View File

@ -5,7 +5,7 @@
# modular_switch_function.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Note that llama and cohere have different definitions for rotate_half
from typing import Callable, Optional, Tuple
from typing import Callable, Optional
import torch
from torch import nn
@ -123,12 +123,12 @@ class SwitchFunctionAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)

View File
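`Optional[...]` survives in every hunk because the PEP 604 spelling `X | None` only works at runtime from Python 3.10; on a 3.9 floor it parses solely under `from __future__ import annotations`, and only inside annotations. A sketch of both forms:

from __future__ import annotations  # annotations become lazy strings

from typing import Optional

def head(items: list[int] | None = None) -> Optional[int]:
    # The | form above is legal on 3.9 only because of the future import;
    # Optional[...] needs no such caveat, which is why the diff keeps it.
    return items[0] if items else None

print(head([3, 1]))  # 3
print(head())        # None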

@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Union
from typing import Optional, Union
import torch
@ -18,10 +18,10 @@ class DummyBertModel(BertModel):
inputs_embeds: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[list[torch.FloatTensor]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
return super().forward(input_ids)

View File

@ -1,4 +1,4 @@
from typing import ClassVar, List, Optional, Union
from typing import ClassVar, Optional, Union
import torch
import torch.utils.checkpoint
@ -29,7 +29,7 @@ class NewTaskModelForNewTask(PaliGemmaForConditionalGeneration):
pixel_values: torch.FloatTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None,
past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None,
token_type_ids: Optional[torch.LongTensor] = None,
cache_position: Optional[torch.LongTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,

View File

@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Union
from typing import Optional, Union
import torch
@ -15,14 +15,14 @@ class SuperModel(LlamaModel):
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
) -> Union[Tuple, CausalLMOutputWithPast]:
) -> Union[tuple, CausalLMOutputWithPast]:
out = super().forward(
input_ids,
attention_mask,

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File
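The header-only hunks such as the one above all make the same change: the `# coding=utf-8` declaration goes, because PEP 3120 made UTF-8 the default source encoding in Python 3, while the shebang stays since these example scripts are executed directly. Illustration:

# No encoding declaration needed: UTF-8 is the Python 3 default (PEP 3120).
label = "café 🚨"
print(len(label))  # 6 characters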

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -18,9 +17,10 @@
import logging
import os
import sys
from collections.abc import Mapping
from dataclasses import dataclass, field
from functools import partial
from typing import Any, Dict, List, Mapping, Optional
from typing import Any, Optional
import albumentations as A
import numpy as np
@ -200,7 +200,7 @@ class Evaluator:
def reset_metric(self):
self.metric.reset()
def postprocess_target_batch(self, target_batch) -> List[Dict[str, torch.Tensor]]:
def postprocess_target_batch(self, target_batch) -> list[dict[str, torch.Tensor]]:
"""Collect targets in a form of list of dictionaries with keys "masks", "labels"."""
batch_masks = target_batch[0]
batch_labels = target_batch[1]
@ -214,13 +214,13 @@ class Evaluator:
)
return post_processed_targets
def get_target_sizes(self, post_processed_targets) -> List[List[int]]:
def get_target_sizes(self, post_processed_targets) -> list[list[int]]:
target_sizes = []
for target in post_processed_targets:
target_sizes.append(target["masks"].shape[-2:])
return target_sizes
def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> List[Dict[str, torch.Tensor]]:
def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> list[dict[str, torch.Tensor]]:
"""Collect predictions in a form of list of dictionaries with keys "masks", "labels", "scores"."""
model_output = ModelOutput(class_queries_logits=prediction_batch[0], masks_queries_logits=prediction_batch[1])

View File
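Besides the generics swap, the file above moves `Mapping` from `typing` to `collections.abc`, its actual runtime home; the `typing` aliases of the ABCs are deprecated as of 3.9. A small sketch:

from collections.abc import Mapping

def summarize(batch: Mapping[str, int]) -> str:
    # collections.abc.Mapping is subscriptable since Python 3.9,
    # so the deprecated typing.Mapping alias is no longer needed.
    return ", ".join(f"{k}={v}" for k, v in sorted(batch.items()))

print(summarize({"labels": 4, "masks": 4}))  # labels=4, masks=4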

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -21,9 +20,10 @@ import logging
import math
import os
import sys
from collections.abc import Mapping
from functools import partial
from pathlib import Path
from typing import Any, Mapping
from typing import Any
import albumentations as A
import datasets

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -551,7 +550,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)
else:
@ -571,7 +570,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)

View File
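The `torch.stack(tuple(dist.sample() ...))` hunks above drop one pair of redundant parentheses: a generator expression that is the sole argument of a call needs no parentheses of its own. The same simplification with dependency-free stand-ins:

import random

random.seed(0)
# tuple(x for x in ...) and tuple((x for x in ...)) are equivalent;
# the inner parentheses are only required when other arguments follow.
samples = tuple(random.random() for _ in range(3))
print(len(samples))  # 3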

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -518,7 +517,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)
else:
@ -538,7 +537,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -18,9 +17,10 @@
import logging
import os
import sys
from collections.abc import Mapping
from dataclasses import dataclass, field
from functools import partial
from typing import Any, List, Mapping, Optional, Tuple, Union
from typing import Any, Optional, Union
import albumentations as A
import numpy as np
@ -60,7 +60,7 @@ class ModelOutput:
def format_image_annotations_as_coco(
image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]]
image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]]
) -> dict:
"""Format one set of image annotations to the COCO format
@ -94,7 +94,7 @@ def format_image_annotations_as_coco(
}
def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
"""
Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.
@ -148,7 +148,7 @@ def augment_and_transform_batch(
return result
def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]:
def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]:
data = {}
data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch])
data["labels"] = [x["labels"] for x in batch]

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -19,9 +18,10 @@ import json
import logging
import math
import os
from collections.abc import Mapping
from functools import partial
from pathlib import Path
from typing import Any, List, Mapping, Tuple, Union
from typing import Any, Union
import albumentations as A
import datasets
@ -61,7 +61,7 @@ require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/sema
# Copied from examples/pytorch/object-detection/run_object_detection.format_image_annotations_as_coco
def format_image_annotations_as_coco(
image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]]
image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]]
) -> dict:
"""Format one set of image annotations to the COCO format
@ -96,7 +96,7 @@ def format_image_annotations_as_coco(
# Copied from examples/pytorch/object-detection/run_object_detection.convert_bbox_yolo_to_pascal
def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
"""
Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.
@ -152,7 +152,7 @@ def augment_and_transform_batch(
# Copied from examples/pytorch/object-detection/run_object_detection.collate_fn
def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]:
def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]:
data = {}
data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch])
data["labels"] = [x["labels"] for x in batch]

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -33,7 +32,7 @@ def get_results(output_dir):
results = {}
path = os.path.join(output_dir, "all_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
results = json.load(f)
else:
raise ValueError(f"can't find {path}")

View File
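The test helper above sheds the explicit `"r"` in `open(path, "r")`, since reading text is `open()`'s default mode. A self-contained round trip, using a temporary file as a stand-in:

import json
import os
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "all_results.json")
    with open(path, "w") as f:      # writing still needs an explicit mode
        json.dump({"eval_loss": 0.1}, f)
    with open(path) as f:           # "r" is the default and can be dropped
        print(json.load(f)["eval_loss"])  # 0.1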

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -22,7 +21,7 @@ import logging
import os
import sys
from dataclasses import dataclass, field
from typing import List, Optional, Tuple
from typing import Optional
import datasets
import evaluate
@ -469,7 +468,7 @@ def main():
question_column: str,
context_column: str,
answer_column: str,
) -> Tuple[List[str], List[str]]:
) -> tuple[list[str], list[str]]:
questions = examples[question_column]
contexts = examples[context_column]
answers = examples[answer_column]

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -18,7 +17,7 @@ A subclass of `Trainer` specific to Question-Answering tasks
import math
import time
from typing import Dict, List, Optional
from typing import Optional
from torch.utils.data import Dataset
@ -42,10 +41,10 @@ class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
self,
eval_dataset: Optional[Dataset] = None,
eval_examples=None,
ignore_keys: Optional[List[str]] = None,
ignore_keys: Optional[list[str]] = None,
metric_key_prefix: str = "eval",
**gen_kwargs,
) -> Dict[str, float]:
) -> dict[str, float]:
gen_kwargs = gen_kwargs.copy()
# Use legacy argument setting if a) the option is not explicitly passed; and b) the argument is set in the

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -20,7 +19,7 @@ import collections
import json
import logging
import os
from typing import Optional, Tuple
from typing import Optional
import numpy as np
from tqdm.auto import tqdm
@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
def postprocess_qa_predictions(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@ -223,7 +222,7 @@ def postprocess_qa_predictions(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")
prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@ -253,7 +252,7 @@ def postprocess_qa_predictions(
def postprocess_qa_predictions_with_beam_search(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")
prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"

View File
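The `EnvironmentError` to `OSError` change above leans on PEP 3151 (Python 3.3), which turned `EnvironmentError` and `IOError` into plain aliases of `OSError`; raising the canonical name states the real class. For instance:

import os

def require_dir(path: str) -> None:
    # EnvironmentError has been an alias of OSError since Python 3.3,
    # so raising OSError names the class directly.
    if not os.path.isdir(path):
        raise OSError(f"{path} is not a directory.")

assert EnvironmentError is OSError  # the old name still resolves
require_dir(os.getcwd())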

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -258,7 +257,7 @@ def main():
else:
repo_id = data_args.dataset_name
filename = "id2label.json"
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: str(k) for k, v in id2label.items()}

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -316,7 +315,7 @@ def main():
else:
repo_id = args.dataset_name
filename = "id2label.json"
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -20,7 +19,7 @@ import math
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Union
from typing import Optional, Union
import datasets
import torch
@ -328,7 +327,7 @@ class DataCollatorForWav2Vec2Pretraining:
mask_time_prob: Optional[float] = 0.65
mask_time_length: Optional[int] = 10
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# reformat list to dict and set to pytorch format
batch = self.feature_extractor.pad(
features,
@ -716,7 +715,7 @@ def main():
}
log_str = ""
for k, v in train_logs.items():
log_str += "| {}: {:.3e}".format(k, v.item())
log_str += f"| {k}: {v.item():.3e}"
if accelerator.is_local_main_process:
progress_bar.write(log_str)
@ -773,7 +772,7 @@ def main():
log_str = ""
for k, v in val_logs.items():
log_str += "| {}: {:.3e}".format(k, v.item())
log_str += f"| {k}: {v.item():.3e}"
if accelerator.is_local_main_process:
progress_bar.write(log_str)

View File
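The logging hunks above trade `"| {}: {:.3e}".format(k, v.item())` for an f-string carrying the same format spec. A torch-free equivalent:

train_logs = {"loss": 0.0123456, "lr": 3e-5}
log_str = ""
for k, v in train_logs.items():
    log_str += f"| {k}: {v:.3e}"   # same spec as "{:.3e}".format(v)
print(log_str)  # | loss: 1.235e-02| lr: 3.000e-05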

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -24,7 +23,7 @@ import re
import sys
import warnings
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union
from typing import Optional, Union
import datasets
import evaluate
@ -211,11 +210,11 @@ class DataTrainingArguments:
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
chars_to_ignore: Optional[list[str]] = list_field(
default=None,
metadata={"help": "A list of characters to remove from the transcripts."},
)
eval_metrics: List[str] = list_field(
eval_metrics: list[str] = list_field(
default=["wer"],
metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
)
@ -318,7 +317,7 @@ class DataCollatorCTCWithPadding:
pad_to_multiple_of_labels: Optional[int] = None
feature_extractor_input_name: Optional[str] = "input_values"
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
input_features = [

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -24,7 +23,7 @@ import re
import sys
import warnings
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union
from typing import Optional, Union
import datasets
import evaluate
@ -201,11 +200,11 @@ class DataTrainingArguments:
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
chars_to_ignore: Optional[list[str]] = list_field(
default=None,
metadata={"help": "A list of characters to remove from the transcripts."},
)
eval_metrics: List[str] = list_field(
eval_metrics: list[str] = list_field(
default=["wer"],
metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
)
@ -300,7 +299,7 @@ class DataCollatorCTCWithPadding:
pad_to_multiple_of: Optional[int] = None
pad_to_multiple_of_labels: Optional[int] = None
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
input_features = [{"input_values": feature["input_values"]} for feature in features]

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -23,7 +22,7 @@ import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from typing import Any, Optional, Union
import datasets
import evaluate
@ -110,11 +109,11 @@ class ModelArguments:
freeze_encoder: bool = field(
default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
)
forced_decoder_ids: List[List[int]] = field(
forced_decoder_ids: list[list[int]] = field(
default=None,
metadata={"help": "Deprecated. Please use the `language` and `task` arguments instead."},
)
suppress_tokens: List[int] = field(
suppress_tokens: list[int] = field(
default=None,
metadata={
"help": (
@ -247,7 +246,7 @@ class DataCollatorSpeechSeq2SeqWithPadding:
decoder_start_token_id: int
forward_attention_mask: bool
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
model_input_name = self.processor.model_input_names[0]

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -51,7 +50,7 @@ def get_results(output_dir):
results = {}
path = os.path.join(output_dir, "all_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
results = json.load(f)
else:
raise ValueError(f"can't find {path}")

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -87,7 +86,7 @@ def get_results(output_dir):
results = {}
path = os.path.join(output_dir, "all_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
results = json.load(f)
else:
raise ValueError(f"can't find {path}")

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -21,7 +20,7 @@ import os
import random
import sys
from dataclasses import dataclass, field
from typing import List, Optional
from typing import Optional
import datasets
import evaluate
@ -256,7 +255,7 @@ class ModelArguments:
)
def get_label_list(raw_dataset, split="train") -> List[str]:
def get_label_list(raw_dataset, split="train") -> list[str]:
"""Get the list of labels from a multi-label dataset"""
if isinstance(raw_dataset[split]["label"][0], list):
@ -537,7 +536,7 @@ def main():
model.config.id2label = {id: label for label, id in label_to_id.items()}
elif not is_regression: # classification, but not training
logger.info("using label infos in the model config")
logger.info("label2id: {}".format(model.config.label2id))
logger.info(f"label2id: {model.config.label2id}")
label_to_id = model.config.label2id
else: # regression
label_to_id = None
@ -549,7 +548,7 @@ def main():
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
def multi_labels_to_ids(labels: List[str]) -> List[float]:
def multi_labels_to_ids(labels: list[str]) -> list[float]:
ids = [0.0] * len(label_to_id) # BCELoss requires float as target type
for label in labels:
ids[label_to_id[label]] = 1.0
@ -735,7 +734,7 @@ def main():
else:
item = label_list[item]
writer.write(f"{index}\t{item}\n")
logger.info("Predict results saved at {}".format(output_predict_file))
logger.info(f"Predict results saved at {output_predict_file}")
kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"}
if training_args.push_to_hub:

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -19,7 +18,6 @@
import argparse
import inspect
import logging
from typing import Tuple
import torch
from accelerate import PartialState
@ -271,8 +269,8 @@ class _ModelFallbackWrapper(GenerationMixin):
)
def _reorder_cache(
self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
) -> Tuple[Tuple[torch.Tensor]]:
self, past_key_values: tuple[tuple[torch.Tensor]], beam_idx: torch.Tensor
) -> tuple[tuple[torch.Tensor]]:
"""
This function is used to re-order the `past_key_values` cache if [`~PretrainedModel.beam_search`] or
[`~PretrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct

View File
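In `_reorder_cache` above, the generics swap nests cleanly: `tuple[tuple[torch.Tensor]]` reads exactly like the old `Tuple[Tuple[torch.Tensor]]`. A toy reorder in the same spirit (not the library's implementation), assuming torch is installed:

import torch

def reorder(past: tuple[tuple[torch.Tensor, ...], ...], beam_idx: torch.Tensor):
    # Select the beam dimension of every cached tensor, layer by layer.
    return tuple(tuple(t.index_select(0, beam_idx) for t in layer) for layer in past)

layer = (torch.arange(6.0).reshape(2, 3),)
reordered = reorder((layer,), torch.tensor([1, 0]))
print(reordered[0][0][0])  # tensor([3., 4., 5.])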

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 University of Cambridge, Tencent AI Lab, DeepMind and The University of Hong Kong Authors and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

Some files were not shown because too many files have changed in this diff.