diff --git a/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py b/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py index 0ebd1464883..557bfcd27a3 100644 --- a/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py +++ b/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2022 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index 879372a7523..f156057212e 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2022 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -833,8 +832,7 @@ def main(): # No need to shuffle here loader = data_loader(rng, _ds, batch_size=batch_size, shuffle=False) - for batch in loader: - yield batch + yield from loader # Metric metric = evaluate.load("rouge", cache_dir=model_args.cache_dir) diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py index 1f87eedd8a6..2bbb66a24ac 100644 --- a/examples/flax/language-modeling/run_bart_dlm_flax.py +++ b/examples/flax/language-modeling/run_bart_dlm_flax.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,7 +29,7 @@ from dataclasses import asdict, dataclass, field from enum import Enum from itertools import chain from pathlib import Path -from typing import Dict, List, Optional +from typing import Optional import flax import jax @@ -294,7 +293,7 @@ class FlaxDataCollatorForBartDenoisingLM: " language modeling. " ) - def __call__(self, examples: List[Dict[str, List[int]]]) -> BatchEncoding: + def __call__(self, examples: list[dict[str, list[int]]]) -> BatchEncoding: # convert list to dict and tensorize input batch = BatchEncoding( {k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()} diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index c486aae71f6..fb3ab65dc8b 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index 4d837e9c113..9b83c8db394 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -33,7 +32,7 @@ from itertools import chain # You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments. 
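The `run_image_captioning_flax.py` hunk above swaps a manual re-yield loop for `yield from`, which delegates to the inner generator (and, unlike the bare loop, also forwards `send()`/`throw()` and return values). A minimal sketch of the equivalence, using a hypothetical `batches()` stand-in for the script's `data_loader(...)`:

```python
def batches():
    # stand-in for the data_loader(...) generator in the script
    yield from ([i, i + 1] for i in range(0, 4, 2))


def loop_version():
    for batch in batches():  # old style: re-yield every item
        yield batch


def delegating_version():
    yield from batches()  # new style: delegate to the generator


assert list(loop_version()) == list(delegating_version()) == [[0, 1], [2, 3]]
```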
from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Optional import flax import jax @@ -302,7 +301,7 @@ class FlaxDataCollatorForLanguageModeling: "You should pass `mlm=False` to train on causal language modeling instead." ) - def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multiple_of: int) -> Dict[str, np.ndarray]: + def __call__(self, examples: list[dict[str, np.ndarray]], pad_to_multiple_of: int) -> dict[str, np.ndarray]: # Handle dict or lists with proper padding and conversion to tensor. batch = self.tokenizer.pad(examples, pad_to_multiple_of=pad_to_multiple_of, return_tensors=TensorType.NUMPY) @@ -316,7 +315,7 @@ class FlaxDataCollatorForLanguageModeling: def mask_tokens( self, inputs: np.ndarray, special_tokens_mask: Optional[np.ndarray] - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray]: """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """ diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index c133824fcc2..b376c26d32c 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,7 +31,7 @@ from dataclasses import asdict, dataclass, field from enum import Enum from itertools import chain from pathlib import Path -from typing import Dict, List, Optional +from typing import Optional import flax import jax @@ -338,7 +337,7 @@ class FlaxDataCollatorForT5MLM: pad_token_id: int decoder_start_token_id: int - def __call__(self, examples: List[Dict[str, np.ndarray]]) -> BatchEncoding: + def __call__(self, examples: list[dict[str, np.ndarray]]) -> BatchEncoding: # convert list to dict and tensorize input batch = BatchEncoding( {k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()} diff --git a/examples/flax/language-modeling/t5_tokenizer_model.py b/examples/flax/language-modeling/t5_tokenizer_model.py index a2be4afc946..8b988527966 100755 --- a/examples/flax/language-modeling/t5_tokenizer_model.py +++ b/examples/flax/language-modeling/t5_tokenizer_model.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import json -from typing import Iterator, List, Union +from collections.abc import Iterator +from typing import Union from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, trainers from tokenizers.implementations.base_tokenizer import BaseTokenizer @@ -72,7 +73,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer): def train( self, - files: Union[str, List[str]], + files: Union[str, list[str]], vocab_size: int = 8000, show_progress: bool = True, ): diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 4fc3ad9cbd7..eb506fff048 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Team All rights reserved. 
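Most hunks in this diff are the same rewrite: `typing.Dict`/`List`/`Tuple` become the built-in `dict`/`list`/`tuple`, which are subscriptable in annotations since Python 3.9 (PEP 585), while `Optional` and `Union` stay imported from `typing` (the `X | None` spelling would require Python 3.10). A small sketch of the new style with a hypothetical function, not one from the examples:

```python
from typing import Optional


def mask_stats(counts: list[dict[str, int]], default: Optional[int] = None) -> tuple[int, int]:
    # built-in generics (PEP 585, Python 3.9+) replace typing.List/Dict/Tuple
    total = sum(c.get("masked", default or 0) for c in counts)
    return total, len(counts)


print(mask_stats([{"masked": 3}, {"masked": 5}]))  # (8, 2)
```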
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,7 +27,7 @@ import time from dataclasses import asdict, dataclass, field from enum import Enum from pathlib import Path -from typing import Any, Callable, Dict, Optional, Tuple +from typing import Any, Callable, Optional import datasets import evaluate @@ -908,8 +907,8 @@ def main(): # region Define train step functions def train_step( - state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey - ) -> Tuple[train_state.TrainState, float]: + state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey + ) -> tuple[train_state.TrainState, float]: """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`.""" dropout_rng, new_dropout_rng = jax.random.split(dropout_rng) start_positions = batch.pop("start_positions") diff --git a/examples/flax/question-answering/utils_qa.py b/examples/flax/question-answering/utils_qa.py index 79497dbb816..f0cc5c26a69 100644 --- a/examples/flax/question-answering/utils_qa.py +++ b/examples/flax/question-answering/utils_qa.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2020 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +19,7 @@ import collections import json import logging import os -from typing import Optional, Tuple +from typing import Optional import numpy as np from tqdm.auto import tqdm @@ -32,7 +31,7 @@ logger = logging.getLogger(__name__) def postprocess_qa_predictions( examples, features, - predictions: Tuple[np.ndarray, np.ndarray], + predictions: tuple[np.ndarray, np.ndarray], version_2_with_negative: bool = False, n_best_size: int = 20, max_answer_length: int = 30, @@ -223,7 +222,7 @@ def postprocess_qa_predictions( # If we have an output_dir, let's save all those dicts. if output_dir is not None: if not os.path.isdir(output_dir): - raise EnvironmentError(f"{output_dir} is not a directory.") + raise OSError(f"{output_dir} is not a directory.") prediction_file = os.path.join( output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" @@ -253,7 +252,7 @@ def postprocess_qa_predictions( def postprocess_qa_predictions_with_beam_search( examples, features, - predictions: Tuple[np.ndarray, np.ndarray], + predictions: tuple[np.ndarray, np.ndarray], version_2_with_negative: bool = False, n_best_size: int = 20, max_answer_length: int = 30, @@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search( # If we have an output_dir, let's save all those dicts. if output_dir is not None: if not os.path.isdir(output_dir): - raise EnvironmentError(f"{output_dir} is not a directory.") + raise OSError(f"{output_dir} is not a directory.") prediction_file = os.path.join( output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" diff --git a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py index f4cd796fcbd..e70618eef0c 100644 --- a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py +++ b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 
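The `utils_qa.py` change from `EnvironmentError` to `OSError` is purely cosmetic: PEP 3151 merged the OS-level exception classes in Python 3.3 and left `EnvironmentError` as an alias. A quick self-contained check (the message is illustrative):

```python
# EnvironmentError has been an alias of OSError since Python 3.3 (PEP 3151),
# so raising OSError is the canonical, behavior-identical spelling.
assert EnvironmentError is OSError

try:
    raise OSError("/tmp/not-a-dir is not a directory.")  # illustrative message
except EnvironmentError as err:  # still caught: it is the same class
    print(type(err).__name__, "->", err)
```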
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,7 +24,7 @@ import time from dataclasses import field from functools import partial from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Optional, Union import datasets import evaluate @@ -303,7 +302,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding: pad_input_to_multiple_of: Optional[int] = None pad_target_to_multiple_of: Optional[int] = None - def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + def __call__(self, features: list[dict[str, Union[list[int], np.ndarray]]]) -> dict[str, np.ndarray]: # split inputs and labels since they have to be of different lengths and need # different padding methods model_input_name = self.processor.model_input_names[0] diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index 36407df3b41..aab44c88a02 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/flax/test_flax_examples.py b/examples/flax/test_flax_examples.py index c81d6378185..132be94e318 100644 --- a/examples/flax/test_flax_examples.py +++ b/examples/flax/test_flax_examples.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2021 HuggingFace Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -64,7 +63,7 @@ def get_setup_file(): def get_results(output_dir, split="eval"): path = os.path.join(output_dir, f"{split}_results.json") if os.path.exists(path): - with open(path, "r") as f: + with open(path) as f: return json.load(f) raise ValueError(f"can't find {path}") diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index 66a694ce1b5..b5378b8c117 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,7 +24,7 @@ import time import warnings from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Callable, Dict, Optional, Tuple +from typing import Any, Callable, Optional import datasets import evaluate @@ -572,8 +571,8 @@ def main(): # define step functions def train_step( - state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey - ) -> Tuple[train_state.TrainState, float]: + state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey + ) -> tuple[train_state.TrainState, float]: """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`.""" dropout_rng, new_dropout_rng = jax.random.split(dropout_rng) targets = batch.pop("labels") diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index a966d9842c8..854d7c71366 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,7 +26,7 @@ from dataclasses import asdict, dataclass, field from enum import Enum from itertools import chain from pathlib import Path -from typing import Any, Callable, Dict, Optional, Tuple +from typing import Any, Callable, Optional import datasets import evaluate @@ -651,8 +650,8 @@ def main(): # define step functions def train_step( - state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey - ) -> Tuple[train_state.TrainState, float]: + state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey + ) -> tuple[train_state.TrainState, float]: """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`.""" dropout_rng, new_dropout_rng = jax.random.split(dropout_rng) targets = batch.pop("labels") diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py index 4b50352f580..4eddd36f962 100644 --- a/examples/flax/vision/run_image_classification.py +++ b/examples/flax/vision/run_image_classification.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/legacy/benchmarking/plot_csv_file.py b/examples/legacy/benchmarking/plot_csv_file.py index aa092f5c047..d802dbe67e8 100644 --- a/examples/legacy/benchmarking/plot_csv_file.py +++ b/examples/legacy/benchmarking/plot_csv_file.py @@ -15,7 +15,7 @@ import csv from collections import defaultdict from dataclasses import dataclass, field -from typing import List, Optional +from typing import Optional import matplotlib.pyplot as plt import numpy as np @@ -59,7 +59,7 @@ class PlotArguments: default=None, metadata={"help": "Filename under which the plot will be saved. If unused no plot is saved."}, ) - short_model_names: Optional[List[str]] = list_field( + short_model_names: Optional[list[str]] = list_field( default=None, metadata={"help": "List of model names that are used instead of the ones in the csv file."} ) diff --git a/examples/legacy/benchmarking/run_benchmark.py b/examples/legacy/benchmarking/run_benchmark.py index 85f266566bf..1bdd69bbe29 100644 --- a/examples/legacy/benchmarking/run_benchmark.py +++ b/examples/legacy/benchmarking/run_benchmark.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2020 The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # diff --git a/examples/legacy/multiple_choice/run_multiple_choice.py b/examples/legacy/multiple_choice/run_multiple_choice.py index fece480cad0..aa1297656a9 100644 --- a/examples/legacy/multiple_choice/run_multiple_choice.py +++ b/examples/legacy/multiple_choice/run_multiple_choice.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
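The `plot_csv_file.py` hunk applies the same generics rewrite inside dataclass field annotations (`Optional[list[str]]`). A sketch of that pattern with a hypothetical args class; the script's own `list_field` helper is a thin wrapper over `dataclasses.field` and is not reproduced here:

```python
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class PlotArgs:  # hypothetical stand-in for the script's PlotArguments
    # mutable defaults need a factory; the annotations use builtin list
    short_model_names: Optional[list[str]] = None
    columns: list[str] = field(default_factory=lambda: ["model", "batch_size"])


args = PlotArgs(short_model_names=["bert", "gpt2"])
print(args.short_model_names, args.columns)
```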
# @@ -18,7 +17,7 @@ import logging import os from dataclasses import dataclass, field -from typing import Dict, Optional +from typing import Optional import numpy as np from utils_multiple_choice import MultipleChoiceDataset, Split, processors @@ -187,7 +186,7 @@ def main(): else None ) - def compute_metrics(p: EvalPrediction) -> Dict: + def compute_metrics(p: EvalPrediction) -> dict: preds = np.argmax(p.predictions, axis=1) return {"acc": simple_accuracy(preds, p.label_ids)} @@ -228,7 +227,7 @@ def main(): logger.info("***** Eval results *****") for key, value in result.items(): logger.info(" %s = %s", key, value) - writer.write("%s = %s\n" % (key, value)) + writer.write("{} = {}\n".format(key, value)) results.update(result) diff --git a/examples/legacy/multiple_choice/utils_multiple_choice.py b/examples/legacy/multiple_choice/utils_multiple_choice.py index 2ee901c1bc8..ddae47e5880 100644 --- a/examples/legacy/multiple_choice/utils_multiple_choice.py +++ b/examples/legacy/multiple_choice/utils_multiple_choice.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # @@ -22,7 +21,7 @@ import logging import os from dataclasses import dataclass from enum import Enum -from typing import List, Optional +from typing import Optional import tqdm from filelock import FileLock @@ -49,8 +48,8 @@ class InputExample: example_id: str question: str - contexts: List[str] - endings: List[str] + contexts: list[str] + endings: list[str] label: Optional[str] @@ -62,9 +61,9 @@ class InputFeatures: """ example_id: str - input_ids: List[List[int]] - attention_mask: Optional[List[List[int]]] - token_type_ids: Optional[List[List[int]]] + input_ids: list[list[int]] + attention_mask: Optional[list[list[int]]] + token_type_ids: Optional[list[list[int]]] label: Optional[int] @@ -84,7 +83,7 @@ if is_torch_available(): soon. """ - features: List[InputFeatures] + features: list[InputFeatures] def __init__( self, @@ -149,7 +148,7 @@ if is_tf_available(): soon. 
""" - features: List[InputFeatures] + features: list[InputFeatures] def __init__( self, @@ -253,7 +252,7 @@ class RaceProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") high = os.path.join(data_dir, "train/high") middle = os.path.join(data_dir, "train/middle") high = self._read_txt(high) @@ -262,7 +261,7 @@ class RaceProcessor(DataProcessor): def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") high = os.path.join(data_dir, "dev/high") middle = os.path.join(data_dir, "dev/middle") high = self._read_txt(high) @@ -271,7 +270,7 @@ class RaceProcessor(DataProcessor): def get_test_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} test".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} test") high = os.path.join(data_dir, "test/high") middle = os.path.join(data_dir, "test/middle") high = self._read_txt(high) @@ -286,7 +285,7 @@ class RaceProcessor(DataProcessor): lines = [] files = glob.glob(input_dir + "/*txt") for file in tqdm.tqdm(files, desc="read files"): - with open(file, "r", encoding="utf-8") as fin: + with open(file, encoding="utf-8") as fin: data_raw = json.load(fin) data_raw["race_id"] = file lines.append(data_raw) @@ -296,7 +295,7 @@ class RaceProcessor(DataProcessor): """Creates examples for the training and dev sets.""" examples = [] for _, data_raw in enumerate(lines): - race_id = "%s-%s" % (set_type, data_raw["race_id"]) + race_id = "{}-{}".format(set_type, data_raw["race_id"]) article = data_raw["article"] for i in range(len(data_raw["answers"])): truth = str(ord(data_raw["answers"][i]) - ord("A")) @@ -320,17 +319,17 @@ class SynonymProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") return self._create_examples(self._read_csv(os.path.join(data_dir, "mctrain.csv")), "train") def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return self._create_examples(self._read_csv(os.path.join(data_dir, "mchp.csv")), "dev") def get_test_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return self._create_examples(self._read_csv(os.path.join(data_dir, "mctest.csv")), "test") @@ -339,10 +338,10 @@ class SynonymProcessor(DataProcessor): return ["0", "1", "2", "3", "4"] def _read_csv(self, input_file): - with open(input_file, "r", encoding="utf-8") as f: + with open(input_file, encoding="utf-8") as f: return list(csv.reader(f)) - def _create_examples(self, lines: List[List[str]], type: str): + def _create_examples(self, lines: list[list[str]], type: str): """Creates examples for the training and dev sets.""" examples = [ @@ -366,17 +365,17 @@ class SwagProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train") def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return 
self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev") def get_test_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") raise ValueError( "For swag testing, the input file does not contain a label column. It can not be tested in current code " "setting!" @@ -388,10 +387,10 @@ class SwagProcessor(DataProcessor): return ["0", "1", "2", "3"] def _read_csv(self, input_file): - with open(input_file, "r", encoding="utf-8") as f: + with open(input_file, encoding="utf-8") as f: return list(csv.reader(f)) - def _create_examples(self, lines: List[List[str]], type: str): + def _create_examples(self, lines: list[list[str]], type: str): """Creates examples for the training and dev sets.""" if type == "train" and lines[0][-1] != "label": raise ValueError("For training, the input file must contain a label column.") @@ -417,16 +416,16 @@ class ArcProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") return self._create_examples(self._read_json(os.path.join(data_dir, "train.jsonl")), "train") def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return self._create_examples(self._read_json(os.path.join(data_dir, "dev.jsonl")), "dev") def get_test_examples(self, data_dir): - logger.info("LOOKING AT {} test".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} test") return self._create_examples(self._read_json(os.path.join(data_dir, "test.jsonl")), "test") def get_labels(self): @@ -434,7 +433,7 @@ class ArcProcessor(DataProcessor): return ["0", "1", "2", "3"] def _read_json(self, input_file): - with open(input_file, "r", encoding="utf-8") as fin: + with open(input_file, encoding="utf-8") as fin: lines = fin.readlines() return lines @@ -504,11 +503,11 @@ class ArcProcessor(DataProcessor): def convert_examples_to_features( - examples: List[InputExample], - label_list: List[str], + examples: list[InputExample], + label_list: list[str], max_length: int, tokenizer: PreTrainedTokenizer, -) -> List[InputFeatures]: +) -> list[InputFeatures]: """ Loads a data file into a list of `InputFeatures` """ diff --git a/examples/legacy/pytorch-lightning/lightning_base.py b/examples/legacy/pytorch-lightning/lightning_base.py index 7eb1e7831f6..5c34fbe7e07 100644 --- a/examples/legacy/pytorch-lightning/lightning_base.py +++ b/examples/legacy/pytorch-lightning/lightning_base.py @@ -2,7 +2,7 @@ import argparse import logging import os from pathlib import Path -from typing import Any, Dict +from typing import Any import pytorch_lightning as pl from pytorch_lightning.utilities import rank_zero_info @@ -201,7 +201,7 @@ class BaseTransformer(pl.LightningModule): ) @pl.utilities.rank_zero_only - def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None: + def on_save_checkpoint(self, checkpoint: dict[str, Any]) -> None: save_path = self.output_dir.joinpath("best_tfmr") self.model.config.save_step = self.step_count self.model.save_pretrained(save_path) @@ -282,7 +282,7 @@ class LoggingCallback(pl.Callback): # Log results for key in sorted(metrics): if key not in ["log", "progress_bar"]: - rank_zero_info("{} = {}\n".format(key, str(metrics[key]))) + rank_zero_info(f"{key} = {str(metrics[key])}\n") def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule): 
rank_zero_info("***** Test results *****") @@ -292,8 +292,8 @@ class LoggingCallback(pl.Callback): with open(output_test_results_file, "w") as writer: for key in sorted(metrics): if key not in ["log", "progress_bar"]: - rank_zero_info("{} = {}\n".format(key, str(metrics[key]))) - writer.write("{} = {}\n".format(key, str(metrics[key]))) + rank_zero_info(f"{key} = {str(metrics[key])}\n") + writer.write(f"{key} = {str(metrics[key])}\n") def add_generic_args(parser, root_dir) -> None: diff --git a/examples/legacy/question-answering/run_squad.py b/examples/legacy/question-answering/run_squad.py index 757024a0c38..5ba8309fee5 100644 --- a/examples/legacy/question-answering/run_squad.py +++ b/examples/legacy/question-answering/run_squad.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # @@ -231,14 +230,14 @@ def train(args, train_dataset, model, tokenizer): if args.local_rank == -1 and args.evaluate_during_training: results = evaluate(args, model, tokenizer) for key, value in results.items(): - tb_writer.add_scalar("eval_{}".format(key), value, global_step) + tb_writer.add_scalar(f"eval_{key}", value, global_step) tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step) tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step) logging_loss = tr_loss # Save model checkpoint if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0: - output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step)) + output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}") # Take care of distributed/parallel training model_to_save = model.module if hasattr(model, "module") else model model_to_save.save_pretrained(output_dir) @@ -281,7 +280,7 @@ def evaluate(args, model, tokenizer, prefix=""): model = torch.nn.DataParallel(model) # Eval! 
- logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(f"***** Running evaluation {prefix} *****") logger.info(" Num examples = %d", len(dataset)) logger.info(" Batch size = %d", args.eval_batch_size) @@ -348,11 +347,11 @@ def evaluate(args, model, tokenizer, prefix=""): logger.info(" Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(dataset)) # Compute predictions - output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) - output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) + output_prediction_file = os.path.join(args.output_dir, f"predictions_{prefix}.json") + output_nbest_file = os.path.join(args.output_dir, f"nbest_predictions_{prefix}.json") if args.version_2_with_negative: - output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + output_null_log_odds_file = os.path.join(args.output_dir, f"null_odds_{prefix}.json") else: output_null_log_odds_file = None @@ -828,10 +827,10 @@ def main(): # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step) - result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()} + result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()} results.update(result) - logger.info("Results: {}".format(results)) + logger.info(f"Results: {results}") return results diff --git a/examples/legacy/question-answering/run_squad_trainer.py b/examples/legacy/question-answering/run_squad_trainer.py index a27cb76295c..159569c3277 100644 --- a/examples/legacy/question-answering/run_squad_trainer.py +++ b/examples/legacy/question-answering/run_squad_trainer.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
# diff --git a/examples/legacy/run_camembert.py b/examples/legacy/run_camembert.py index 67e04babe10..9767ffbf8b3 100755 --- a/examples/legacy/run_camembert.py +++ b/examples/legacy/run_camembert.py @@ -20,10 +20,10 @@ def fill_mask(masked_input, model, tokenizer, topk=5): topk_filled_outputs = [] for index, predicted_token_bpe in enumerate(topk_predicted_token_bpe.split(" ")): predicted_token = predicted_token_bpe.replace("\u2581", " ") - if " {0}".format(masked_token) in masked_input: + if f" {masked_token}" in masked_input: topk_filled_outputs.append( ( - masked_input.replace(" {0}".format(masked_token), predicted_token), + masked_input.replace(f" {masked_token}", predicted_token), values[index].item(), predicted_token, ) diff --git a/examples/legacy/run_chinese_ref.py b/examples/legacy/run_chinese_ref.py index 7d73580aa21..03d195e961c 100755 --- a/examples/legacy/run_chinese_ref.py +++ b/examples/legacy/run_chinese_ref.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import argparse import json -from typing import List from ltp import LTP @@ -42,7 +41,7 @@ def is_chinese(word: str): return 1 -def get_chinese_word(tokens: List[str]): +def get_chinese_word(tokens: list[str]): word_set = set() for token in tokens: @@ -53,7 +52,7 @@ def get_chinese_word(tokens: List[str]): return word_list -def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()): +def add_sub_symbol(bert_tokens: list[str], chinese_word_set: set()): if not chinese_word_set: return bert_tokens max_word_len = max([len(w) for w in chinese_word_set]) @@ -77,7 +76,7 @@ def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()): return bert_word -def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer): +def prepare_ref(lines: list[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer): ltp_res = [] for i in range(0, len(lines), 100): @@ -117,7 +116,7 @@ def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokeni def main(args): # For Chinese (Ro)Bert, the best result is from : RoBERTa-wwm-ext (https://github.com/ymcui/Chinese-BERT-wwm) # If we want to fine-tune these model, we have to use same tokenizer : LTP (https://github.com/HIT-SCIR/ltp) - with open(args.file_name, "r", encoding="utf-8") as f: + with open(args.file_name, encoding="utf-8") as f: data = f.readlines() data = [line.strip() for line in data if len(line) > 0 and not line.isspace()] # avoid delimiter like '\u2029' ltp_tokenizer = LTP(args.ltp) # faster in GPU device diff --git a/examples/legacy/run_language_modeling.py b/examples/legacy/run_language_modeling.py index 317b2dfb48b..8a6b8eded34 100755 --- a/examples/legacy/run_language_modeling.py +++ b/examples/legacy/run_language_modeling.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
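The recurring `-# coding=utf-8` deletions, as in `run_language_modeling.py` just above, are safe because PEP 3120 made UTF-8 the default source encoding in Python 3; the declaration only ever mattered on Python 2. Even non-ASCII literals, like the SentencePiece underline that `run_camembert.py` handles, need no header:

```python
# No "# coding=utf-8" line needed: UTF-8 is the default source encoding (PEP 3120).
sp_underline = "\u2581"  # the SentencePiece word-boundary mark, a non-ASCII character
print(sp_underline.encode("utf-8"))  # b'\xe2\x96\x81'
```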
# @@ -358,7 +357,7 @@ def main(): logger.info("***** Eval results *****") for key in sorted(result.keys()): logger.info(" %s = %s", key, str(result[key])) - writer.write("%s = %s\n" % (key, str(result[key]))) + writer.write("{} = {}\n".format(key, str(result[key]))) results.update(result) diff --git a/examples/legacy/run_openai_gpt.py b/examples/legacy/run_openai_gpt.py index 4193cd1824d..62f9d3a3c08 100755 --- a/examples/legacy/run_openai_gpt.py +++ b/examples/legacy/run_openai_gpt.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # @@ -163,7 +162,7 @@ def main(): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") n_gpu = torch.cuda.device_count() - logger.info("device: {}, n_gpu {}".format(device, n_gpu)) + logger.info(f"device: {device}, n_gpu {n_gpu}") if not args.do_train and not args.do_eval: raise ValueError("At least one of `do_train` or `do_eval` must be True.") @@ -261,7 +260,7 @@ def main(): loss.item() if exp_average_loss is None else 0.7 * exp_average_loss + 0.3 * loss.item() ) nb_tr_steps += 1 - tqdm_bar.desc = "Training loss: {:.2e} lr: {:.2e}".format(exp_average_loss, scheduler.get_lr()[0]) + tqdm_bar.desc = f"Training loss: {exp_average_loss:.2e} lr: {scheduler.get_lr()[0]:.2e}" # Save a trained model if args.do_train: @@ -313,7 +312,7 @@ def main(): logger.info("***** Eval results *****") for key in sorted(result.keys()): logger.info(" %s = %s", key, str(result[key])) - writer.write("%s = %s\n" % (key, str(result[key]))) + writer.write("{} = {}\n".format(key, str(result[key]))) if __name__ == "__main__": diff --git a/examples/legacy/run_swag.py b/examples/legacy/run_swag.py index 55fd0aa0520..221f9cc9c98 100755 --- a/examples/legacy/run_swag.py +++ b/examples/legacy/run_swag.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
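Note what the diff deliberately leaves alone: calls like `logger.info(" %s = %s", key, str(result[key]))` stay in lazy %-style, and only eagerly pre-formatted strings become f-strings. With lazy arguments the interpolation cost is paid only if the record passes the level check. A sketch of the two call shapes:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

device, n_gpu = "cuda", 2

# eager: the f-string is built before logger.info even runs
logger.info(f"device: {device}, n_gpu {n_gpu}")

# lazy: interpolation happens inside logging, and only for enabled levels,
# which is why the diff keeps %-style at logger call sites
logger.info("device: %s, n_gpu %d", device, n_gpu)
```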
# @@ -51,7 +50,7 @@ except ImportError: logger = logging.getLogger(__name__) -class SwagExample(object): +class SwagExample: """A single training/test example for the SWAG dataset.""" def __init__(self, swag_id, context_sentence, start_ending, ending_0, ending_1, ending_2, ending_3, label=None): @@ -71,22 +70,22 @@ class SwagExample(object): def __repr__(self): attributes = [ - "swag_id: {}".format(self.swag_id), - "context_sentence: {}".format(self.context_sentence), - "start_ending: {}".format(self.start_ending), - "ending_0: {}".format(self.endings[0]), - "ending_1: {}".format(self.endings[1]), - "ending_2: {}".format(self.endings[2]), - "ending_3: {}".format(self.endings[3]), + f"swag_id: {self.swag_id}", + f"context_sentence: {self.context_sentence}", + f"start_ending: {self.start_ending}", + f"ending_0: {self.endings[0]}", + f"ending_1: {self.endings[1]}", + f"ending_2: {self.endings[2]}", + f"ending_3: {self.endings[3]}", ] if self.label is not None: - attributes.append("label: {}".format(self.label)) + attributes.append(f"label: {self.label}") return ", ".join(attributes) -class InputFeatures(object): +class InputFeatures: def __init__(self, example_id, choices_features, label): self.example_id = example_id self.choices_features = [ @@ -97,7 +96,7 @@ class InputFeatures(object): def read_swag_examples(input_file, is_training=True): - with open(input_file, "r", encoding="utf-8") as f: + with open(input_file, encoding="utf-8") as f: lines = list(csv.reader(f)) if is_training and lines[0][-1] != "label": @@ -179,15 +178,15 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, is_trainin label = example.label if example_index < 5: logger.info("*** Example ***") - logger.info("swag_id: {}".format(example.swag_id)) + logger.info(f"swag_id: {example.swag_id}") for choice_idx, (tokens, input_ids, input_mask, segment_ids) in enumerate(choices_features): - logger.info("choice: {}".format(choice_idx)) + logger.info(f"choice: {choice_idx}") logger.info("tokens: {}".format(" ".join(tokens))) logger.info("input_ids: {}".format(" ".join(map(str, input_ids)))) logger.info("input_mask: {}".format(" ".join(map(str, input_mask)))) logger.info("segment_ids: {}".format(" ".join(map(str, segment_ids)))) if is_training: - logger.info("label: {}".format(label)) + logger.info(f"label: {label}") features.append(InputFeatures(example_id=example.swag_id, choices_features=choices_features, label=label)) @@ -382,14 +381,14 @@ def train(args, train_dataset, model, tokenizer): ): # Only evaluate when single GPU otherwise metrics may not average well results = evaluate(args, model, tokenizer) for key, value in results.items(): - tb_writer.add_scalar("eval_{}".format(key), value, global_step) + tb_writer.add_scalar(f"eval_{key}", value, global_step) tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step) tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step) logging_loss = tr_loss if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0: # Save model checkpoint - output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step)) + output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}") model_to_save = ( model.module if hasattr(model, "module") else model ) # Take care of distributed/parallel training @@ -423,7 +422,7 @@ def evaluate(args, model, tokenizer, prefix=""): eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) # Eval! 
- logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(f"***** Running evaluation {prefix} *****") logger.info(" Num examples = %d", len(dataset)) logger.info(" Batch size = %d", args.eval_batch_size) @@ -466,7 +465,7 @@ def evaluate(args, model, tokenizer, prefix=""): logger.info("***** Eval results *****") for key in sorted(result.keys()): logger.info("%s = %s", key, str(result[key])) - writer.write("%s = %s\n" % (key, str(result[key]))) + writer.write("{} = {}\n".format(key, str(result[key]))) return result @@ -710,10 +709,10 @@ def main(): # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step) - result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()} + result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()} results.update(result) - logger.info("Results: {}".format(results)) + logger.info(f"Results: {results}") return results diff --git a/examples/legacy/run_transfo_xl.py b/examples/legacy/run_transfo_xl.py index ce24fe13d79..7da9ee7fe9c 100755 --- a/examples/legacy/run_transfo_xl.py +++ b/examples/legacy/run_transfo_xl.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # @@ -66,7 +65,7 @@ def main(): ptvsd.wait_for_attach() device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - logger.info("device: {}".format(device)) + logger.info(f"device: {device}") # Load a pre-processed dataset # You can also build the corpus yourself using TransfoXLCorpus methods @@ -111,7 +110,7 @@ def main(): total_loss += seq_len * loss.item() total_len += seq_len total_time = time.time() - start_time - logger.info("Time : {:.2f}s, {:.2f}ms/segment".format(total_time, 1000 * total_time / (idx + 1))) + logger.info(f"Time : {total_time:.2f}s, {1000 * total_time / (idx + 1):.2f}ms/segment") return total_loss / total_len # Run on test data. diff --git a/examples/legacy/seq2seq/old_test_fsmt_bleu_score.py b/examples/legacy/seq2seq/old_test_fsmt_bleu_score.py index 4aefeb388be..93bd9839920 100644 --- a/examples/legacy/seq2seq/old_test_fsmt_bleu_score.py +++ b/examples/legacy/seq2seq/old_test_fsmt_bleu_score.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2020 Huggingface # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import io import json import unittest @@ -25,7 +23,7 @@ from utils import calculate_bleu filename = get_tests_dir() + "/test_data/fsmt/fsmt_val_data.json" -with io.open(filename, "r", encoding="utf-8") as f: +with open(filename, encoding="utf-8") as f: bleu_data = json.load(f) diff --git a/examples/legacy/seq2seq/run_distributed_eval.py b/examples/legacy/seq2seq/run_distributed_eval.py index 40a946f81c5..41855eaed6b 100755 --- a/examples/legacy/seq2seq/run_distributed_eval.py +++ b/examples/legacy/seq2seq/run_distributed_eval.py @@ -19,7 +19,6 @@ import time from json import JSONDecodeError from logging import getLogger from pathlib import Path -from typing import Dict, List import torch from torch.utils.data import DataLoader @@ -55,10 +54,10 @@ def eval_data_dir( task="summarization", local_rank=None, num_return_sequences=1, - dataset_kwargs: Dict = None, + dataset_kwargs: dict = None, prefix="", **generate_kwargs, -) -> Dict: +) -> dict: """Run evaluation on part of the data for one gpu and save to {save_dir}/rank_{rank}_output.json""" model_name = str(model_name) assert local_rank is not None @@ -211,7 +210,7 @@ def run_generate(): calc_bleu = "translation" in args.task score_fn = calculate_bleu if calc_bleu else calculate_rouge metric_name = "bleu" if calc_bleu else "rouge" - metrics: Dict = score_fn(preds, labels) + metrics: dict = score_fn(preds, labels) metrics["n_obs"] = len(preds) runtime = time.time() - start_time metrics["seconds_per_sample"] = round(runtime / metrics["n_obs"], 4) @@ -227,7 +226,7 @@ def run_generate(): shutil.rmtree(json_save_dir) -def combine_partial_results(partial_results) -> List: +def combine_partial_results(partial_results) -> list: """Concatenate partial results into one file, then sort it by id.""" records = [] for partial_result in partial_results: @@ -237,7 +236,7 @@ def combine_partial_results(partial_results) -> List: return preds -def gather_results_from_each_node(num_replicas, save_dir, timeout) -> List[Dict[str, List]]: +def gather_results_from_each_node(num_replicas, save_dir, timeout) -> list[dict[str, list]]: # WAIT FOR lots of .json files start_wait = time.time() logger.info("waiting for all nodes to finish") diff --git a/examples/legacy/seq2seq/run_eval.py b/examples/legacy/seq2seq/run_eval.py index f69e5d51264..f5ef4f5d165 100755 --- a/examples/legacy/seq2seq/run_eval.py +++ b/examples/legacy/seq2seq/run_eval.py @@ -20,7 +20,6 @@ import time import warnings from logging import getLogger from pathlib import Path -from typing import Dict, List import torch from tqdm import tqdm @@ -36,7 +35,7 @@ DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu" def generate_summaries_or_translations( - examples: List[str], + examples: list[str], out_file: str, model_name: str, batch_size: int = 8, @@ -45,7 +44,7 @@ def generate_summaries_or_translations( task="summarization", prefix=None, **generate_kwargs, -) -> Dict: +) -> dict: """Save model.generate results to , and return how long it took.""" fout = Path(out_file).open("w", encoding="utf-8") model_name = str(model_name) diff --git a/examples/legacy/seq2seq/run_eval_search.py b/examples/legacy/seq2seq/run_eval_search.py index e6048a4ec44..e911eca5704 100755 --- a/examples/legacy/seq2seq/run_eval_search.py +++ b/examples/legacy/seq2seq/run_eval_search.py @@ -34,7 +34,7 @@ task_score_names = { def parse_search_arg(search): groups = search.split() - entries = dict((g.split("=") for g in groups)) + entries = dict(g.split("=") for g in groups) entry_names = list(entries.keys()) sets = [[f"--{k} 
{v}" for v in vs.split(":")] for k, vs in entries.items()] matrix = [list(x) for x in itertools.product(*sets)] @@ -105,7 +105,7 @@ def run_search(): col_widths = {col: len(str(col)) for col in col_names} results = [] for r in matrix: - hparams = dict((x.replace("--", "").split() for x in r)) + hparams = dict(x.replace("--", "").split() for x in r) args_exp = " ".join(r).split() args_exp.extend(["--bs", str(args.bs)]) # in case we need to reduce its size due to CUDA OOM sys.argv = args_normal + args_exp diff --git a/examples/legacy/seq2seq/seq2seq_trainer.py b/examples/legacy/seq2seq/seq2seq_trainer.py index 8f056ca8e15..afdde6614e2 100644 --- a/examples/legacy/seq2seq/seq2seq_trainer.py +++ b/examples/legacy/seq2seq/seq2seq_trainer.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Optional, Union import torch from torch import nn @@ -172,10 +172,10 @@ class Seq2SeqTrainer(Trainer): def prediction_step( self, model: nn.Module, - inputs: Dict[str, Union[torch.Tensor, Any]], + inputs: dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, - ignore_keys: Optional[List[str]] = None, - ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: + ignore_keys: Optional[list[str]] = None, + ) -> tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: """ Perform an evaluation step on :obj:`model` using obj:`inputs`. diff --git a/examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py b/examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py index 46487c07ea8..8f518822b45 100755 --- a/examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py +++ b/examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import io import json import subprocess @@ -29,5 +28,5 @@ def get_all_data(pairs, n_objs): text = get_all_data(pairs, n_objs) filename = "./fsmt_val_data.json" -with io.open(filename, "w", encoding="utf-8") as f: +with open(filename, "w", encoding="utf-8") as f: bleu_data = json.dump(text, f, indent=2, ensure_ascii=False) diff --git a/examples/legacy/seq2seq/utils.py b/examples/legacy/seq2seq/utils.py index 955c9e99610..001300f1869 100644 --- a/examples/legacy/seq2seq/utils.py +++ b/examples/legacy/seq2seq/utils.py @@ -19,9 +19,10 @@ import math import os import pickle import socket +from collections.abc import Iterable from logging import getLogger from pathlib import Path -from typing import Callable, Dict, Iterable, List, Tuple, Union +from typing import Callable, Union import git import numpy as np @@ -67,7 +68,7 @@ def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=-100): return loss, nll_loss -def lmap(f: Callable, x: Iterable) -> List: +def lmap(f: Callable, x: Iterable) -> list: """list(map(f, x))""" return list(map(f, x)) @@ -77,11 +78,11 @@ def calculate_bleu(output_lns, refs_lns, **kwargs) -> dict: return {"bleu": round(corpus_bleu(output_lns, [refs_lns], **kwargs).score, 4)} -def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], Dict]: +def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], dict]: def non_pad_len(tokens: np.ndarray) -> int: return np.count_nonzero(tokens != tokenizer.pad_token_id) - def decode_pred(pred: EvalPrediction) -> Tuple[List[str], List[str]]: + def decode_pred(pred: EvalPrediction) -> tuple[list[str], 
list[str]]: pred_ids = pred.predictions label_ids = pred.label_ids pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True) @@ -91,16 +92,16 @@ def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> label_str = lmap(str.strip, label_str) return pred_str, label_str - def summarization_metrics(pred: EvalPrediction) -> Dict: + def summarization_metrics(pred: EvalPrediction) -> dict: pred_str, label_str = decode_pred(pred) - rouge: Dict = calculate_rouge(pred_str, label_str) + rouge: dict = calculate_rouge(pred_str, label_str) summ_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1) rouge.update({"gen_len": summ_len}) return rouge - def translation_metrics(pred: EvalPrediction) -> Dict: + def translation_metrics(pred: EvalPrediction) -> dict: pred_str, label_str = decode_pred(pred) - bleu: Dict = calculate_bleu(pred_str, label_str) + bleu: dict = calculate_bleu(pred_str, label_str) gen_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1) bleu.update({"gen_len": gen_len}) return bleu @@ -183,7 +184,7 @@ class AbstractSeq2SeqDataset(Dataset): return min(self.src_lens[i], self.max_target_length) # call fairseq cython function - batch_sampler: List[List[int]] = batch_by_size( + batch_sampler: list[list[int]] = batch_by_size( sorted_indices, num_tokens_fn=num_tokens_in_example, max_tokens=max_tokens_per_batch, @@ -207,7 +208,7 @@ class AbstractSeq2SeqDataset(Dataset): class LegacySeq2SeqDataset(AbstractSeq2SeqDataset): - def __getitem__(self, index) -> Dict[str, torch.Tensor]: + def __getitem__(self, index) -> dict[str, torch.Tensor]: """Call tokenizer on src and tgt_lines""" index = index + 1 # linecache starts at 1 source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n") @@ -237,7 +238,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset): **self.dataset_kwargs, ) - def collate_fn(self, batch) -> Dict[str, torch.Tensor]: + def collate_fn(self, batch) -> dict[str, torch.Tensor]: input_ids = torch.stack([x["input_ids"] for x in batch]) masks = torch.stack([x["attention_mask"] for x in batch]) target_ids = torch.stack([x["labels"] for x in batch]) @@ -255,7 +256,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset): class Seq2SeqDataset(AbstractSeq2SeqDataset): """A dataset that calls prepare_seq2seq_batch.""" - def __getitem__(self, index) -> Dict[str, str]: + def __getitem__(self, index) -> dict[str, str]: index = index + 1 # linecache starts at 1 source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n") tgt_line = linecache.getline(str(self.tgt_file), index).rstrip("\n") @@ -263,9 +264,9 @@ class Seq2SeqDataset(AbstractSeq2SeqDataset): assert tgt_line, f"empty tgt line for index {index}" return {"tgt_texts": tgt_line, "src_texts": source_line, "id": index - 1} - def collate_fn(self, batch) -> Dict[str, torch.Tensor]: + def collate_fn(self, batch) -> dict[str, torch.Tensor]: """Call prepare_seq2seq_batch.""" - batch_encoding: Dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch( + batch_encoding: dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch( [x["src_texts"] for x in batch], tgt_texts=[x["tgt_texts"] for x in batch], max_length=self.max_source_length, @@ -293,7 +294,7 @@ class Seq2SeqDataCollator: if data_args.tgt_lang is not None: self.dataset_kwargs["tgt_lang"] = data_args.tgt_lang - def __call__(self, batch) -> Dict[str, torch.Tensor]: + def __call__(self, batch) -> dict[str, torch.Tensor]: if hasattr(self.tokenizer, "prepare_seq2seq_batch"): batch 
= self._encode(batch) input_ids, attention_mask, labels = ( @@ -329,7 +330,7 @@ class Seq2SeqDataCollator: shifted_input_ids[..., 0] = self.pad_token_id return shifted_input_ids - def _encode(self, batch) -> Dict[str, torch.Tensor]: + def _encode(self, batch) -> dict[str, torch.Tensor]: batch_encoding = self.tokenizer.prepare_seq2seq_batch( [x["src_texts"] for x in batch], tgt_texts=[x["tgt_texts"] for x in batch], @@ -355,7 +356,7 @@ class SortishSampler(Sampler): return iter(sortish_sampler_indices(self.data, self.bs, shuffle=self.shuffle)) -def sortish_sampler_indices(data: List, bs: int, shuffle=True) -> np.array: +def sortish_sampler_indices(data: list, bs: int, shuffle=True) -> np.array: "Go through the text data by order of src length with a bit of randomness. From fastai repo." if not shuffle: return np.argsort(np.array(data) * -1) @@ -455,7 +456,7 @@ def pickle_save(obj, path): return pickle.dump(obj, f) -def flatten_list(summary_ids: List[List]): +def flatten_list(summary_ids: list[list]): return list(itertools.chain.from_iterable(summary_ids)) @@ -506,14 +507,14 @@ def extract_rouge_mid_statistics(dct): def calculate_rouge( - pred_lns: List[str], - tgt_lns: List[str], + pred_lns: list[str], + tgt_lns: list[str], use_stemmer=True, rouge_keys=ROUGE_KEYS, return_precision_and_recall=False, bootstrap_aggregation=True, newline_sep=True, -) -> Dict: +) -> dict: """Calculate rouge using rouge_scorer package. Args: @@ -590,19 +591,19 @@ def any_requires_grad(model: nn.Module) -> bool: def assert_all_frozen(model): - model_grads: List[bool] = list(grad_status(model)) + model_grads: list[bool] = list(grad_status(model)) n_require_grad = sum(lmap(int, model_grads)) npars = len(model_grads) assert not any(model_grads), f"{n_require_grad / npars:.1%} of {npars} weights require grad" def assert_not_all_frozen(model): - model_grads: List[bool] = list(grad_status(model)) + model_grads: list[bool] = list(grad_status(model)) npars = len(model_grads) assert any(model_grads), f"none of {npars} weights require grad" -def parse_numeric_n_bool_cl_kwargs(unparsed_args: List[str]) -> Dict[str, Union[int, float, bool]]: +def parse_numeric_n_bool_cl_kwargs(unparsed_args: list[str]) -> dict[str, Union[int, float, bool]]: """ Parse an argv list of unspecified command line args to a dict. Assumes all values are either numeric or boolean in the form of true/false. diff --git a/examples/legacy/token-classification/run_ner.py b/examples/legacy/token-classification/run_ner.py index 4fb74e78ff9..69b8a27ac79 100644 --- a/examples/legacy/token-classification/run_ner.py +++ b/examples/legacy/token-classification/run_ner.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
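In `seq2seq/utils.py` the diff moves `Iterable` from `typing` to `collections.abc`: the `typing` re-exports of the container ABCs are deprecated since Python 3.9, and the `collections.abc` classes subscript the same way. A sketch mirroring that file's `lmap` helper (with narrower annotations than the original for illustration):

```python
from collections.abc import Iterable
from typing import Callable


def lmap(f: Callable[[str], str], xs: Iterable[str]) -> list[str]:
    """list(map(f, x)) with collections.abc annotations (Python 3.9+)."""
    return list(map(f, xs))


print(lmap(str.strip, ["  a ", " b"]))  # ['a', 'b']
```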
# @@ -20,7 +19,7 @@ import os import sys from dataclasses import dataclass, field from importlib import import_module -from typing import Dict, List, Optional, Tuple +from typing import Optional import numpy as np from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score @@ -159,7 +158,7 @@ def main(): # Prepare CONLL-2003 task labels = token_classification_task.get_labels(data_args.labels) - label_map: Dict[int, str] = dict(enumerate(labels)) + label_map: dict[int, str] = dict(enumerate(labels)) num_labels = len(labels) # Load pretrained model and tokenizer @@ -217,7 +216,7 @@ def main(): else None ) - def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> Tuple[List[int], List[int]]: + def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> tuple[list[int], list[int]]: preds = np.argmax(predictions, axis=2) batch_size, seq_len = preds.shape @@ -233,7 +232,7 @@ def main(): return preds_list, out_label_list - def compute_metrics(p: EvalPrediction) -> Dict: + def compute_metrics(p: EvalPrediction) -> dict: preds_list, out_label_list = align_predictions(p.predictions, p.label_ids) return { "accuracy_score": accuracy_score(out_label_list, preds_list), @@ -279,7 +278,7 @@ def main(): logger.info("***** Eval results *****") for key, value in result.items(): logger.info(" %s = %s", key, value) - writer.write("%s = %s\n" % (key, value)) + writer.write("{} = {}\n".format(key, value)) results.update(result) @@ -304,13 +303,13 @@ def main(): with open(output_test_results_file, "w") as writer: for key, value in metrics.items(): logger.info(" %s = %s", key, value) - writer.write("%s = %s\n" % (key, value)) + writer.write("{} = {}\n".format(key, value)) # Save predictions output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt") if trainer.is_world_process_zero(): with open(output_test_predictions_file, "w") as writer: - with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f: + with open(os.path.join(data_args.data_dir, "test.txt")) as f: token_classification_task.write_predictions_to_file(writer, f, preds_list) return results diff --git a/examples/legacy/token-classification/scripts/preprocess.py b/examples/legacy/token-classification/scripts/preprocess.py index 4eaa4fe2f3b..40ecf2b32ac 100644 --- a/examples/legacy/token-classification/scripts/preprocess.py +++ b/examples/legacy/token-classification/scripts/preprocess.py @@ -12,7 +12,7 @@ subword_len_counter = 0 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) max_len -= tokenizer.num_special_tokens_to_add() -with open(dataset, "rt") as f_p: +with open(dataset) as f_p: for line in f_p: line = line.rstrip() diff --git a/examples/legacy/token-classification/tasks.py b/examples/legacy/token-classification/tasks.py index 43de0a7f04a..7e406fa7757 100644 --- a/examples/legacy/token-classification/tasks.py +++ b/examples/legacy/token-classification/tasks.py @@ -1,6 +1,6 @@ import logging import os -from typing import List, TextIO, Union +from typing import TextIO, Union from conllu import parse_incr from utils_ner import InputExample, Split, TokenClassificationTask @@ -14,7 +14,7 @@ class NER(TokenClassificationTask): # in NER datasets, the last column is usually reserved for NER label self.label_idx = label_idx - def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]: + def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]: if isinstance(mode, Split): mode = 
mode.value file_path = os.path.join(data_dir, f"{mode}.txt") @@ -42,7 +42,7 @@ class NER(TokenClassificationTask): examples.append(InputExample(guid=f"{mode}-{guid_index}", words=words, labels=labels)) return examples - def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List): + def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list): example_id = 0 for line in test_input_reader: if line.startswith("-DOCSTART-") or line == "" or line == "\n": @@ -55,9 +55,9 @@ class NER(TokenClassificationTask): else: logger.warning("Maximum sequence length exceeded: No prediction for '%s'.", line.split()[0]) - def get_labels(self, path: str) -> List[str]: + def get_labels(self, path: str) -> list[str]: if path: - with open(path, "r") as f: + with open(path) as f: labels = f.read().splitlines() if "O" not in labels: labels = ["O"] + labels @@ -71,9 +71,9 @@ class Chunk(NER): # in CONLL2003 dataset chunk column is second-to-last super().__init__(label_idx=-2) - def get_labels(self, path: str) -> List[str]: + def get_labels(self, path: str) -> list[str]: if path: - with open(path, "r") as f: + with open(path) as f: labels = f.read().splitlines() if "O" not in labels: labels = ["O"] + labels @@ -105,7 +105,7 @@ class Chunk(NER): class POS(TokenClassificationTask): - def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]: + def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]: if isinstance(mode, Split): mode = mode.value file_path = os.path.join(data_dir, f"{mode}.txt") @@ -125,7 +125,7 @@ class POS(TokenClassificationTask): guid_index += 1 return examples - def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List): + def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list): example_id = 0 for sentence in parse_incr(test_input_reader): s_p = preds_list[example_id] @@ -136,9 +136,9 @@ class POS(TokenClassificationTask): writer.write(out) example_id += 1 - def get_labels(self, path: str) -> List[str]: + def get_labels(self, path: str) -> list[str]: if path: - with open(path, "r") as f: + with open(path) as f: return f.read().splitlines() else: return [ diff --git a/examples/legacy/token-classification/utils_ner.py b/examples/legacy/token-classification/utils_ner.py index 9167ce15161..0c1725b59b4 100644 --- a/examples/legacy/token-classification/utils_ner.py +++ b/examples/legacy/token-classification/utils_ner.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # @@ -19,7 +18,7 @@ import logging import os from dataclasses import dataclass from enum import Enum -from typing import List, Optional, Union +from typing import Optional, Union from filelock import FileLock @@ -42,8 +41,8 @@ class InputExample: """ guid: str - words: List[str] - labels: Optional[List[str]] + words: list[str] + labels: Optional[list[str]] @dataclass @@ -53,10 +52,10 @@ class InputFeatures: Property names are the same names as the corresponding inputs to a model. 
""" - input_ids: List[int] - attention_mask: List[int] - token_type_ids: Optional[List[int]] = None - label_ids: Optional[List[int]] = None + input_ids: list[int] + attention_mask: list[int] + token_type_ids: Optional[list[int]] = None + label_ids: Optional[list[int]] = None class Split(Enum): @@ -67,17 +66,17 @@ class Split(Enum): class TokenClassificationTask: @staticmethod - def read_examples_from_file(data_dir, mode: Union[Split, str]) -> List[InputExample]: + def read_examples_from_file(data_dir, mode: Union[Split, str]) -> list[InputExample]: raise NotImplementedError @staticmethod - def get_labels(path: str) -> List[str]: + def get_labels(path: str) -> list[str]: raise NotImplementedError @staticmethod def convert_examples_to_features( - examples: List[InputExample], - label_list: List[str], + examples: list[InputExample], + label_list: list[str], max_seq_length: int, tokenizer: PreTrainedTokenizer, cls_token_at_end=False, @@ -91,7 +90,7 @@ class TokenClassificationTask: pad_token_label_id=-100, sequence_a_segment_id=0, mask_padding_with_zero=True, - ) -> List[InputFeatures]: + ) -> list[InputFeatures]: """Loads a data file into a list of `InputFeatures` `cls_token_at_end` define the location of the CLS token: - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP] @@ -214,7 +213,7 @@ if is_torch_available(): soon. """ - features: List[InputFeatures] + features: list[InputFeatures] pad_token_label_id: int = nn.CrossEntropyLoss().ignore_index # Use cross entropy ignore_index as padding label id so that only # real label ids contribute to the loss later. @@ -224,7 +223,7 @@ if is_torch_available(): token_classification_task: TokenClassificationTask, data_dir: str, tokenizer: PreTrainedTokenizer, - labels: List[str], + labels: list[str], model_type: str, max_seq_length: Optional[int] = None, overwrite_cache=False, @@ -233,7 +232,7 @@ if is_torch_available(): # Load data features from cache or dataset file cached_features_file = os.path.join( data_dir, - "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)), + f"cached_{mode.value}_{tokenizer.__class__.__name__}_{str(max_seq_length)}", ) # Make sure only the first process in distributed training processes the dataset, @@ -283,7 +282,7 @@ if is_tf_available(): soon. """ - features: List[InputFeatures] + features: list[InputFeatures] pad_token_label_id: int = -100 # Use cross entropy ignore_index as padding label id so that only # real label ids contribute to the loss later. @@ -293,7 +292,7 @@ if is_tf_available(): token_classification_task: TokenClassificationTask, data_dir: str, tokenizer: PreTrainedTokenizer, - labels: List[str], + labels: list[str], model_type: str, max_seq_length: Optional[int] = None, overwrite_cache=False, diff --git a/examples/modular-transformers/image_processing_new_imgproc_model.py b/examples/modular-transformers/image_processing_new_imgproc_model.py index bde80dd0964..8320a25228c 100644 --- a/examples/modular-transformers/image_processing_new_imgproc_model.py +++ b/examples/modular-transformers/image_processing_new_imgproc_model.py @@ -4,7 +4,7 @@ # the file from the modular. If any change should be done, please apply the change to the # modular_new_imgproc_model.py file directly. One of our CI enforces this. 
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 -from typing import Dict, List, Optional, Union +from typing import Optional, Union import numpy as np import torch @@ -74,13 +74,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: dict[str, int] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Optional[Union[float, List[float]]] = None, - image_std: Optional[Union[float, List[float]]] = None, + image_mean: Optional[Union[float, list[float]]] = None, + image_std: Optional[Union[float, list[float]]] = None, do_convert_rgb: bool = True, **kwargs, ) -> None: @@ -101,7 +101,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor): def resize( self, image: np.ndarray, - size: Dict[str, int], + size: dict[str, int], resample: PILImageResampling = PILImageResampling.BICUBIC, data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -151,13 +151,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Optional[Dict[str, int]] = None, + size: Optional[dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, - image_mean: Optional[Union[float, List[float]]] = None, - image_std: Optional[Union[float, List[float]]] = None, + image_mean: Optional[Union[float, list[float]]] = None, + image_std: Optional[Union[float, list[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, do_convert_rgb: bool = None, data_format: ChannelDimension = ChannelDimension.FIRST, diff --git a/examples/modular-transformers/modeling_add_function.py b/examples/modular-transformers/modeling_add_function.py index acf140f025d..76b6dafb69a 100644 --- a/examples/modular-transformers/modeling_add_function.py +++ b/examples/modular-transformers/modeling_add_function.py @@ -5,7 +5,7 @@ # modular_add_function.py file directly. One of our CI enforces this. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 # Note that zamba does not have the `apply_rotary_pos_emb` function! -from typing import Optional, Tuple +from typing import Optional import torch from torch import nn @@ -62,5 +62,5 @@ class TestAttention(nn.Module): def __init__(self): pass - def forward(self) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + def forward(self) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: _ = apply_rotary_pos_emb(1, 1, 1, 1) diff --git a/examples/modular-transformers/modeling_dummy.py b/examples/modular-transformers/modeling_dummy.py index 98a72a3e659..f793923cb2f 100644 --- a/examples/modular-transformers/modeling_dummy.py +++ b/examples/modular-transformers/modeling_dummy.py @@ -5,7 +5,7 @@ # modular_dummy.py file directly. One of our CI enforces this. 
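The image-processor signatures above also rely on the fact that a subscripted builtin is a real runtime object (`types.GenericAlias`), so tooling that introspects annotations keeps working after the `Dict`/`List` aliases are dropped. A quick, purely illustrative check:

hint = dict[str, int]
print(type(hint).__name__)  # GenericAlias
print(hint.__origin__)      # <class 'dict'>
print(hint.__args__)        # (<class 'str'>, <class 'int'>)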
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 from functools import partial -from typing import Callable, Optional, Tuple, Union +from typing import Callable, Optional, Union import torch from torch import nn @@ -223,12 +223,12 @@ class DummyAttention(nn.Module): def forward( self, hidden_states: torch.Tensor, - position_embeddings: Tuple[torch.Tensor, torch.Tensor], + position_embeddings: tuple[torch.Tensor, torch.Tensor], attention_mask: Optional[torch.Tensor], past_key_value: Optional[Cache] = None, cache_position: Optional[torch.LongTensor] = None, **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: input_shape = hidden_states.shape[:-1] hidden_shape = (*input_shape, -1, self.head_dim) @@ -290,9 +290,9 @@ class DummyDecoderLayer(nn.Module): output_attentions: Optional[bool] = False, use_cache: Optional[bool] = False, cache_position: Optional[torch.LongTensor] = None, - position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC + position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: + ) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]: residual = hidden_states hidden_states = self.input_layernorm(hidden_states) @@ -494,7 +494,7 @@ class DummyModel(DummyPreTrainedModel): return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, **flash_attn_kwargs: Unpack[FlashAttentionKwargs], - ) -> Union[Tuple, BaseModelOutputWithPast]: + ) -> Union[tuple, BaseModelOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states diff --git a/examples/modular-transformers/modeling_dummy_bert.py b/examples/modular-transformers/modeling_dummy_bert.py index e18e6a19e8a..5de9ab576e5 100644 --- a/examples/modular-transformers/modeling_dummy_bert.py +++ b/examples/modular-transformers/modeling_dummy_bert.py @@ -6,7 +6,7 @@ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 import math import os -from typing import List, Optional, Tuple, Union +from typing import Optional, Union import torch from packaging import version @@ -136,9 +136,9 @@ class DummyBertSelfAttention(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: mixed_query_layer = self.query(hidden_states) # If this is instantiated as a cross-attention module, the keys @@ -245,9 +245,9 @@ class DummyBertSdpaSelfAttention(DummyBertSelfAttention): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = 
False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None: # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented. logger.warning_once( @@ -386,9 +386,9 @@ class DummyBertAttention(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: self_outputs = self.self( hidden_states, attention_mask, @@ -454,9 +454,9 @@ class DummyBertLayer(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: # decoder uni-directional self-attention cached key/values tuple is at positions 1,2 self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None self_attention_outputs = self.attention( @@ -532,12 +532,12 @@ class DummyBertEncoder(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, - ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]: + ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]: all_hidden_states = () if output_hidden_states else None all_self_attentions = () if output_attentions else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None @@ -858,12 +858,12 @@ class DummyBertModel(DummyBertPreTrainedModel): inputs_embeds: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, + past_key_values: Optional[list[torch.FloatTensor]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]: + ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]: r""" encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Sequence of hidden-states at the output of the last layer of the encoder. 
Used in the cross-attention if diff --git a/examples/modular-transformers/modeling_from_uppercase_model.py b/examples/modular-transformers/modeling_from_uppercase_model.py index d6c16c69743..4385ac7bcf3 100644 --- a/examples/modular-transformers/modeling_from_uppercase_model.py +++ b/examples/modular-transformers/modeling_from_uppercase_model.py @@ -4,7 +4,7 @@ # the file from the modular. If any change should be done, please apply the change to the # modular_from_uppercase_model.py file directly. One of our CI enforces this. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 -from typing import Optional, Tuple +from typing import Optional import torch from torch import nn @@ -53,7 +53,7 @@ class FromUppercaseModelAttention(nn.Module): attention_mask: Optional[torch.Tensor] = None, causal_attention_mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: """Input shape: Batch x Time x Channel""" bsz, tgt_len, embed_dim = hidden_states.size() @@ -148,7 +148,7 @@ class FromUppercaseModelFlashAttention2(FromUppercaseModelAttention): attention_mask: Optional[torch.Tensor] = None, causal_attention_mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: output_attentions = False batch_size, q_len, _ = hidden_states.size() @@ -226,7 +226,7 @@ class FromUppercaseModelSdpaAttention(FromUppercaseModelAttention): attention_mask: Optional[torch.Tensor] = None, causal_attention_mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: if output_attentions: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented. logger.warning_once( @@ -322,7 +322,7 @@ class FromUppercaseModelEncoderLayer(nn.Module): attention_mask: torch.Tensor, causal_attention_mask: torch.Tensor, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.FloatTensor]: + ) -> tuple[torch.FloatTensor]: """ Args: hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` diff --git a/examples/modular-transformers/modeling_multimodal1.py b/examples/modular-transformers/modeling_multimodal1.py index 91d226d12b8..e3ecbdd6974 100644 --- a/examples/modular-transformers/modeling_multimodal1.py +++ b/examples/modular-transformers/modeling_multimodal1.py @@ -5,7 +5,7 @@ # modular_multimodal1.py file directly. One of our CI enforces this. 
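The attention variants above all share the `(attn_output, attn_weights)` return convention that the new `tuple[torch.Tensor, Optional[torch.Tensor]]` annotation spells out: weights are `None` unless `output_attentions` is set. A toy, dependency-light illustration of that contract follows; it is not any of the real attention classes.

from typing import Optional

import torch

def toy_self_attention(
    hidden_states: torch.Tensor, output_attentions: bool = False
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
    # single-head self-attention over (batch, seq, dim), no learned projections
    scale = hidden_states.shape[-1] ** -0.5
    weights = torch.softmax(hidden_states @ hidden_states.transpose(-2, -1) * scale, dim=-1)
    output = weights @ hidden_states
    return output, (weights if output_attentions else None)

out, attn = toy_self_attention(torch.randn(2, 5, 8))
print(out.shape, attn)  # torch.Size([2, 5, 8]) None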
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 from functools import partial -from typing import Callable, Optional, Tuple, Union +from typing import Callable, Optional, Union import torch from torch import nn @@ -223,12 +223,12 @@ class Multimodal1TextAttention(nn.Module): def forward( self, hidden_states: torch.Tensor, - position_embeddings: Tuple[torch.Tensor, torch.Tensor], + position_embeddings: tuple[torch.Tensor, torch.Tensor], attention_mask: Optional[torch.Tensor], past_key_value: Optional[Cache] = None, cache_position: Optional[torch.LongTensor] = None, **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: input_shape = hidden_states.shape[:-1] hidden_shape = (*input_shape, -1, self.head_dim) @@ -290,9 +290,9 @@ class Multimodal1TextDecoderLayer(nn.Module): output_attentions: Optional[bool] = False, use_cache: Optional[bool] = False, cache_position: Optional[torch.LongTensor] = None, - position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC + position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: + ) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]: residual = hidden_states hidden_states = self.input_layernorm(hidden_states) @@ -494,7 +494,7 @@ class Multimodal1TextModel(Multimodal1TextPreTrainedModel): return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, **flash_attn_kwargs: Unpack[FlashAttentionKwargs], - ) -> Union[Tuple, BaseModelOutputWithPast]: + ) -> Union[tuple, BaseModelOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states diff --git a/examples/modular-transformers/modeling_multimodal2.py b/examples/modular-transformers/modeling_multimodal2.py index ba2e9a4d6f2..a2a9d460bda 100644 --- a/examples/modular-transformers/modeling_multimodal2.py +++ b/examples/modular-transformers/modeling_multimodal2.py @@ -5,7 +5,7 @@ # modular_multimodal2.py file directly. One of our CI enforces this. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 -from typing import Optional, Tuple, Union +from typing import Optional, Union import torch from torch import nn @@ -65,7 +65,7 @@ class Multimodal2VisionAttention(nn.Module): attention_mask: Optional[torch.Tensor] = None, causal_attention_mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: """Input shape: Batch x Time x Channel""" bsz, tgt_len, embed_dim = hidden_states.size() @@ -152,7 +152,7 @@ class Multimodal2VisionSdpaAttention(Multimodal2VisionAttention): attention_mask: Optional[torch.Tensor] = None, causal_attention_mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: if output_attentions: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented. 
logger.warning_once( @@ -233,7 +233,7 @@ class Multimodal2VisionFlashAttention2(Multimodal2VisionAttention): attention_mask: Optional[torch.Tensor] = None, causal_attention_mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: output_attentions = False batch_size, q_len, _ = hidden_states.size() @@ -334,7 +334,7 @@ class Multimodal2VisionEncoderLayer(nn.Module): attention_mask: torch.Tensor, causal_attention_mask: torch.Tensor, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.FloatTensor]: + ) -> tuple[torch.FloatTensor]: """ Args: hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` @@ -392,7 +392,7 @@ class Multimodal2VisionEncoder(nn.Module): output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> Union[Tuple, BaseModelOutput]: + ) -> Union[tuple, BaseModelOutput]: r""" Args: inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): @@ -587,7 +587,7 @@ class Multimodal2VisionTransformer(nn.Module): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, interpolate_pos_encoding: Optional[bool] = False, - ) -> Union[Tuple, BaseModelOutputWithPooling]: + ) -> Union[tuple, BaseModelOutputWithPooling]: r""" Returns: @@ -671,7 +671,7 @@ class Multimodal2VisionModel(Multimodal2VisionPreTrainedModel): output_hidden_states: Optional[bool] = None, interpolate_pos_encoding: bool = False, return_dict: Optional[bool] = None, - ) -> Union[Tuple, BaseModelOutputWithPooling]: + ) -> Union[tuple, BaseModelOutputWithPooling]: r""" Returns: diff --git a/examples/modular-transformers/modeling_my_new_model2.py b/examples/modular-transformers/modeling_my_new_model2.py index 854d280663f..712d68c100f 100644 --- a/examples/modular-transformers/modeling_my_new_model2.py +++ b/examples/modular-transformers/modeling_my_new_model2.py @@ -4,7 +4,7 @@ # the file from the modular. If any change should be done, please apply the change to the # modular_my_new_model2.py file directly. One of our CI enforces this. 
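The recurring `Union[Tuple, ...]` → `Union[tuple, ...]` rewrite in these forwards annotates the usual `return_dict` switch: callers get either a plain tuple or a ModelOutput-style object. A minimal sketch of that pattern with a stand-in output class (the real models return `BaseModelOutput` and friends):

from dataclasses import dataclass
from typing import Optional, Union

import torch

@dataclass
class ToyOutput:
    last_hidden_state: Optional[torch.Tensor] = None

def toy_forward(x: torch.Tensor, return_dict: bool = True) -> Union[tuple, ToyOutput]:
    hidden = x * 2  # stand-in computation
    if not return_dict:
        return (hidden,)
    return ToyOutput(last_hidden_state=hidden)

print(type(toy_forward(torch.ones(1), return_dict=False)))  # <class 'tuple'>
print(type(toy_forward(torch.ones(1))))                     # ToyOutput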
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, Optional, Union import torch from torch import nn @@ -222,12 +222,12 @@ class MyNewModel2Attention(nn.Module): def forward( self, hidden_states: torch.Tensor, - position_embeddings: Tuple[torch.Tensor, torch.Tensor], + position_embeddings: tuple[torch.Tensor, torch.Tensor], attention_mask: Optional[torch.Tensor], past_key_value: Optional[Cache] = None, cache_position: Optional[torch.LongTensor] = None, **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: input_shape = hidden_states.shape[:-1] hidden_shape = (*input_shape, -1, self.head_dim) @@ -289,9 +289,9 @@ class MyNewModel2DecoderLayer(nn.Module): output_attentions: Optional[bool] = False, use_cache: Optional[bool] = False, cache_position: Optional[torch.LongTensor] = None, - position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC + position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: + ) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]: residual = hidden_states hidden_states = self.input_layernorm(hidden_states) @@ -485,7 +485,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel): input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -493,7 +493,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel): return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, **kwargs, # NOOP kwarg for now - ) -> Union[Tuple, BaseModelOutputWithPast]: + ) -> Union[tuple, BaseModelOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states @@ -753,14 +753,14 @@ class MyNewModel2ForSequenceClassification(MyNewModel2PreTrainedModel): input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + ) -> Union[tuple, SequenceClassifierOutputWithPast]: r""" labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): Labels for computing the sequence classification/regression loss. 
Indices should be in `[0, ..., diff --git a/examples/modular-transformers/modeling_new_task_model.py b/examples/modular-transformers/modeling_new_task_model.py index ea2e1a2b9a1..d449ca50a21 100644 --- a/examples/modular-transformers/modeling_new_task_model.py +++ b/examples/modular-transformers/modeling_new_task_model.py @@ -5,7 +5,7 @@ # modular_new_task_model.py file directly. One of our CI enforces this. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 from dataclasses import dataclass -from typing import ClassVar, List, Optional, Tuple, Union +from typing import ClassVar, Optional, Union import torch from torch import nn @@ -61,9 +61,9 @@ class NewTaskModelCausalLMOutputWithPast(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None - past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None + hidden_states: Optional[tuple[torch.FloatTensor]] = None + attentions: Optional[tuple[torch.FloatTensor]] = None image_hidden_states: Optional[torch.FloatTensor] = None @@ -337,7 +337,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin): pixel_values: torch.FloatTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None, + past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, token_type_ids: Optional[torch.LongTensor] = None, cache_position: Optional[torch.LongTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -347,7 +347,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, num_logits_to_keep: int = 0, - ) -> Union[Tuple, NewTaskModelCausalLMOutputWithPast]: + ) -> Union[tuple, NewTaskModelCausalLMOutputWithPast]: r""" labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): Labels for computing the masked language modeling loss. 
Indices should either be in `[0, ..., diff --git a/examples/modular-transformers/modeling_roberta.py b/examples/modular-transformers/modeling_roberta.py index e50cf60c3a4..b03774e2285 100644 --- a/examples/modular-transformers/modeling_roberta.py +++ b/examples/modular-transformers/modeling_roberta.py @@ -6,7 +6,7 @@ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 import math import os -from typing import List, Optional, Tuple, Union +from typing import Optional, Union import torch import torch.nn as nn @@ -139,9 +139,9 @@ class RobertaSelfAttention(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: mixed_query_layer = self.query(hidden_states) # If this is instantiated as a cross-attention module, the keys @@ -248,9 +248,9 @@ class RobertaSdpaSelfAttention(RobertaSelfAttention): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None: # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented. logger.warning_once( @@ -389,9 +389,9 @@ class RobertaAttention(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: self_outputs = self.self( hidden_states, attention_mask, @@ -457,9 +457,9 @@ class RobertaLayer(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None, output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + ) -> tuple[torch.Tensor]: # decoder uni-directional self-attention cached key/values tuple is at positions 1,2 self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None self_attention_outputs = self.attention( @@ -535,12 +535,12 @@ class RobertaEncoder(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, - ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]: + ) -> 
Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]: all_hidden_states = () if output_hidden_states else None all_self_attentions = () if output_attentions else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None @@ -861,12 +861,12 @@ class RobertaModel(RobertaPreTrainedModel): inputs_embeds: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, + past_key_values: Optional[list[torch.FloatTensor]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]: + ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]: r""" encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if diff --git a/examples/modular-transformers/modeling_super.py b/examples/modular-transformers/modeling_super.py index d618cd54e90..2ab9865f438 100644 --- a/examples/modular-transformers/modeling_super.py +++ b/examples/modular-transformers/modeling_super.py @@ -4,7 +4,7 @@ # the file from the modular. If any change should be done, please apply the change to the # modular_super.py file directly. One of our CI enforces this. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, Optional, Union import torch from torch import nn @@ -222,12 +222,12 @@ class SuperAttention(nn.Module): def forward( self, hidden_states: torch.Tensor, - position_embeddings: Tuple[torch.Tensor, torch.Tensor], + position_embeddings: tuple[torch.Tensor, torch.Tensor], attention_mask: Optional[torch.Tensor], past_key_value: Optional[Cache] = None, cache_position: Optional[torch.LongTensor] = None, **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: input_shape = hidden_states.shape[:-1] hidden_shape = (*input_shape, -1, self.head_dim) @@ -289,9 +289,9 @@ class SuperDecoderLayer(nn.Module): output_attentions: Optional[bool] = False, use_cache: Optional[bool] = False, cache_position: Optional[torch.LongTensor] = None, - position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC + position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: + ) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]: residual = hidden_states hidden_states = self.input_layernorm(hidden_states) @@ -485,14 +485,14 @@ class SuperModel(SuperPreTrainedModel): input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, 
output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, - ) -> Union[Tuple, BaseModelOutputWithPast]: + ) -> Union[tuple, BaseModelOutputWithPast]: out = super().forward( input_ids, attention_mask, diff --git a/examples/modular-transformers/modeling_switch_function.py b/examples/modular-transformers/modeling_switch_function.py index 3b89284537a..75811d8681c 100644 --- a/examples/modular-transformers/modeling_switch_function.py +++ b/examples/modular-transformers/modeling_switch_function.py @@ -5,7 +5,7 @@ # modular_switch_function.py file directly. One of our CI enforces this. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 # Note that llama and cohere have different definitions for rotate_half -from typing import Callable, Optional, Tuple +from typing import Callable, Optional import torch from torch import nn @@ -123,12 +123,12 @@ class SwitchFunctionAttention(nn.Module): def forward( self, hidden_states: torch.Tensor, - position_embeddings: Tuple[torch.Tensor, torch.Tensor], + position_embeddings: tuple[torch.Tensor, torch.Tensor], attention_mask: Optional[torch.Tensor], past_key_value: Optional[Cache] = None, cache_position: Optional[torch.LongTensor] = None, **kwargs: Unpack[FlashAttentionKwargs], - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: input_shape = hidden_states.shape[:-1] hidden_shape = (*input_shape, -1, self.head_dim) diff --git a/examples/modular-transformers/modular_dummy_bert.py b/examples/modular-transformers/modular_dummy_bert.py index 7a83a2e0ed2..34d2cd1b335 100644 --- a/examples/modular-transformers/modular_dummy_bert.py +++ b/examples/modular-transformers/modular_dummy_bert.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple, Union +from typing import Optional, Union import torch @@ -18,10 +18,10 @@ class DummyBertModel(BertModel): inputs_embeds: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, + past_key_values: Optional[list[torch.FloatTensor]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]: + ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]: return super().forward(input_ids) diff --git a/examples/modular-transformers/modular_new_task_model.py b/examples/modular-transformers/modular_new_task_model.py index a16b114a919..f1943e37e1f 100644 --- a/examples/modular-transformers/modular_new_task_model.py +++ b/examples/modular-transformers/modular_new_task_model.py @@ -1,4 +1,4 @@ -from typing import ClassVar, List, Optional, Union +from typing import ClassVar, Optional, Union import torch import torch.utils.checkpoint @@ -29,7 +29,7 @@ class NewTaskModelForNewTask(PaliGemmaForConditionalGeneration): pixel_values: torch.FloatTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None, + past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, token_type_ids: Optional[torch.LongTensor] = None, cache_position: 
Optional[torch.LongTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, diff --git a/examples/modular-transformers/modular_super.py b/examples/modular-transformers/modular_super.py index 59909a41e4d..f7a3f46d44a 100644 --- a/examples/modular-transformers/modular_super.py +++ b/examples/modular-transformers/modular_super.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple, Union +from typing import Optional, Union import torch @@ -15,14 +15,14 @@ class SuperModel(LlamaModel): input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, - ) -> Union[Tuple, CausalLMOutputWithPast]: + ) -> Union[tuple, CausalLMOutputWithPast]: out = super().forward( input_ids, attention_mask, diff --git a/examples/pytorch/audio-classification/run_audio_classification.py b/examples/pytorch/audio-classification/run_audio_classification.py index eadf508a4a8..8ea627dbdd7 100644 --- a/examples/pytorch/audio-classification/run_audio_classification.py +++ b/examples/pytorch/audio-classification/run_audio_classification.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/contrastive-image-text/run_clip.py b/examples/pytorch/contrastive-image-text/run_clip.py index 39c6b993094..2f5d8eef7c9 100644 --- a/examples/pytorch/contrastive-image-text/run_clip.py +++ b/examples/pytorch/contrastive-image-text/run_clip.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2022 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py index 5762e06fe09..9d70c80ea6e 100755 --- a/examples/pytorch/image-classification/run_image_classification.py +++ b/examples/pytorch/image-classification/run_image_classification.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index f68ae8117d5..67ecbf6885a 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2022 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py index 25a0cb160ea..f39fcd17c0d 100644 --- a/examples/pytorch/image-pretraining/run_mae.py +++ b/examples/pytorch/image-pretraining/run_mae.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2022 The HuggingFace Inc. team. 
All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py index 0a96355a755..842e7c8d606 100644 --- a/examples/pytorch/image-pretraining/run_mim.py +++ b/examples/pytorch/image-pretraining/run_mim.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2022 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/image-pretraining/run_mim_no_trainer.py b/examples/pytorch/image-pretraining/run_mim_no_trainer.py index 1daed502b91..e33b05c829e 100644 --- a/examples/pytorch/image-pretraining/run_mim_no_trainer.py +++ b/examples/pytorch/image-pretraining/run_mim_no_trainer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/instance-segmentation/run_instance_segmentation.py b/examples/pytorch/instance-segmentation/run_instance_segmentation.py index 434e77605cc..6c4087120de 100644 --- a/examples/pytorch/instance-segmentation/run_instance_segmentation.py +++ b/examples/pytorch/instance-segmentation/run_instance_segmentation.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,9 +17,10 @@ import logging import os import sys +from collections.abc import Mapping from dataclasses import dataclass, field from functools import partial -from typing import Any, Dict, List, Mapping, Optional +from typing import Any, Optional import albumentations as A import numpy as np @@ -200,7 +200,7 @@ class Evaluator: def reset_metric(self): self.metric.reset() - def postprocess_target_batch(self, target_batch) -> List[Dict[str, torch.Tensor]]: + def postprocess_target_batch(self, target_batch) -> list[dict[str, torch.Tensor]]: """Collect targets in a form of list of dictionaries with keys "masks", "labels".""" batch_masks = target_batch[0] batch_labels = target_batch[1] @@ -214,13 +214,13 @@ class Evaluator: ) return post_processed_targets - def get_target_sizes(self, post_processed_targets) -> List[List[int]]: + def get_target_sizes(self, post_processed_targets) -> list[list[int]]: target_sizes = [] for target in post_processed_targets: target_sizes.append(target["masks"].shape[-2:]) return target_sizes - def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> List[Dict[str, torch.Tensor]]: + def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> list[dict[str, torch.Tensor]]: """Collect predictions in a form of list of dictionaries with keys "masks", "labels", "scores".""" model_output = ModelOutput(class_queries_logits=prediction_batch[0], masks_queries_logits=prediction_batch[1]) diff --git a/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py b/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py index 3c814f2598d..f130a77887e 100644 --- a/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py +++ b/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 
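The import move in the instance-segmentation script (`typing.Mapping` → `collections.abc.Mapping`) follows the same PEP 585 deprecation: the `typing` aliases for abstract containers are deprecated since Python 3.9, and the `collections.abc` classes serve both annotations and `isinstance` checks. A small illustration:

from collections.abc import Mapping

def looks_like_batch(x) -> bool:
    # the abc class works in isinstance() checks as well as in annotations
    return isinstance(x, Mapping)

print(looks_like_batch({"pixel_values": None}), looks_like_batch([1, 2]))  # True False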
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,9 +20,10 @@ import logging import math import os import sys +from collections.abc import Mapping from functools import partial from pathlib import Path -from typing import Any, Mapping +from typing import Any import albumentations as A import datasets diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index f36ad9cfd65..44869b004b9 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2020 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index 68466de08bf..530ff5a3fa5 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/language-modeling/run_fim.py b/examples/pytorch/language-modeling/run_fim.py index c6d44ffcefa..ac97a3c10d4 100644 --- a/examples/pytorch/language-modeling/run_fim.py +++ b/examples/pytorch/language-modeling/run_fim.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -551,7 +550,7 @@ def main(): covariance_matrix=1e-5 * sigma, ) new_token_embeddings = torch.stack( - tuple((dist.sample() for _ in range(len(special_tokens)))), + tuple(dist.sample() for _ in range(len(special_tokens))), dim=0, ) else: @@ -571,7 +570,7 @@ def main(): covariance_matrix=1e-5 * sigma, ) new_token_embeddings = torch.stack( - tuple((dist.sample() for _ in range(len(special_tokens)))), + tuple(dist.sample() for _ in range(len(special_tokens))), dim=0, ) diff --git a/examples/pytorch/language-modeling/run_fim_no_trainer.py b/examples/pytorch/language-modeling/run_fim_no_trainer.py index ec14d62d796..ae5d22c3a0a 100644 --- a/examples/pytorch/language-modeling/run_fim_no_trainer.py +++ b/examples/pytorch/language-modeling/run_fim_no_trainer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -518,7 +517,7 @@ def main(): covariance_matrix=1e-5 * sigma, ) new_token_embeddings = torch.stack( - tuple((dist.sample() for _ in range(len(special_tokens)))), + tuple(dist.sample() for _ in range(len(special_tokens))), dim=0, ) else: @@ -538,7 +537,7 @@ def main(): covariance_matrix=1e-5 * sigma, ) new_token_embeddings = torch.stack( - tuple((dist.sample() for _ in range(len(special_tokens)))), + tuple(dist.sample() for _ in range(len(special_tokens))), dim=0, ) diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index 2353aef0106..7a9abff0e30 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2020 The HuggingFace Team All rights reserved. 
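Besides the header cleanups, the `run_fim` hunks above drop a redundant pair of parentheses: `tuple((x for ...))` and `tuple(x for ...)` build the same generator, and the outer call consumes it either way. A tiny demonstration with a stand-in distribution (the scripts use a multivariate normal fitted to the embedding matrix):

import torch

dist = torch.distributions.Normal(0.0, 1.0)  # stand-in for the multivariate normal in the script
new_token_embeddings = torch.stack(
    tuple(dist.sample() for _ in range(3)),  # identical to tuple((dist.sample() for _ in range(3)))
    dim=0,
)
print(new_token_embeddings.shape)  # torch.Size([3])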
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index d850cc2d463..c57e65c6e65 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2021 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index 5a736201e21..0eda261d177 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2020 The HuggingFace Team All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index bed39d20f41..3817a2d55b7 100755 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index 7f09614a61c..f6f140e6d60 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/pytorch/object-detection/run_object_detection.py b/examples/pytorch/object-detection/run_object_detection.py index ec8eba2cc6d..cc319d331e2 100644 --- a/examples/pytorch/object-detection/run_object_detection.py +++ b/examples/pytorch/object-detection/run_object_detection.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding=utf-8 # Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,9 +17,10 @@ import logging import os import sys +from collections.abc import Mapping from dataclasses import dataclass, field from functools import partial -from typing import Any, List, Mapping, Optional, Tuple, Union +from typing import Any, Optional, Union import albumentations as A import numpy as np @@ -60,7 +60,7 @@ class ModelOutput: def format_image_annotations_as_coco( - image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]] + image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]] ) -> dict: """Format one set of image annotations to the COCO format @@ -94,7 +94,7 @@ def format_image_annotations_as_coco( } -def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor: +def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor: """ Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1] to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates. 
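For orientation, here is a compact sketch of the conversion this docstring describes (upstream delegates to a `center_to_corners_format` helper plus a scaling step; this inline version is for illustration only):

import torch

def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
    # (x_center, y_center, width, height) in [0, 1] -> (x_min, y_min, x_max, y_max) in pixels
    height, width = image_size
    x_c, y_c, w, h = boxes.unbind(-1)
    corners = torch.stack([x_c - w / 2, y_c - h / 2, x_c + w / 2, y_c + h / 2], dim=-1)
    return corners * torch.tensor([width, height, width, height], dtype=corners.dtype)

print(convert_bbox_yolo_to_pascal(torch.tensor([[0.5, 0.5, 0.2, 0.4]]), (100, 200)))
# tensor([[ 80.,  30., 120.,  70.]])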
@@ -148,7 +148,7 @@ def augment_and_transform_batch( return result -def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]: +def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]: data = {} data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch]) data["labels"] = [x["labels"] for x in batch] diff --git a/examples/pytorch/object-detection/run_object_detection_no_trainer.py b/examples/pytorch/object-detection/run_object_detection_no_trainer.py index 3a11f2762a8..db69a00a577 100644 --- a/examples/pytorch/object-detection/run_object_detection_no_trainer.py +++ b/examples/pytorch/object-detection/run_object_detection_no_trainer.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,9 +18,10 @@ import json import logging import math import os +from collections.abc import Mapping from functools import partial from pathlib import Path -from typing import Any, List, Mapping, Tuple, Union +from typing import Any, Union import albumentations as A import datasets @@ -61,7 +61,7 @@ require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/sema # Copied from examples/pytorch/object-detection/run_object_detection.format_image_annotations_as_coco def format_image_annotations_as_coco( - image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]] + image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]] ) -> dict: """Format one set of image annotations to the COCO format @@ -96,7 +96,7 @@ def format_image_annotations_as_coco( # Copied from examples/pytorch/object-detection/run_object_detection.convert_bbox_yolo_to_pascal -def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor: +def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor: """ Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1] to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates. @@ -152,7 +152,7 @@ def augment_and_transform_batch( # Copied from examples/pytorch/object-detection/run_object_detection.collate_fn -def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]: +def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]: data = {} data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch]) data["labels"] = [x["labels"] for x in batch] diff --git a/examples/pytorch/old_test_xla_examples.py b/examples/pytorch/old_test_xla_examples.py index c13d8b31151..b3101aa06b9 100644 --- a/examples/pytorch/old_test_xla_examples.py +++ b/examples/pytorch/old_test_xla_examples.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2018 HuggingFace Inc.. 
diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py
index bdb879269b9..dbdc52cedea 100755
--- a/examples/pytorch/question-answering/run_qa.py
+++ b/examples/pytorch/question-answering/run_qa.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/question-answering/run_qa_beam_search.py b/examples/pytorch/question-answering/run_qa_beam_search.py
index 971e7100459..6ea909a7da0 100755
--- a/examples/pytorch/question-answering/run_qa_beam_search.py
+++ b/examples/pytorch/question-answering/run_qa_beam_search.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
index 5cc5397efea..84bead830a7 100644
--- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
+++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py
index e47a868dd24..138dd61f99a 100755
--- a/examples/pytorch/question-answering/run_qa_no_trainer.py
+++ b/examples/pytorch/question-answering/run_qa_no_trainer.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/question-answering/run_seq2seq_qa.py b/examples/pytorch/question-answering/run_seq2seq_qa.py
index f7121697b07..a07e34f091d 100644
--- a/examples/pytorch/question-answering/run_seq2seq_qa.py
+++ b/examples/pytorch/question-answering/run_seq2seq_qa.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -22,7 +21,7 @@ import logging
 import os
 import sys
 from dataclasses import dataclass, field
-from typing import List, Optional, Tuple
+from typing import Optional
 
 import datasets
 import evaluate
@@ -469,7 +468,7 @@ def main():
         question_column: str,
         context_column: str,
         answer_column: str,
-    ) -> Tuple[List[str], List[str]]:
+    ) -> tuple[list[str], list[str]]:
         questions = examples[question_column]
         contexts = examples[context_column]
         answers = examples[answer_column]
diff --git a/examples/pytorch/question-answering/trainer_qa.py b/examples/pytorch/question-answering/trainer_qa.py
index 10428a2b77e..3948391f633 100644
--- a/examples/pytorch/question-answering/trainer_qa.py
+++ b/examples/pytorch/question-answering/trainer_qa.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/question-answering/trainer_seq2seq_qa.py b/examples/pytorch/question-answering/trainer_seq2seq_qa.py
index ff9e2d7bc3f..2492f601316 100644
--- a/examples/pytorch/question-answering/trainer_seq2seq_qa.py
+++ b/examples/pytorch/question-answering/trainer_seq2seq_qa.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,7 +17,7 @@ A subclass of `Trainer` specific to Question-Answering tasks
 """
 
 import math
 import time
-from typing import Dict, List, Optional
+from typing import Optional
 
 from torch.utils.data import Dataset
@@ -42,10 +41,10 @@ class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
         self,
         eval_dataset: Optional[Dataset] = None,
         eval_examples=None,
-        ignore_keys: Optional[List[str]] = None,
+        ignore_keys: Optional[list[str]] = None,
         metric_key_prefix: str = "eval",
         **gen_kwargs,
-    ) -> Dict[str, float]:
+    ) -> dict[str, float]:
         gen_kwargs = gen_kwargs.copy()
 
         # Use legacy argument setting if a) the option is not explicitly passed; and b) the argument is set in the
diff --git a/examples/pytorch/question-answering/utils_qa.py b/examples/pytorch/question-answering/utils_qa.py
index 79497dbb816..f0cc5c26a69 100644
--- a/examples/pytorch/question-answering/utils_qa.py
+++ b/examples/pytorch/question-answering/utils_qa.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import collections
 import json
 import logging
 import os
-from typing import Optional, Tuple
+from typing import Optional
 
 import numpy as np
 from tqdm.auto import tqdm
@@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
 def postprocess_qa_predictions(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -223,7 +222,7 @@ def postprocess_qa_predictions(
     # If we have an output_dir, let's save all those dicts.
     if output_dir is not None:
         if not os.path.isdir(output_dir):
-            raise EnvironmentError(f"{output_dir} is not a directory.")
+            raise OSError(f"{output_dir} is not a directory.")
 
         prediction_file = os.path.join(
             output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@@ -253,7 +252,7 @@ def postprocess_qa_predictions(
 def postprocess_qa_predictions_with_beam_search(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
     # If we have an output_dir, let's save all those dicts.
     if output_dir is not None:
         if not os.path.isdir(output_dir):
-            raise EnvironmentError(f"{output_dir} is not a directory.")
+            raise OSError(f"{output_dir} is not a directory.")
 
         prediction_file = os.path.join(
             output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
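The `EnvironmentError` → `OSError` change above reflects PEP 3151 (Python 3.3), which merged `EnvironmentError` and `IOError` into `OSError`; the old names survive only as aliases, so raising `OSError` directly is the canonical spelling. A small check plus the resulting idiom (the directory name is illustrative):

import os

# Both legacy names are mere aliases of OSError at runtime
assert EnvironmentError is OSError and IOError is OSError

output_dir = "out"  # illustrative path
if not os.path.isdir(output_dir):
    raise OSError(f"{output_dir} is not a directory.")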
diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
index b570abba326..bfedf7c4ca9 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -258,7 +257,7 @@ def main():
     else:
         repo_id = data_args.dataset_name
     filename = "id2label.json"
-    id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
+    id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
     id2label = {int(k): v for k, v in id2label.items()}
     label2id = {v: str(k) for k, v in id2label.items()}
 
diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
index df4e3030a4c..2cbc4130189 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -316,7 +315,7 @@ def main():
     else:
         repo_id = args.dataset_name
     filename = "id2label.json"
-    id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
+    id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
     id2label = {int(k): v for k, v in id2label.items()}
     label2id = {v: k for k, v in id2label.items()}
 
diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
index 5c6c3f62c79..b4ce3f71eb5 100755
--- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
+++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import math
 import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import datasets
 import torch
@@ -328,7 +327,7 @@ class DataCollatorForWav2Vec2Pretraining:
     mask_time_prob: Optional[float] = 0.65
     mask_time_length: Optional[int] = 10
 
-    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+    def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
         # reformat list to dict and set to pytorch format
         batch = self.feature_extractor.pad(
             features,
@@ -716,7 +715,7 @@ def main():
                 }
                 log_str = ""
                 for k, v in train_logs.items():
-                    log_str += "| {}: {:.3e}".format(k, v.item())
+                    log_str += f"| {k}: {v.item():.3e}"
 
                 if accelerator.is_local_main_process:
                     progress_bar.write(log_str)
@@ -773,7 +772,7 @@
 
                 log_str = ""
                 for k, v in val_logs.items():
-                    log_str += "| {}: {:.3e}".format(k, v.item())
+                    log_str += f"| {k}: {v.item():.3e}"
 
                 if accelerator.is_local_main_process:
                     progress_bar.write(log_str)
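The logging changes above swap `str.format` for f-strings; f-strings accept the same format-spec mini-language, so specs like `:.3e` carry over unchanged:

loss = 0.0123456  # illustrative value
# Both spellings render identically
assert "| {}: {:.3e}".format("loss", loss) == f"| loss: {loss:.3e}"  # "| loss: 1.235e-02"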
diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
index 79d83986017..53a1f98c890 100755
--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,7 +23,7 @@ import re
 import sys
 import warnings
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import datasets
 import evaluate
@@ -211,11 +210,11 @@ class DataTrainingArguments:
             )
         },
     )
-    chars_to_ignore: Optional[List[str]] = list_field(
+    chars_to_ignore: Optional[list[str]] = list_field(
         default=None,
         metadata={"help": "A list of characters to remove from the transcripts."},
     )
-    eval_metrics: List[str] = list_field(
+    eval_metrics: list[str] = list_field(
         default=["wer"],
         metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
     )
@@ -318,7 +317,7 @@ class DataCollatorCTCWithPadding:
     pad_to_multiple_of_labels: Optional[int] = None
     feature_extractor_input_name: Optional[str] = "input_values"
 
-    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+    def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
         # split inputs and labels since they have to be of different lengths and need
         # different padding methods
         input_features = [
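The `Optional[list[str]] = list_field(...)` fields above combine the PEP 585 spelling with the dataclass rule that mutable defaults must go through `default_factory`. The script's `list_field` helper is presumably a thin wrapper along these lines (a sketch under that assumption, not the verbatim definition):

from dataclasses import dataclass, field
from typing import Optional

def list_field(default=None, metadata=None):
    # dataclasses reject a bare mutable default such as ["wer"]; a factory sidesteps that
    return field(default_factory=lambda: default, metadata=metadata)

@dataclass
class DataTrainingArguments:
    chars_to_ignore: Optional[list[str]] = list_field(default=None)
    eval_metrics: list[str] = list_field(default=["wer"])

args = DataTrainingArguments()
assert args.eval_metrics == ["wer"]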
diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
index 0d500e948b4..511e7bc3d44 100755
--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,7 +23,7 @@ import re
 import sys
 import warnings
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import datasets
 import evaluate
@@ -201,11 +200,11 @@ class DataTrainingArguments:
             )
         },
     )
-    chars_to_ignore: Optional[List[str]] = list_field(
+    chars_to_ignore: Optional[list[str]] = list_field(
         default=None,
         metadata={"help": "A list of characters to remove from the transcripts."},
     )
-    eval_metrics: List[str] = list_field(
+    eval_metrics: list[str] = list_field(
         default=["wer"],
         metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
     )
@@ -300,7 +299,7 @@ class DataCollatorCTCWithPadding:
     pad_to_multiple_of: Optional[int] = None
     pad_to_multiple_of_labels: Optional[int] = None
 
-    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+    def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
         # split inputs and labels since they have to be of different lengths and need
         # different padding methods
         input_features = [{"input_values": feature["input_values"]} for feature in features]
diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
index 504ec180007..1b64ea078d6 100755
--- a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -23,7 +22,7 @@ import logging
 import os
 import sys
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 
 import datasets
 import evaluate
@@ -110,11 +109,11 @@ class ModelArguments:
     freeze_encoder: bool = field(
         default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
     )
-    forced_decoder_ids: List[List[int]] = field(
+    forced_decoder_ids: list[list[int]] = field(
         default=None,
         metadata={"help": "Deprecated. Please use the `language` and `task` arguments instead."},
     )
-    suppress_tokens: List[int] = field(
+    suppress_tokens: list[int] = field(
         default=None,
         metadata={
             "help": (
@@ -247,7 +246,7 @@ class DataCollatorSpeechSeq2SeqWithPadding:
     decoder_start_token_id: int
     forward_attention_mask: bool
 
-    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+    def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
         # split inputs and labels since they have to be of different lengths and need
         # different padding methods
         model_input_name = self.processor.model_input_names[0]
diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py
index a39d68505ab..ce63c1c7f06 100755
--- a/examples/pytorch/summarization/run_summarization.py
+++ b/examples/pytorch/summarization/run_summarization.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py
index 6a79b160cb4..4351ecffd10 100644
--- a/examples/pytorch/summarization/run_summarization_no_trainer.py
+++ b/examples/pytorch/summarization/run_summarization_no_trainer.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/test_accelerate_examples.py b/examples/pytorch/test_accelerate_examples.py
index fe700eabdd9..923803a2da5 100644
--- a/examples/pytorch/test_accelerate_examples.py
+++ b/examples/pytorch/test_accelerate_examples.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 HuggingFace Inc..
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -51,7 +50,7 @@ def get_results(output_dir):
     results = {}
     path = os.path.join(output_dir, "all_results.json")
     if os.path.exists(path):
-        with open(path, "r") as f:
+        with open(path) as f:
             results = json.load(f)
     else:
         raise ValueError(f"can't find {path}")
diff --git a/examples/pytorch/test_pytorch_examples.py b/examples/pytorch/test_pytorch_examples.py
index 0df63ee946d..a986b426e1b 100644
--- a/examples/pytorch/test_pytorch_examples.py
+++ b/examples/pytorch/test_pytorch_examples.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 HuggingFace Inc..
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -87,7 +86,7 @@ def get_results(output_dir):
     results = {}
     path = os.path.join(output_dir, "all_results.json")
     if os.path.exists(path):
-        with open(path, "r") as f:
+        with open(path) as f:
             results = json.load(f)
     else:
         raise ValueError(f"can't find {path}")
diff --git a/examples/pytorch/text-classification/run_classification.py b/examples/pytorch/text-classification/run_classification.py
index d9d734af6c2..62d71b7f28b 100755
--- a/examples/pytorch/text-classification/run_classification.py
+++ b/examples/pytorch/text-classification/run_classification.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,7 +20,7 @@ import os
 import random
 import sys
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import Optional
 
 import datasets
 import evaluate
@@ -256,7 +255,7 @@ class ModelArguments:
     )
 
 
-def get_label_list(raw_dataset, split="train") -> List[str]:
+def get_label_list(raw_dataset, split="train") -> list[str]:
     """Get the list of labels from a multi-label dataset"""
 
     if isinstance(raw_dataset[split]["label"][0], list):
@@ -537,7 +536,7 @@ def main():
         model.config.id2label = {id: label for label, id in label_to_id.items()}
     elif not is_regression:  # classification, but not training
         logger.info("using label infos in the model config")
-        logger.info("label2id: {}".format(model.config.label2id))
+        logger.info(f"label2id: {model.config.label2id}")
         label_to_id = model.config.label2id
     else:  # regression
         label_to_id = None
@@ -549,7 +548,7 @@ def main():
     )
     max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
 
-    def multi_labels_to_ids(labels: List[str]) -> List[float]:
+    def multi_labels_to_ids(labels: list[str]) -> list[float]:
         ids = [0.0] * len(label_to_id)  # BCELoss requires float as target type
         for label in labels:
             ids[label_to_id[label]] = 1.0
@@ -735,7 +734,7 @@ def main():
                     else:
                         item = label_list[item]
                     writer.write(f"{index}\t{item}\n")
-        logger.info("Predict results saved at {}".format(output_predict_file))
+        logger.info(f"Predict results saved at {output_predict_file}")
 
     kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"}
     if training_args.push_to_hub:
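`multi_labels_to_ids` above produces the multi-hot float vector that BCE-style losses expect; a quick usage check with an illustrative label mapping:

label_to_id = {"politics": 0, "sports": 1, "tech": 2}  # illustrative mapping

def multi_labels_to_ids(labels: list[str]) -> list[float]:
    ids = [0.0] * len(label_to_id)  # BCELoss requires float as target type
    for label in labels:
        ids[label_to_id[label]] = 1.0
    return ids

assert multi_labels_to_ids(["sports", "tech"]) == [0.0, 1.0, 1.0]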
diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py
index 5649f845564..c2930380859 100755
--- a/examples/pytorch/text-classification/run_glue.py
+++ b/examples/pytorch/text-classification/run_glue.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py
index cce6adf9740..8f05b0e90bc 100644
--- a/examples/pytorch/text-classification/run_glue_no_trainer.py
+++ b/examples/pytorch/text-classification/run_glue_no_trainer.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py
index a302cbf357d..86ecb0f63ad 100755
--- a/examples/pytorch/text-classification/run_xnli.py
+++ b/examples/pytorch/text-classification/run_xnli.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
diff --git a/examples/pytorch/text-generation/run_generation.py b/examples/pytorch/text-generation/run_generation.py
index 570eb92645f..42cd9528e1d 100755
--- a/examples/pytorch/text-generation/run_generation.py
+++ b/examples/pytorch/text-generation/run_generation.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -19,7 +18,6 @@
 import argparse
 import inspect
 import logging
-from typing import Tuple
 
 import torch
 from accelerate import PartialState
@@ -271,8 +269,8 @@ class _ModelFallbackWrapper(GenerationMixin):
         )
 
     def _reorder_cache(
-        self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
-    ) -> Tuple[Tuple[torch.Tensor]]:
+        self, past_key_values: tuple[tuple[torch.Tensor]], beam_idx: torch.Tensor
+    ) -> tuple[tuple[torch.Tensor]]:
         """
         This function is used to re-order the `past_key_values` cache if [`~PretrainedModel.beam_search`] or
         [`~PretrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
diff --git a/examples/pytorch/text-generation/run_generation_contrastive_search.py b/examples/pytorch/text-generation/run_generation_contrastive_search.py
index a36323e4ed7..5610dfb7f5d 100755
--- a/examples/pytorch/text-generation/run_generation_contrastive_search.py
+++ b/examples/pytorch/text-generation/run_generation_contrastive_search.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 University of Cambridge, Tencent AI Lab, DeepMind and The University of Hong Kong Authors and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py
index 7503dae0e40..bbe85cff2e8 100755
--- a/examples/pytorch/token-classification/run_ner.py
+++ b/examples/pytorch/token-classification/run_ner.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py
index c6168b5eb81..f9dee4a4920 100755
--- a/examples/pytorch/token-classification/run_ner_no_trainer.py
+++ b/examples/pytorch/token-classification/run_ner_no_trainer.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py
index 6987258d76d..13106686469 100755
--- a/examples/pytorch/translation/run_translation.py
+++ b/examples/pytorch/translation/run_translation.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py
index 8fb338c8689..ef2e928cf65 100644
--- a/examples/pytorch/translation/run_translation_no_trainer.py
+++ b/examples/pytorch/translation/run_translation_no_trainer.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/quantization/custom_quantization.py b/examples/quantization/custom_quantization.py
index 16b31cd8ebe..006b092e671 100644
--- a/examples/quantization/custom_quantization.py
+++ b/examples/quantization/custom_quantization.py
@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict
+from typing import Any
 
 import torch
 
@@ -14,7 +14,7 @@ class CustomConfig(QuantizationConfigMixin):
         self.quant_method = "custom"
         self.bits = 8
 
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         output = {
             "num_bits": self.bits,
         }
@@ -24,7 +24,7 @@ class CustomConfig(QuantizationConfigMixin):
         config_dict = self.to_dict()
         return f"{self.__class__.__name__} {json.dumps(config_dict, indent=2, sort_keys=True)}\n"
 
-    def to_diff_dict(self) -> Dict[str, Any]:
+    def to_diff_dict(self) -> dict[str, Any]:
         config_dict = self.to_dict()
 
         default_config_dict = CustomConfig().to_dict()
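Judging from the bodies shown, `to_diff_dict` serializes only the keys whose values differ from a freshly constructed default config. A standalone sketch of that idea (assumed semantics, not the script's exact implementation):

def to_diff_dict(config_dict: dict, default_config_dict: dict) -> dict:
    # keep only the entries that deviate from the defaults
    return {k: v for k, v in config_dict.items() if default_config_dict.get(k) != v}

assert to_diff_dict(
    {"num_bits": 4, "quant_method": "custom"},
    {"num_bits": 8, "quant_method": "custom"},
) == {"num_bits": 4}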
diff --git a/examples/quantization/custom_quantization_int8_example.py b/examples/quantization/custom_quantization_int8_example.py
index e43b2e0fc21..a61c041b447 100644
--- a/examples/quantization/custom_quantization_int8_example.py
+++ b/examples/quantization/custom_quantization_int8_example.py
@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 
 import torch
 import torch.nn as nn
@@ -112,7 +112,7 @@ class Int8SymmetricConfig(QuantizationConfigMixin):
     Configuration for INT8 symmetric quantization.
     """
 
-    def __init__(self, modules_to_not_convert: Optional[List[str]] = None, **kwargs):
+    def __init__(self, modules_to_not_convert: Optional[list[str]] = None, **kwargs):
         self.quant_method = "int8_symmetric"
         self.modules_to_not_convert = modules_to_not_convert
 
@@ -120,7 +120,7 @@ class Int8SymmetricConfig(QuantizationConfigMixin):
         config_dict = self.to_dict()
         return f"{self.__class__.__name__} {json.dumps(config_dict, indent=2, sort_keys=True)}\n"
 
-    def to_diff_dict(self) -> Dict[str, Any]:
+    def to_diff_dict(self) -> dict[str, Any]:
         config_dict = self.to_dict()
 
         default_config_dict = Int8SymmetricConfig().to_dict()
@@ -164,7 +164,7 @@ class Int8SymmetricQuantizer(HfQuantizer):
         model,
         param_value: "torch.Tensor",
         param_name: str,
-        state_dict: Dict[str, Any],
+        state_dict: dict[str, Any],
         **kwargs,
     ):
         module, tensor_name = get_module_from_name(model, param_name)
@@ -186,8 +186,8 @@ class Int8SymmetricQuantizer(HfQuantizer):
         param_value: "torch.Tensor",
         param_name: str,
         target_device: "torch.device",
-        state_dict: Dict[str, Any],
-        unexpected_keys: Optional[List[str]] = None,
+        state_dict: dict[str, Any],
+        unexpected_keys: Optional[list[str]] = None,
     ):
         """
         Quantizes weights to INT8 symmetric format.
@@ -202,7 +202,7 @@ class Int8SymmetricQuantizer(HfQuantizer):
         module._buffers[tensor_name] = weight_quantized.to(target_device)
         module._buffers["weight_scale"] = weight_scale.to(target_device)
 
-    def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
+    def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]:
         not_missing_keys = []
         for name, module in model.named_modules():
             if isinstance(module, Int8SymmetricLinear):
diff --git a/examples/run_on_remote.py b/examples/run_on_remote.py
index dff9d268484..2baf1d1a032 100644
--- a/examples/run_on_remote.py
+++ b/examples/run_on_remote.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,7 +55,7 @@ if __name__ == "__main__":
     cluster.run(["pip install torch --upgrade --extra-index-url https://download.pytorch.org/whl/cu117"])
 
     # Run example. You can bypass the CLI wrapper and paste your own code here.
-    cluster.run([f"python transformers/examples/{args.example} {' '.join(shlex.quote(arg) for arg in unknown)}"])
+    cluster.run([f"python transformers/examples/{args.example} {shlex.join(unknown)}"])
 
     # Alternatively, we can just import and run a training function (especially if there's no wrapper CLI):
     # from my_script... import train
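`shlex.join` (Python 3.8+) is the one-call spelling of the quote-and-join idiom it replaces above, and is equally safe for arguments containing spaces or shell metacharacters:

import shlex

unknown = ["--output_dir", "my dir", "--seed", "42"]  # illustrative argv tail
assert shlex.join(unknown) == " ".join(shlex.quote(arg) for arg in unknown)
print(shlex.join(unknown))  # --output_dir 'my dir' --seed 42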
diff --git a/examples/tensorflow/contrastive-image-text/run_clip.py b/examples/tensorflow/contrastive-image-text/run_clip.py
index 0db9341386e..1b084a603dd 100644
--- a/examples/tensorflow/contrastive-image-text/run_clip.py
+++ b/examples/tensorflow/contrastive-image-text/run_clip.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/image-classification/run_image_classification.py b/examples/tensorflow/image-classification/run_image_classification.py
index a2771e56818..c9f9c9750b9 100644
--- a/examples/tensorflow/image-classification/run_image_classification.py
+++ b/examples/tensorflow/image-classification/run_image_classification.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py b/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py
index 260f77226b1..aa90a9db527 100644
--- a/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py
+++ b/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -178,7 +177,7 @@ def main(args):
             for i in range(len(serialized_examples)):
                 example = serialized_examples[i]
                 out_file.write(example)
-        print("Wrote file {} containing {} records".format(filename, records_containing))
+        print(f"Wrote file {filename} containing {records_containing} records")
 
         shard_count += 1
         total_records += records_containing
diff --git a/examples/tensorflow/language-modeling-tpu/run_mlm.py b/examples/tensorflow/language-modeling-tpu/run_mlm.py
index 7ed111ab127..7b4155f26ed 100644
--- a/examples/tensorflow/language-modeling-tpu/run_mlm.py
+++ b/examples/tensorflow/language-modeling-tpu/run_mlm.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/language-modeling-tpu/train_unigram.py b/examples/tensorflow/language-modeling-tpu/train_unigram.py
index 615f93bc1bf..9eb9c8427b0 100644
--- a/examples/tensorflow/language-modeling-tpu/train_unigram.py
+++ b/examples/tensorflow/language-modeling-tpu/train_unigram.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py
index 00cfa6f7d24..d43530669b9 100755
--- a/examples/tensorflow/language-modeling/run_clm.py
+++ b/examples/tensorflow/language-modeling/run_clm.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py
index 9e1cded9a31..edae71252d5 100755
--- a/examples/tensorflow/language-modeling/run_mlm.py
+++ b/examples/tensorflow/language-modeling/run_mlm.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/multiple-choice/run_swag.py b/examples/tensorflow/multiple-choice/run_swag.py
index 533640da8b0..9b6ba4228c0 100644
--- a/examples/tensorflow/multiple-choice/run_swag.py
+++ b/examples/tensorflow/multiple-choice/run_swag.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/question-answering/run_qa.py b/examples/tensorflow/question-answering/run_qa.py
index 83914c391f6..28418496c23 100755
--- a/examples/tensorflow/question-answering/run_qa.py
+++ b/examples/tensorflow/question-answering/run_qa.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/question-answering/utils_qa.py b/examples/tensorflow/question-answering/utils_qa.py
index 79497dbb816..f0cc5c26a69 100644
--- a/examples/tensorflow/question-answering/utils_qa.py
+++ b/examples/tensorflow/question-answering/utils_qa.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import collections
 import json
 import logging
 import os
-from typing import Optional, Tuple
+from typing import Optional
 
 import numpy as np
 from tqdm.auto import tqdm
@@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
 def postprocess_qa_predictions(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -223,7 +222,7 @@ def postprocess_qa_predictions(
     # If we have an output_dir, let's save all those dicts.
     if output_dir is not None:
         if not os.path.isdir(output_dir):
-            raise EnvironmentError(f"{output_dir} is not a directory.")
+            raise OSError(f"{output_dir} is not a directory.")
 
         prediction_file = os.path.join(
             output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@@ -253,7 +252,7 @@ def postprocess_qa_predictions(
 def postprocess_qa_predictions_with_beam_search(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
     # If we have an output_dir, let's save all those dicts.
     if output_dir is not None:
         if not os.path.isdir(output_dir):
-            raise EnvironmentError(f"{output_dir} is not a directory.")
+            raise OSError(f"{output_dir} is not a directory.")
 
         prediction_file = os.path.join(
             output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
diff --git a/examples/tensorflow/summarization/run_summarization.py b/examples/tensorflow/summarization/run_summarization.py
index a77a4767dbc..2a2ef3fb767 100644
--- a/examples/tensorflow/summarization/run_summarization.py
+++ b/examples/tensorflow/summarization/run_summarization.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/test_tensorflow_examples.py b/examples/tensorflow/test_tensorflow_examples.py
index bbb8bfa3891..46ed20c021d 100644
--- a/examples/tensorflow/test_tensorflow_examples.py
+++ b/examples/tensorflow/test_tensorflow_examples.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 HuggingFace Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -86,7 +85,7 @@ def get_results(output_dir):
     results = {}
     path = os.path.join(output_dir, "all_results.json")
     if os.path.exists(path):
-        with open(path, "r") as f:
+        with open(path) as f:
             results = json.load(f)
     else:
         raise ValueError(f"can't find {path}")
diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py
index e095a3dc00b..2e9096b3642 100644
--- a/examples/tensorflow/text-classification/run_glue.py
+++ b/examples/tensorflow/text-classification/run_glue.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py
index 1aaa632cd78..45b4a3e607e 100644
--- a/examples/tensorflow/text-classification/run_text_classification.py
+++ b/examples/tensorflow/text-classification/run_text_classification.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/token-classification/run_ner.py b/examples/tensorflow/token-classification/run_ner.py
index 19d153108b1..8a50b2a6503 100644
--- a/examples/tensorflow/token-classification/run_ner.py
+++ b/examples/tensorflow/token-classification/run_ner.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py
index 59fb6fc0559..5d9771d425e 100644
--- a/examples/tensorflow/translation/run_translation.py
+++ b/examples/tensorflow/translation/run_translation.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/training/distributed_training.py b/examples/training/distributed_training.py
index f4ebc44437f..af19106a827 100644
--- a/examples/training/distributed_training.py
+++ b/examples/training/distributed_training.py
@@ -19,16 +19,16 @@ def run(backend):
     tensor = torch.zeros(1)
     # Need to put tensor on a GPU device for nccl backend
     if backend == "nccl":
-        device = torch.device("cuda:{}".format(LOCAL_RANK))
+        device = torch.device(f"cuda:{LOCAL_RANK}")
         tensor = tensor.to(device)
 
     if WORLD_RANK == 0:
         for rank_recv in range(1, WORLD_SIZE):
             dist.send(tensor=tensor, dst=rank_recv)
-            print("worker_{} sent data to Rank {}\n".format(0, rank_recv))
+            print(f"worker_{0} sent data to Rank {rank_recv}\n")
     else:
         dist.recv(tensor=tensor, src=0)
-        print("worker_{} has received data from rank {}\n".format(WORLD_RANK, 0))
+        print(f"worker_{WORLD_RANK} has received data from rank {0}\n")
 
 
 def init_processes(backend):