Mirror of https://github.com/huggingface/transformers.git
Use Python 3.9 syntax in examples (#37279)
Signed-off-by: cyy <cyyever@outlook.com>
commit 0fb8d49e88
parent 08f36771b3
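The commit mechanically modernizes the example scripts to the Python 3.9 baseline: built-in generics from PEP 585 (`list[str]`, `dict[str, int]`, `tuple[...]` instead of `typing.List`, `typing.Dict`, `typing.Tuple`), `collections.abc` imports for `Iterator`/`Iterable`, `yield from` delegation, f-strings in place of `%`-formatting and `str.format()`, `open()` without the redundant `"r"` mode, `OSError` instead of its legacy alias `EnvironmentError`, plain `class Foo:` instead of `class Foo(object):`, and dropping the `# coding=utf-8` declarations that are redundant in Python 3. The snippet below is an illustrative sketch of the target style only; it is not taken from the diff, and all names in it are invented:

# Minimal sketch of the Python 3.9 idioms this commit applies (hypothetical names).

def chunk(examples: list[dict[str, int]], size: int) -> tuple[list[dict[str, int]], ...]:
    # PEP 585: built-in list/dict/tuple work as generic annotations from Python 3.9 on.
    return tuple(examples[i : i + size] for i in range(0, len(examples), size))


def stream(loader):
    # "yield from" delegates to the iterable, replacing "for batch in loader: yield batch".
    yield from loader


def describe(path: str) -> str:
    # f-strings replace "%"-style and str.format(); open() reads text by default, so the
    # explicit "r" mode is dropped; OSError is the modern name for the EnvironmentError alias.
    try:
        with open(path, encoding="utf-8") as f:
            return f"{path}: {len(f.read())} characters"
    except OSError as err:
        return f"could not read {path}: {err}"


if __name__ == "__main__":
    print(chunk([{"a": 1}, {"a": 2}, {"a": 3}], 2))
    print(list(stream(range(3))))
    print(describe(__file__))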
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2022 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -833,8 +832,7 @@ def main():
 # No need to shuffle here
 loader = data_loader(rng, _ds, batch_size=batch_size, shuffle=False)

-for batch in loader:
-    yield batch
+yield from loader

 # Metric
 metric = evaluate.load("rouge", cache_dir=model_args.cache_dir)
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,7 +29,7 @@ from dataclasses import asdict, dataclass, field
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Optional

 import flax
 import jax
@@ -294,7 +293,7 @@ class FlaxDataCollatorForBartDenoisingLM:
     " language modeling. "
 )

-def __call__(self, examples: List[Dict[str, List[int]]]) -> BatchEncoding:
+def __call__(self, examples: list[dict[str, list[int]]]) -> BatchEncoding:
     # convert list to dict and tensorize input
     batch = BatchEncoding(
         {k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -33,7 +32,7 @@ from itertools import chain

 # You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Optional

 import flax
 import jax
@@ -302,7 +301,7 @@ class FlaxDataCollatorForLanguageModeling:
     "You should pass `mlm=False` to train on causal language modeling instead."
 )

-def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multiple_of: int) -> Dict[str, np.ndarray]:
+def __call__(self, examples: list[dict[str, np.ndarray]], pad_to_multiple_of: int) -> dict[str, np.ndarray]:
     # Handle dict or lists with proper padding and conversion to tensor.
     batch = self.tokenizer.pad(examples, pad_to_multiple_of=pad_to_multiple_of, return_tensors=TensorType.NUMPY)

@@ -316,7 +315,7 @@ class FlaxDataCollatorForLanguageModeling:

 def mask_tokens(
     self, inputs: np.ndarray, special_tokens_mask: Optional[np.ndarray]
-) -> Tuple[np.ndarray, np.ndarray]:
+) -> tuple[np.ndarray, np.ndarray]:
     """
     Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.
     """
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -32,7 +31,7 @@ from dataclasses import asdict, dataclass, field
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Optional

 import flax
 import jax
@@ -338,7 +337,7 @@ class FlaxDataCollatorForT5MLM:
 pad_token_id: int
 decoder_start_token_id: int

-def __call__(self, examples: List[Dict[str, np.ndarray]]) -> BatchEncoding:
+def __call__(self, examples: list[dict[str, np.ndarray]]) -> BatchEncoding:
     # convert list to dict and tensorize input
     batch = BatchEncoding(
         {k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import json
-from typing import Iterator, List, Union
+from collections.abc import Iterator
+from typing import Union

 from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, trainers
 from tokenizers.implementations.base_tokenizer import BaseTokenizer
@@ -72,7 +73,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):

 def train(
     self,
-    files: Union[str, List[str]],
+    files: Union[str, list[str]],
     vocab_size: int = 8000,
     show_progress: bool = True,
 ):
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,7 +27,7 @@ import time
 from dataclasses import asdict, dataclass, field
 from enum import Enum
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Optional

 import datasets
 import evaluate
@@ -908,8 +907,8 @@ def main():

 # region Define train step functions
 def train_step(
-    state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
-) -> Tuple[train_state.TrainState, float]:
+    state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
+) -> tuple[train_state.TrainState, float]:
     """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
     dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
     start_positions = batch.pop("start_positions")
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import collections
 import json
 import logging
 import os
-from typing import Optional, Tuple
+from typing import Optional

 import numpy as np
 from tqdm.auto import tqdm
@@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
 def postprocess_qa_predictions(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -223,7 +222,7 @@ def postprocess_qa_predictions(
 # If we have an output_dir, let's save all those dicts.
 if output_dir is not None:
     if not os.path.isdir(output_dir):
-        raise EnvironmentError(f"{output_dir} is not a directory.")
+        raise OSError(f"{output_dir} is not a directory.")

     prediction_file = os.path.join(
         output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@@ -253,7 +252,7 @@ def postprocess_qa_predictions(
 def postprocess_qa_predictions_with_beam_search(
     examples,
     features,
-    predictions: Tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, np.ndarray],
     version_2_with_negative: bool = False,
     n_best_size: int = 20,
     max_answer_length: int = 30,
@@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
 # If we have an output_dir, let's save all those dicts.
 if output_dir is not None:
     if not os.path.isdir(output_dir):
-        raise EnvironmentError(f"{output_dir} is not a directory.")
+        raise OSError(f"{output_dir} is not a directory.")

     prediction_file = os.path.join(
         output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +24,7 @@ import time
 from dataclasses import field
 from functools import partial
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Optional, Union

 import datasets
 import evaluate
@@ -303,7 +302,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
 pad_input_to_multiple_of: Optional[int] = None
 pad_target_to_multiple_of: Optional[int] = None

-def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
+def __call__(self, features: list[dict[str, Union[list[int], np.ndarray]]]) -> dict[str, np.ndarray]:
     # split inputs and labels since they have to be of different lengths and need
     # different padding methods
     model_input_name = self.processor.model_input_names[0]
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2021 HuggingFace Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -64,7 +63,7 @@ def get_setup_file():
 def get_results(output_dir, split="eval"):
     path = os.path.join(output_dir, f"{split}_results.json")
     if os.path.exists(path):
-        with open(path, "r") as f:
+        with open(path) as f:
             return json.load(f)
     raise ValueError(f"can't find {path}")
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +24,7 @@ import time
 import warnings
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Optional

 import datasets
 import evaluate
@@ -572,8 +571,8 @@ def main():

 # define step functions
 def train_step(
-    state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
-) -> Tuple[train_state.TrainState, float]:
+    state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
+) -> tuple[train_state.TrainState, float]:
     """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
     dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
     targets = batch.pop("labels")
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -27,7 +26,7 @@ from dataclasses import asdict, dataclass, field
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Optional

 import datasets
 import evaluate
@@ -651,8 +650,8 @@ def main():

 # define step functions
 def train_step(
-    state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
-) -> Tuple[train_state.TrainState, float]:
+    state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
+) -> tuple[train_state.TrainState, float]:
     """Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
     dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
     targets = batch.pop("labels")
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2021 The HuggingFace Team All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -15,7 +15,7 @@
 import csv
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import Optional

 import matplotlib.pyplot as plt
 import numpy as np
@@ -59,7 +59,7 @@ class PlotArguments:
     default=None,
     metadata={"help": "Filename under which the plot will be saved. If unused no plot is saved."},
 )
-short_model_names: Optional[List[str]] = list_field(
+short_model_names: Optional[list[str]] = list_field(
     default=None, metadata={"help": "List of model names that are used instead of the ones in the csv file."}
 )
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -18,7 +17,7 @@
 import logging
 import os
 from dataclasses import dataclass, field
-from typing import Dict, Optional
+from typing import Optional

 import numpy as np
 from utils_multiple_choice import MultipleChoiceDataset, Split, processors
@@ -187,7 +186,7 @@ def main():
     else None
 )

-def compute_metrics(p: EvalPrediction) -> Dict:
+def compute_metrics(p: EvalPrediction) -> dict:
     preds = np.argmax(p.predictions, axis=1)
     return {"acc": simple_accuracy(preds, p.label_ids)}

@@ -228,7 +227,7 @@ def main():
 logger.info("***** Eval results *****")
 for key, value in result.items():
     logger.info(" %s = %s", key, value)
-    writer.write("%s = %s\n" % (key, value))
+    writer.write("{} = {}\n".format(key, value))

 results.update(result)
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -22,7 +21,7 @@ import logging
 import os
 from dataclasses import dataclass
 from enum import Enum
-from typing import List, Optional
+from typing import Optional

 import tqdm
 from filelock import FileLock
@@ -49,8 +48,8 @@ class InputExample:

 example_id: str
 question: str
-contexts: List[str]
-endings: List[str]
+contexts: list[str]
+endings: list[str]
 label: Optional[str]

@@ -62,9 +61,9 @@ class InputFeatures:
 """

 example_id: str
-input_ids: List[List[int]]
-attention_mask: Optional[List[List[int]]]
-token_type_ids: Optional[List[List[int]]]
+input_ids: list[list[int]]
+attention_mask: Optional[list[list[int]]]
+token_type_ids: Optional[list[list[int]]]
 label: Optional[int]

@@ -84,7 +83,7 @@ if is_torch_available():
     soon.
     """

-    features: List[InputFeatures]
+    features: list[InputFeatures]

     def __init__(
         self,
@@ -149,7 +148,7 @@ if is_tf_available():
     soon.
     """

-    features: List[InputFeatures]
+    features: list[InputFeatures]

     def __init__(
         self,
@@ -253,7 +252,7 @@ class RaceProcessor(DataProcessor):

 def get_train_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} train".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} train")
     high = os.path.join(data_dir, "train/high")
     middle = os.path.join(data_dir, "train/middle")
     high = self._read_txt(high)
@@ -262,7 +261,7 @@ class RaceProcessor(DataProcessor):

 def get_dev_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} dev".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} dev")
     high = os.path.join(data_dir, "dev/high")
     middle = os.path.join(data_dir, "dev/middle")
     high = self._read_txt(high)
@@ -271,7 +270,7 @@ class RaceProcessor(DataProcessor):

 def get_test_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} test".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} test")
     high = os.path.join(data_dir, "test/high")
     middle = os.path.join(data_dir, "test/middle")
     high = self._read_txt(high)
@@ -286,7 +285,7 @@ class RaceProcessor(DataProcessor):
 lines = []
 files = glob.glob(input_dir + "/*txt")
 for file in tqdm.tqdm(files, desc="read files"):
-    with open(file, "r", encoding="utf-8") as fin:
+    with open(file, encoding="utf-8") as fin:
         data_raw = json.load(fin)
         data_raw["race_id"] = file
         lines.append(data_raw)
@@ -296,7 +295,7 @@ class RaceProcessor(DataProcessor):
 """Creates examples for the training and dev sets."""
 examples = []
 for _, data_raw in enumerate(lines):
-    race_id = "%s-%s" % (set_type, data_raw["race_id"])
+    race_id = "{}-{}".format(set_type, data_raw["race_id"])
     article = data_raw["article"]
     for i in range(len(data_raw["answers"])):
         truth = str(ord(data_raw["answers"][i]) - ord("A"))
@@ -320,17 +319,17 @@ class SynonymProcessor(DataProcessor):

 def get_train_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} train".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} train")
     return self._create_examples(self._read_csv(os.path.join(data_dir, "mctrain.csv")), "train")

 def get_dev_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} dev".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} dev")
     return self._create_examples(self._read_csv(os.path.join(data_dir, "mchp.csv")), "dev")

 def get_test_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} dev".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} dev")

     return self._create_examples(self._read_csv(os.path.join(data_dir, "mctest.csv")), "test")

@@ -339,10 +338,10 @@ class SynonymProcessor(DataProcessor):
 return ["0", "1", "2", "3", "4"]

 def _read_csv(self, input_file):
-    with open(input_file, "r", encoding="utf-8") as f:
+    with open(input_file, encoding="utf-8") as f:
         return list(csv.reader(f))

-def _create_examples(self, lines: List[List[str]], type: str):
+def _create_examples(self, lines: list[list[str]], type: str):
     """Creates examples for the training and dev sets."""

     examples = [
@@ -366,17 +365,17 @@ class SwagProcessor(DataProcessor):

 def get_train_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} train".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} train")
     return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train")

 def get_dev_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} dev".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} dev")
     return self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev")

 def get_test_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} dev".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} dev")
     raise ValueError(
         "For swag testing, the input file does not contain a label column. It can not be tested in current code "
         "setting!"
@@ -388,10 +387,10 @@ class SwagProcessor(DataProcessor):
 return ["0", "1", "2", "3"]

 def _read_csv(self, input_file):
-    with open(input_file, "r", encoding="utf-8") as f:
+    with open(input_file, encoding="utf-8") as f:
         return list(csv.reader(f))

-def _create_examples(self, lines: List[List[str]], type: str):
+def _create_examples(self, lines: list[list[str]], type: str):
     """Creates examples for the training and dev sets."""
     if type == "train" and lines[0][-1] != "label":
         raise ValueError("For training, the input file must contain a label column.")
@@ -417,16 +416,16 @@ class ArcProcessor(DataProcessor):

 def get_train_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} train".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} train")
     return self._create_examples(self._read_json(os.path.join(data_dir, "train.jsonl")), "train")

 def get_dev_examples(self, data_dir):
     """See base class."""
-    logger.info("LOOKING AT {} dev".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} dev")
     return self._create_examples(self._read_json(os.path.join(data_dir, "dev.jsonl")), "dev")

 def get_test_examples(self, data_dir):
-    logger.info("LOOKING AT {} test".format(data_dir))
+    logger.info(f"LOOKING AT {data_dir} test")
     return self._create_examples(self._read_json(os.path.join(data_dir, "test.jsonl")), "test")

 def get_labels(self):
@@ -434,7 +433,7 @@ class ArcProcessor(DataProcessor):
 return ["0", "1", "2", "3"]

 def _read_json(self, input_file):
-    with open(input_file, "r", encoding="utf-8") as fin:
+    with open(input_file, encoding="utf-8") as fin:
         lines = fin.readlines()
         return lines

@@ -504,11 +503,11 @@ class ArcProcessor(DataProcessor):

 def convert_examples_to_features(
-    examples: List[InputExample],
-    label_list: List[str],
+    examples: list[InputExample],
+    label_list: list[str],
     max_length: int,
     tokenizer: PreTrainedTokenizer,
-) -> List[InputFeatures]:
+) -> list[InputFeatures]:
     """
     Loads a data file into a list of `InputFeatures`
     """
@@ -2,7 +2,7 @@ import argparse
 import logging
 import os
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any

 import pytorch_lightning as pl
 from pytorch_lightning.utilities import rank_zero_info
@@ -201,7 +201,7 @@ class BaseTransformer(pl.LightningModule):
 )

 @pl.utilities.rank_zero_only
-def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
+def on_save_checkpoint(self, checkpoint: dict[str, Any]) -> None:
     save_path = self.output_dir.joinpath("best_tfmr")
     self.model.config.save_step = self.step_count
     self.model.save_pretrained(save_path)
@@ -282,7 +282,7 @@ class LoggingCallback(pl.Callback):
 # Log results
 for key in sorted(metrics):
     if key not in ["log", "progress_bar"]:
-        rank_zero_info("{} = {}\n".format(key, str(metrics[key])))
+        rank_zero_info(f"{key} = {str(metrics[key])}\n")

 def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule):
     rank_zero_info("***** Test results *****")
@@ -292,8 +292,8 @@ class LoggingCallback(pl.Callback):
 with open(output_test_results_file, "w") as writer:
     for key in sorted(metrics):
         if key not in ["log", "progress_bar"]:
-            rank_zero_info("{} = {}\n".format(key, str(metrics[key])))
-            writer.write("{} = {}\n".format(key, str(metrics[key])))
+            rank_zero_info(f"{key} = {str(metrics[key])}\n")
+            writer.write(f"{key} = {str(metrics[key])}\n")

 def add_generic_args(parser, root_dir) -> None:
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -231,14 +230,14 @@ def train(args, train_dataset, model, tokenizer):
 if args.local_rank == -1 and args.evaluate_during_training:
     results = evaluate(args, model, tokenizer)
     for key, value in results.items():
-        tb_writer.add_scalar("eval_{}".format(key), value, global_step)
+        tb_writer.add_scalar(f"eval_{key}", value, global_step)
 tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
 tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
 logging_loss = tr_loss

 # Save model checkpoint
 if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
-    output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
+    output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}")
     # Take care of distributed/parallel training
     model_to_save = model.module if hasattr(model, "module") else model
     model_to_save.save_pretrained(output_dir)
@@ -281,7 +280,7 @@ def evaluate(args, model, tokenizer, prefix=""):
 model = torch.nn.DataParallel(model)

 # Eval!
-logger.info("***** Running evaluation {} *****".format(prefix))
+logger.info(f"***** Running evaluation {prefix} *****")
 logger.info(" Num examples = %d", len(dataset))
 logger.info(" Batch size = %d", args.eval_batch_size)

@@ -348,11 +347,11 @@ def evaluate(args, model, tokenizer, prefix=""):
 logger.info(" Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(dataset))

 # Compute predictions
-output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix))
-output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix))
+output_prediction_file = os.path.join(args.output_dir, f"predictions_{prefix}.json")
+output_nbest_file = os.path.join(args.output_dir, f"nbest_predictions_{prefix}.json")

 if args.version_2_with_negative:
-    output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix))
+    output_null_log_odds_file = os.path.join(args.output_dir, f"null_odds_{prefix}.json")
 else:
     output_null_log_odds_file = None

@@ -828,10 +827,10 @@ def main():
 # Evaluate
 result = evaluate(args, model, tokenizer, prefix=global_step)

-result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
+result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()}
 results.update(result)

-logger.info("Results: {}".format(results))
+logger.info(f"Results: {results}")

 return results
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #

@@ -20,10 +20,10 @@ def fill_mask(masked_input, model, tokenizer, topk=5):
 topk_filled_outputs = []
 for index, predicted_token_bpe in enumerate(topk_predicted_token_bpe.split(" ")):
     predicted_token = predicted_token_bpe.replace("\u2581", " ")
-    if " {0}".format(masked_token) in masked_input:
+    if f" {masked_token}" in masked_input:
         topk_filled_outputs.append(
             (
-                masked_input.replace(" {0}".format(masked_token), predicted_token),
+                masked_input.replace(f" {masked_token}", predicted_token),
                 values[index].item(),
                 predicted_token,
             )
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import argparse
 import json
-from typing import List

 from ltp import LTP

@@ -42,7 +41,7 @@ def is_chinese(word: str):
 return 1


-def get_chinese_word(tokens: List[str]):
+def get_chinese_word(tokens: list[str]):
     word_set = set()

     for token in tokens:
@@ -53,7 +52,7 @@ def get_chinese_word(tokens: List[str]):
 return word_list


-def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
+def add_sub_symbol(bert_tokens: list[str], chinese_word_set: set()):
     if not chinese_word_set:
         return bert_tokens
     max_word_len = max([len(w) for w in chinese_word_set])
@@ -77,7 +76,7 @@ def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
 return bert_word


-def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
+def prepare_ref(lines: list[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
     ltp_res = []

     for i in range(0, len(lines), 100):
@@ -117,7 +116,7 @@ def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokenizer):
 def main(args):
     # For Chinese (Ro)Bert, the best result is from : RoBERTa-wwm-ext (https://github.com/ymcui/Chinese-BERT-wwm)
     # If we want to fine-tune these model, we have to use same tokenizer : LTP (https://github.com/HIT-SCIR/ltp)
-    with open(args.file_name, "r", encoding="utf-8") as f:
+    with open(args.file_name, encoding="utf-8") as f:
         data = f.readlines()
     data = [line.strip() for line in data if len(line) > 0 and not line.isspace()]  # avoid delimiter like '\u2029'
     ltp_tokenizer = LTP(args.ltp)  # faster in GPU device
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -358,7 +357,7 @@ def main():
 logger.info("***** Eval results *****")
 for key in sorted(result.keys()):
     logger.info(" %s = %s", key, str(result[key]))
-    writer.write("%s = %s\n" % (key, str(result[key])))
+    writer.write("{} = {}\n".format(key, str(result[key])))

 results.update(result)
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -163,7 +162,7 @@ def main():

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 n_gpu = torch.cuda.device_count()
-logger.info("device: {}, n_gpu {}".format(device, n_gpu))
+logger.info(f"device: {device}, n_gpu {n_gpu}")

 if not args.do_train and not args.do_eval:
     raise ValueError("At least one of `do_train` or `do_eval` must be True.")
@@ -261,7 +260,7 @@ def main():
     loss.item() if exp_average_loss is None else 0.7 * exp_average_loss + 0.3 * loss.item()
 )
 nb_tr_steps += 1
-tqdm_bar.desc = "Training loss: {:.2e} lr: {:.2e}".format(exp_average_loss, scheduler.get_lr()[0])
+tqdm_bar.desc = f"Training loss: {exp_average_loss:.2e} lr: {scheduler.get_lr()[0]:.2e}"

 # Save a trained model
 if args.do_train:
@@ -313,7 +312,7 @@ def main():
 logger.info("***** Eval results *****")
 for key in sorted(result.keys()):
     logger.info(" %s = %s", key, str(result[key]))
-    writer.write("%s = %s\n" % (key, str(result[key])))
+    writer.write("{} = {}\n".format(key, str(result[key])))

 if __name__ == "__main__":
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -51,7 +50,7 @@ except ImportError:
 logger = logging.getLogger(__name__)


-class SwagExample(object):
+class SwagExample:
     """A single training/test example for the SWAG dataset."""

     def __init__(self, swag_id, context_sentence, start_ending, ending_0, ending_1, ending_2, ending_3, label=None):
@@ -71,22 +70,22 @@ class SwagExample(object):

 def __repr__(self):
     attributes = [
-        "swag_id: {}".format(self.swag_id),
-        "context_sentence: {}".format(self.context_sentence),
-        "start_ending: {}".format(self.start_ending),
-        "ending_0: {}".format(self.endings[0]),
-        "ending_1: {}".format(self.endings[1]),
-        "ending_2: {}".format(self.endings[2]),
-        "ending_3: {}".format(self.endings[3]),
+        f"swag_id: {self.swag_id}",
+        f"context_sentence: {self.context_sentence}",
+        f"start_ending: {self.start_ending}",
+        f"ending_0: {self.endings[0]}",
+        f"ending_1: {self.endings[1]}",
+        f"ending_2: {self.endings[2]}",
+        f"ending_3: {self.endings[3]}",
     ]

     if self.label is not None:
-        attributes.append("label: {}".format(self.label))
+        attributes.append(f"label: {self.label}")

     return ", ".join(attributes)


-class InputFeatures(object):
+class InputFeatures:
     def __init__(self, example_id, choices_features, label):
         self.example_id = example_id
         self.choices_features = [
@@ -97,7 +96,7 @@ class InputFeatures(object):


 def read_swag_examples(input_file, is_training=True):
-    with open(input_file, "r", encoding="utf-8") as f:
+    with open(input_file, encoding="utf-8") as f:
         lines = list(csv.reader(f))

     if is_training and lines[0][-1] != "label":
@@ -179,15 +178,15 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, is_training):
 label = example.label
 if example_index < 5:
     logger.info("*** Example ***")
-    logger.info("swag_id: {}".format(example.swag_id))
+    logger.info(f"swag_id: {example.swag_id}")
     for choice_idx, (tokens, input_ids, input_mask, segment_ids) in enumerate(choices_features):
-        logger.info("choice: {}".format(choice_idx))
+        logger.info(f"choice: {choice_idx}")
         logger.info("tokens: {}".format(" ".join(tokens)))
         logger.info("input_ids: {}".format(" ".join(map(str, input_ids))))
         logger.info("input_mask: {}".format(" ".join(map(str, input_mask))))
         logger.info("segment_ids: {}".format(" ".join(map(str, segment_ids))))
     if is_training:
-        logger.info("label: {}".format(label))
+        logger.info(f"label: {label}")

 features.append(InputFeatures(example_id=example.swag_id, choices_features=choices_features, label=label))

@@ -382,14 +381,14 @@ def train(args, train_dataset, model, tokenizer):
 ):  # Only evaluate when single GPU otherwise metrics may not average well
     results = evaluate(args, model, tokenizer)
     for key, value in results.items():
-        tb_writer.add_scalar("eval_{}".format(key), value, global_step)
+        tb_writer.add_scalar(f"eval_{key}", value, global_step)
 tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
 tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
 logging_loss = tr_loss

 if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
     # Save model checkpoint
-    output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
+    output_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}")
     model_to_save = (
         model.module if hasattr(model, "module") else model
     )  # Take care of distributed/parallel training
@@ -423,7 +422,7 @@ def evaluate(args, model, tokenizer, prefix=""):
 eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

 # Eval!
-logger.info("***** Running evaluation {} *****".format(prefix))
+logger.info(f"***** Running evaluation {prefix} *****")
 logger.info(" Num examples = %d", len(dataset))
 logger.info(" Batch size = %d", args.eval_batch_size)

@@ -466,7 +465,7 @@ def evaluate(args, model, tokenizer, prefix=""):
 logger.info("***** Eval results *****")
 for key in sorted(result.keys()):
     logger.info("%s = %s", key, str(result[key]))
-    writer.write("%s = %s\n" % (key, str(result[key])))
+    writer.write("{} = {}\n".format(key, str(result[key])))

 return result

@@ -710,10 +709,10 @@ def main():
 # Evaluate
 result = evaluate(args, model, tokenizer, prefix=global_step)

-result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
+result = {k + (f"_{global_step}" if global_step else ""): v for k, v in result.items()}
 results.update(result)

-logger.info("Results: {}".format(results))
+logger.info(f"Results: {results}")

 return results
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# coding=utf-8
 # Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -66,7 +65,7 @@ def main():
 ptvsd.wait_for_attach()

 device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
-logger.info("device: {}".format(device))
+logger.info(f"device: {device}")

 # Load a pre-processed dataset
 # You can also build the corpus yourself using TransfoXLCorpus methods
@@ -111,7 +110,7 @@ def main():
 total_loss += seq_len * loss.item()
 total_len += seq_len
 total_time = time.time() - start_time
-logger.info("Time : {:.2f}s, {:.2f}ms/segment".format(total_time, 1000 * total_time / (idx + 1)))
+logger.info(f"Time : {total_time:.2f}s, {1000 * total_time / (idx + 1):.2f}ms/segment")
 return total_loss / total_len

 # Run on test data.
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2020 Huggingface
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import io
 import json
 import unittest

@@ -25,7 +23,7 @@ from utils import calculate_bleu


 filename = get_tests_dir() + "/test_data/fsmt/fsmt_val_data.json"
-with io.open(filename, "r", encoding="utf-8") as f:
+with open(filename, encoding="utf-8") as f:
     bleu_data = json.load(f)
@@ -19,7 +19,6 @@ import time
 from json import JSONDecodeError
 from logging import getLogger
 from pathlib import Path
-from typing import Dict, List

 import torch
 from torch.utils.data import DataLoader
@@ -55,10 +54,10 @@ def eval_data_dir(
 task="summarization",
 local_rank=None,
 num_return_sequences=1,
-dataset_kwargs: Dict = None,
+dataset_kwargs: dict = None,
 prefix="",
 **generate_kwargs,
-) -> Dict:
+) -> dict:
     """Run evaluation on part of the data for one gpu and save to {save_dir}/rank_{rank}_output.json"""
     model_name = str(model_name)
     assert local_rank is not None
@@ -211,7 +210,7 @@ def run_generate():
 calc_bleu = "translation" in args.task
 score_fn = calculate_bleu if calc_bleu else calculate_rouge
 metric_name = "bleu" if calc_bleu else "rouge"
-metrics: Dict = score_fn(preds, labels)
+metrics: dict = score_fn(preds, labels)
 metrics["n_obs"] = len(preds)
 runtime = time.time() - start_time
 metrics["seconds_per_sample"] = round(runtime / metrics["n_obs"], 4)
@@ -227,7 +226,7 @@ def run_generate():
 shutil.rmtree(json_save_dir)


-def combine_partial_results(partial_results) -> List:
+def combine_partial_results(partial_results) -> list:
     """Concatenate partial results into one file, then sort it by id."""
     records = []
     for partial_result in partial_results:
@@ -237,7 +236,7 @@ def combine_partial_results(partial_results) -> List:
 return preds


-def gather_results_from_each_node(num_replicas, save_dir, timeout) -> List[Dict[str, List]]:
+def gather_results_from_each_node(num_replicas, save_dir, timeout) -> list[dict[str, list]]:
     # WAIT FOR lots of .json files
     start_wait = time.time()
     logger.info("waiting for all nodes to finish")
@@ -20,7 +20,6 @@ import time
 import warnings
 from logging import getLogger
 from pathlib import Path
-from typing import Dict, List

 import torch
 from tqdm import tqdm
@@ -36,7 +35,7 @@ DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


 def generate_summaries_or_translations(
-    examples: List[str],
+    examples: list[str],
     out_file: str,
     model_name: str,
     batch_size: int = 8,
@@ -45,7 +44,7 @@ def generate_summaries_or_translations(
 task="summarization",
 prefix=None,
 **generate_kwargs,
-) -> Dict:
+) -> dict:
     """Save model.generate results to <out_file>, and return how long it took."""
     fout = Path(out_file).open("w", encoding="utf-8")
     model_name = str(model_name)
@@ -34,7 +34,7 @@ task_score_names = {

 def parse_search_arg(search):
     groups = search.split()
-    entries = dict((g.split("=") for g in groups))
+    entries = dict(g.split("=") for g in groups)
     entry_names = list(entries.keys())
     sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()]
     matrix = [list(x) for x in itertools.product(*sets)]
@@ -105,7 +105,7 @@ def run_search():
 col_widths = {col: len(str(col)) for col in col_names}
 results = []
 for r in matrix:
-    hparams = dict((x.replace("--", "").split() for x in r))
+    hparams = dict(x.replace("--", "").split() for x in r)
     args_exp = " ".join(r).split()
     args_exp.extend(["--bs", str(args.bs)])  # in case we need to reduce its size due to CUDA OOM
     sys.argv = args_normal + args_exp
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union

 import torch
 from torch import nn
@@ -172,10 +172,10 @@ class Seq2SeqTrainer(Trainer):
 def prediction_step(
     self,
     model: nn.Module,
-    inputs: Dict[str, Union[torch.Tensor, Any]],
+    inputs: dict[str, Union[torch.Tensor, Any]],
     prediction_loss_only: bool,
-    ignore_keys: Optional[List[str]] = None,
-) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    ignore_keys: Optional[list[str]] = None,
+) -> tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
     """
     Perform an evaluation step on :obj:`model` using obj:`inputs`.
@@ -1,6 +1,5 @@
 #!/usr/bin/env python

-import io
 import json
 import subprocess

@@ -29,5 +28,5 @@ def get_all_data(pairs, n_objs):

 text = get_all_data(pairs, n_objs)
 filename = "./fsmt_val_data.json"
-with io.open(filename, "w", encoding="utf-8") as f:
+with open(filename, "w", encoding="utf-8") as f:
     bleu_data = json.dump(text, f, indent=2, ensure_ascii=False)
@@ -19,9 +19,10 @@ import math
 import os
 import pickle
 import socket
+from collections.abc import Iterable
 from logging import getLogger
 from pathlib import Path
-from typing import Callable, Dict, Iterable, List, Tuple, Union
+from typing import Callable, Union

 import git
 import numpy as np
@@ -67,7 +68,7 @@ def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=-100):
 return loss, nll_loss


-def lmap(f: Callable, x: Iterable) -> List:
+def lmap(f: Callable, x: Iterable) -> list:
     """list(map(f, x))"""
     return list(map(f, x))

@@ -77,11 +78,11 @@ def calculate_bleu(output_lns, refs_lns, **kwargs) -> dict:
 return {"bleu": round(corpus_bleu(output_lns, [refs_lns], **kwargs).score, 4)}


-def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], Dict]:
+def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) -> Callable[[EvalPrediction], dict]:
     def non_pad_len(tokens: np.ndarray) -> int:
         return np.count_nonzero(tokens != tokenizer.pad_token_id)

-    def decode_pred(pred: EvalPrediction) -> Tuple[List[str], List[str]]:
+    def decode_pred(pred: EvalPrediction) -> tuple[list[str], list[str]]:
         pred_ids = pred.predictions
         label_ids = pred.label_ids
         pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
@@ -91,16 +92,16 @@ def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokenizer) ->
 label_str = lmap(str.strip, label_str)
 return pred_str, label_str

-def summarization_metrics(pred: EvalPrediction) -> Dict:
+def summarization_metrics(pred: EvalPrediction) -> dict:
     pred_str, label_str = decode_pred(pred)
-    rouge: Dict = calculate_rouge(pred_str, label_str)
+    rouge: dict = calculate_rouge(pred_str, label_str)
     summ_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1)
     rouge.update({"gen_len": summ_len})
     return rouge

-def translation_metrics(pred: EvalPrediction) -> Dict:
+def translation_metrics(pred: EvalPrediction) -> dict:
     pred_str, label_str = decode_pred(pred)
-    bleu: Dict = calculate_bleu(pred_str, label_str)
+    bleu: dict = calculate_bleu(pred_str, label_str)
     gen_len = np.round(np.mean(lmap(non_pad_len, pred.predictions)), 1)
     bleu.update({"gen_len": gen_len})
     return bleu
@@ -183,7 +184,7 @@ class AbstractSeq2SeqDataset(Dataset):
 return min(self.src_lens[i], self.max_target_length)

 # call fairseq cython function
-batch_sampler: List[List[int]] = batch_by_size(
+batch_sampler: list[list[int]] = batch_by_size(
     sorted_indices,
     num_tokens_fn=num_tokens_in_example,
     max_tokens=max_tokens_per_batch,
@@ -207,7 +208,7 @@ class AbstractSeq2SeqDataset(Dataset):


 class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
-    def __getitem__(self, index) -> Dict[str, torch.Tensor]:
+    def __getitem__(self, index) -> dict[str, torch.Tensor]:
         """Call tokenizer on src and tgt_lines"""
         index = index + 1  # linecache starts at 1
         source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n")
@@ -237,7 +238,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
     **self.dataset_kwargs,
 )

-def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
+def collate_fn(self, batch) -> dict[str, torch.Tensor]:
     input_ids = torch.stack([x["input_ids"] for x in batch])
     masks = torch.stack([x["attention_mask"] for x in batch])
     target_ids = torch.stack([x["labels"] for x in batch])
@@ -255,7 +256,7 @@ class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
 class Seq2SeqDataset(AbstractSeq2SeqDataset):
     """A dataset that calls prepare_seq2seq_batch."""

-    def __getitem__(self, index) -> Dict[str, str]:
+    def __getitem__(self, index) -> dict[str, str]:
         index = index + 1  # linecache starts at 1
         source_line = self.prefix + linecache.getline(str(self.src_file), index).rstrip("\n")
         tgt_line = linecache.getline(str(self.tgt_file), index).rstrip("\n")
@@ -263,9 +264,9 @@ class Seq2SeqDataset(AbstractSeq2SeqDataset):
 assert tgt_line, f"empty tgt line for index {index}"
 return {"tgt_texts": tgt_line, "src_texts": source_line, "id": index - 1}

-def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
+def collate_fn(self, batch) -> dict[str, torch.Tensor]:
     """Call prepare_seq2seq_batch."""
-    batch_encoding: Dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch(
+    batch_encoding: dict[str, torch.Tensor] = self.tokenizer.prepare_seq2seq_batch(
         [x["src_texts"] for x in batch],
         tgt_texts=[x["tgt_texts"] for x in batch],
         max_length=self.max_source_length,
@@ -293,7 +294,7 @@ class Seq2SeqDataCollator:
 if data_args.tgt_lang is not None:
     self.dataset_kwargs["tgt_lang"] = data_args.tgt_lang

-def __call__(self, batch) -> Dict[str, torch.Tensor]:
+def __call__(self, batch) -> dict[str, torch.Tensor]:
     if hasattr(self.tokenizer, "prepare_seq2seq_batch"):
         batch = self._encode(batch)
         input_ids, attention_mask, labels = (
@@ -329,7 +330,7 @@ class Seq2SeqDataCollator:
 shifted_input_ids[..., 0] = self.pad_token_id
 return shifted_input_ids

-def _encode(self, batch) -> Dict[str, torch.Tensor]:
+def _encode(self, batch) -> dict[str, torch.Tensor]:
     batch_encoding = self.tokenizer.prepare_seq2seq_batch(
         [x["src_texts"] for x in batch],
         tgt_texts=[x["tgt_texts"] for x in batch],
@@ -355,7 +356,7 @@ class SortishSampler(Sampler):
 return iter(sortish_sampler_indices(self.data, self.bs, shuffle=self.shuffle))


-def sortish_sampler_indices(data: List, bs: int, shuffle=True) -> np.array:
+def sortish_sampler_indices(data: list, bs: int, shuffle=True) -> np.array:
     "Go through the text data by order of src length with a bit of randomness. From fastai repo."
     if not shuffle:
         return np.argsort(np.array(data) * -1)
@@ -455,7 +456,7 @@ def pickle_save(obj, path):
 return pickle.dump(obj, f)


-def flatten_list(summary_ids: List[List]):
+def flatten_list(summary_ids: list[list]):
     return list(itertools.chain.from_iterable(summary_ids))


@@ -506,14 +507,14 @@ def extract_rouge_mid_statistics(dct):


 def calculate_rouge(
-    pred_lns: List[str],
-    tgt_lns: List[str],
+    pred_lns: list[str],
+    tgt_lns: list[str],
     use_stemmer=True,
     rouge_keys=ROUGE_KEYS,
     return_precision_and_recall=False,
     bootstrap_aggregation=True,
     newline_sep=True,
-) -> Dict:
+) -> dict:
     """Calculate rouge using rouge_scorer package.

     Args:
@@ -590,19 +591,19 @@ def any_requires_grad(model: nn.Module) -> bool:


 def assert_all_frozen(model):
-    model_grads: List[bool] = list(grad_status(model))
+    model_grads: list[bool] = list(grad_status(model))
     n_require_grad = sum(lmap(int, model_grads))
     npars = len(model_grads)
     assert not any(model_grads), f"{n_require_grad / npars:.1%} of {npars} weights require grad"


 def assert_not_all_frozen(model):
-    model_grads: List[bool] = list(grad_status(model))
+    model_grads: list[bool] = list(grad_status(model))
     npars = len(model_grads)
     assert any(model_grads), f"none of {npars} weights require grad"


-def parse_numeric_n_bool_cl_kwargs(unparsed_args: List[str]) -> Dict[str, Union[int, float, bool]]:
+def parse_numeric_n_bool_cl_kwargs(unparsed_args: list[str]) -> dict[str, Union[int, float, bool]]:
     """
     Parse an argv list of unspecified command line args to a dict.
     Assumes all values are either numeric or boolean in the form of true/false.
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
@@ -20,7 +19,7 @@ import os
 import sys
 from dataclasses import dataclass, field
 from importlib import import_module
-from typing import Dict, List, Optional, Tuple
+from typing import Optional

 import numpy as np
 from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
@@ -159,7 +158,7 @@ def main():

 # Prepare CONLL-2003 task
 labels = token_classification_task.get_labels(data_args.labels)
-label_map: Dict[int, str] = dict(enumerate(labels))
+label_map: dict[int, str] = dict(enumerate(labels))
 num_labels = len(labels)

 # Load pretrained model and tokenizer
@@ -217,7 +216,7 @@ def main():
     else None
 )

-def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> Tuple[List[int], List[int]]:
+def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> tuple[list[int], list[int]]:
     preds = np.argmax(predictions, axis=2)

     batch_size, seq_len = preds.shape
@@ -233,7 +232,7 @@ def main():

 return preds_list, out_label_list

-def compute_metrics(p: EvalPrediction) -> Dict:
+def compute_metrics(p: EvalPrediction) -> dict:
     preds_list, out_label_list = align_predictions(p.predictions, p.label_ids)
     return {
         "accuracy_score": accuracy_score(out_label_list, preds_list),
@@ -279,7 +278,7 @@ def main():
 logger.info("***** Eval results *****")
 for key, value in result.items():
     logger.info(" %s = %s", key, value)
-    writer.write("%s = %s\n" % (key, value))
+    writer.write("{} = {}\n".format(key, value))

 results.update(result)

@@ -304,13 +303,13 @@ def main():
 with open(output_test_results_file, "w") as writer:
     for key, value in metrics.items():
         logger.info(" %s = %s", key, value)
-        writer.write("%s = %s\n" % (key, value))
+        writer.write("{} = {}\n".format(key, value))

 # Save predictions
 output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
 if trainer.is_world_process_zero():
     with open(output_test_predictions_file, "w") as writer:
-        with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
+        with open(os.path.join(data_args.data_dir, "test.txt")) as f:
             token_classification_task.write_predictions_to_file(writer, f, preds_list)

 return results
@@ -12,7 +12,7 @@ subword_len_counter = 0
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
 max_len -= tokenizer.num_special_tokens_to_add()

-with open(dataset, "rt") as f_p:
+with open(dataset) as f_p:
     for line in f_p:
         line = line.rstrip()
@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import List, TextIO, Union
+from typing import TextIO, Union

 from conllu import parse_incr
 from utils_ner import InputExample, Split, TokenClassificationTask
@@ -14,7 +14,7 @@ class NER(TokenClassificationTask):
 # in NER datasets, the last column is usually reserved for NER label
 self.label_idx = label_idx

-def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]:
+def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]:
     if isinstance(mode, Split):
         mode = mode.value
     file_path = os.path.join(data_dir, f"{mode}.txt")
@@ -42,7 +42,7 @@ class NER(TokenClassificationTask):
 examples.append(InputExample(guid=f"{mode}-{guid_index}", words=words, labels=labels))
 return examples

-def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List):
+def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list):
     example_id = 0
     for line in test_input_reader:
         if line.startswith("-DOCSTART-") or line == "" or line == "\n":
@@ -55,9 +55,9 @@ class NER(TokenClassificationTask):
 else:
     logger.warning("Maximum sequence length exceeded: No prediction for '%s'.", line.split()[0])

-def get_labels(self, path: str) -> List[str]:
+def get_labels(self, path: str) -> list[str]:
     if path:
-        with open(path, "r") as f:
+        with open(path) as f:
             labels = f.read().splitlines()
         if "O" not in labels:
             labels = ["O"] + labels
@@ -71,9 +71,9 @@ class Chunk(NER):
 # in CONLL2003 dataset chunk column is second-to-last
 super().__init__(label_idx=-2)

-def get_labels(self, path: str) -> List[str]:
+def get_labels(self, path: str) -> list[str]:
     if path:
-        with open(path, "r") as f:
+        with open(path) as f:
             labels = f.read().splitlines()
         if "O" not in labels:
             labels = ["O"] + labels
@@ -105,7 +105,7 @@ class Chunk(NER):


 class POS(TokenClassificationTask):
-    def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> List[InputExample]:
+    def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -> list[InputExample]:
         if isinstance(mode, Split):
             mode = mode.value
         file_path = os.path.join(data_dir, f"{mode}.txt")
@@ -125,7 +125,7 @@ class POS(TokenClassificationTask):
 guid_index += 1
 return examples

-def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: List):
+def write_predictions_to_file(self, writer: TextIO, test_input_reader: TextIO, preds_list: list):
     example_id = 0
     for sentence in parse_incr(test_input_reader):
         s_p = preds_list[example_id]
@@ -136,9 +136,9 @@ class POS(TokenClassificationTask):
 writer.write(out)
 example_id += 1

-def get_labels(self, path: str) -> List[str]:
+def get_labels(self, path: str) -> list[str]:
     if path:
-        with open(path, "r") as f:
+        with open(path) as f:
             return f.read().splitlines()
     else:
         return [
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -19,7 +18,7 @@ import logging
import os
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Union
from typing import Optional, Union

from filelock import FileLock

@ -42,8 +41,8 @@ class InputExample:
"""

guid: str
words: List[str]
labels: Optional[List[str]]
words: list[str]
labels: Optional[list[str]]


@dataclass
@ -53,10 +52,10 @@ class InputFeatures:
Property names are the same names as the corresponding inputs to a model.
"""

input_ids: List[int]
attention_mask: List[int]
token_type_ids: Optional[List[int]] = None
label_ids: Optional[List[int]] = None
input_ids: list[int]
attention_mask: list[int]
token_type_ids: Optional[list[int]] = None
label_ids: Optional[list[int]] = None


class Split(Enum):
@ -67,17 +66,17 @@ class Split(Enum):

class TokenClassificationTask:
@staticmethod
def read_examples_from_file(data_dir, mode: Union[Split, str]) -> List[InputExample]:
def read_examples_from_file(data_dir, mode: Union[Split, str]) -> list[InputExample]:
raise NotImplementedError

@staticmethod
def get_labels(path: str) -> List[str]:
def get_labels(path: str) -> list[str]:
raise NotImplementedError

@staticmethod
def convert_examples_to_features(
examples: List[InputExample],
label_list: List[str],
examples: list[InputExample],
label_list: list[str],
max_seq_length: int,
tokenizer: PreTrainedTokenizer,
cls_token_at_end=False,
@ -91,7 +90,7 @@ class TokenClassificationTask:
pad_token_label_id=-100,
sequence_a_segment_id=0,
mask_padding_with_zero=True,
) -> List[InputFeatures]:
) -> list[InputFeatures]:
"""Loads a data file into a list of `InputFeatures`
`cls_token_at_end` define the location of the CLS token:
- False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
@ -214,7 +213,7 @@ if is_torch_available():
soon.
"""

features: List[InputFeatures]
features: list[InputFeatures]
pad_token_label_id: int = nn.CrossEntropyLoss().ignore_index
# Use cross entropy ignore_index as padding label id so that only
# real label ids contribute to the loss later.
@ -224,7 +223,7 @@ if is_torch_available():
token_classification_task: TokenClassificationTask,
data_dir: str,
tokenizer: PreTrainedTokenizer,
labels: List[str],
labels: list[str],
model_type: str,
max_seq_length: Optional[int] = None,
overwrite_cache=False,
@ -233,7 +232,7 @@ if is_torch_available():
# Load data features from cache or dataset file
cached_features_file = os.path.join(
data_dir,
"cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
f"cached_{mode.value}_{tokenizer.__class__.__name__}_{str(max_seq_length)}",
)

# Make sure only the first process in distributed training processes the dataset,
@ -283,7 +282,7 @@ if is_tf_available():
soon.
"""

features: List[InputFeatures]
features: list[InputFeatures]
pad_token_label_id: int = -100
# Use cross entropy ignore_index as padding label id so that only
# real label ids contribute to the loss later.
@ -293,7 +292,7 @@ if is_tf_available():
token_classification_task: TokenClassificationTask,
data_dir: str,
tokenizer: PreTrainedTokenizer,
labels: List[str],
labels: list[str],
model_type: str,
max_seq_length: Optional[int] = None,
overwrite_cache=False,
@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_new_imgproc_model.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Dict, List, Optional, Union
from typing import Optional, Union

import numpy as np
import torch
@ -74,13 +74,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
def __init__(
self,
do_resize: bool = True,
size: Dict[str, int] = None,
size: dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BICUBIC,
do_rescale: bool = True,
rescale_factor: Union[int, float] = 1 / 255,
do_normalize: bool = True,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
image_mean: Optional[Union[float, list[float]]] = None,
image_std: Optional[Union[float, list[float]]] = None,
do_convert_rgb: bool = True,
**kwargs,
) -> None:
@ -101,7 +101,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
def resize(
self,
image: np.ndarray,
size: Dict[str, int],
size: dict[str, int],
resample: PILImageResampling = PILImageResampling.BICUBIC,
data_format: Optional[Union[str, ChannelDimension]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
@ -151,13 +151,13 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
self,
images: ImageInput,
do_resize: Optional[bool] = None,
size: Optional[Dict[str, int]] = None,
size: Optional[dict[str, int]] = None,
resample: PILImageResampling = None,
do_rescale: Optional[bool] = None,
rescale_factor: Optional[float] = None,
do_normalize: Optional[bool] = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
image_mean: Optional[Union[float, list[float]]] = None,
image_std: Optional[Union[float, list[float]]] = None,
return_tensors: Optional[Union[str, TensorType]] = None,
do_convert_rgb: bool = None,
data_format: ChannelDimension = ChannelDimension.FIRST,
@ -5,7 +5,7 @@
# modular_add_function.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Note that zamba does not have the `apply_rotary_pos_emb` function!
from typing import Optional, Tuple
from typing import Optional

import torch
from torch import nn
@ -62,5 +62,5 @@ class TestAttention(nn.Module):
def __init__(self):
pass

def forward(self) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
def forward(self) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
_ = apply_rotary_pos_emb(1, 1, 1, 1)
@ -5,7 +5,7 @@
# modular_dummy.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from functools import partial
from typing import Callable, Optional, Tuple, Union
from typing import Callable, Optional, Union

import torch
from torch import nn
@ -223,12 +223,12 @@ class DummyAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)

@ -290,9 +290,9 @@ class DummyDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states

hidden_states = self.input_layernorm(hidden_states)
@ -494,7 +494,7 @@ class DummyModel(DummyPreTrainedModel):
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
**flash_attn_kwargs: Unpack[FlashAttentionKwargs],
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -6,7 +6,7 @@
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
import math
import os
from typing import List, Optional, Tuple, Union
from typing import Optional, Union

import torch
from packaging import version
@ -136,9 +136,9 @@ class DummyBertSelfAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
mixed_query_layer = self.query(hidden_states)

# If this is instantiated as a cross-attention module, the keys
@ -245,9 +245,9 @@ class DummyBertSdpaSelfAttention(DummyBertSelfAttention):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None:
# TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented.
logger.warning_once(
@ -386,9 +386,9 @@ class DummyBertAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
self_outputs = self.self(
hidden_states,
attention_mask,
@ -454,9 +454,9 @@ class DummyBertLayer(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
# decoder uni-directional self-attention cached key/values tuple is at positions 1,2
self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
self_attention_outputs = self.attention(
@ -532,12 +532,12 @@ class DummyBertEncoder(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -858,12 +858,12 @@ class DummyBertModel(DummyBertPreTrainedModel):
inputs_embeds: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[list[torch.FloatTensor]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
r"""
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_from_uppercase_model.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Optional, Tuple
from typing import Optional

import torch
from torch import nn
@ -53,7 +53,7 @@ class FromUppercaseModelAttention(nn.Module):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""Input shape: Batch x Time x Channel"""

bsz, tgt_len, embed_dim = hidden_states.size()
@ -148,7 +148,7 @@ class FromUppercaseModelFlashAttention2(FromUppercaseModelAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
output_attentions = False

batch_size, q_len, _ = hidden_states.size()
@ -226,7 +226,7 @@ class FromUppercaseModelSdpaAttention(FromUppercaseModelAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
if output_attentions:
# TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
logger.warning_once(
@ -322,7 +322,7 @@ class FromUppercaseModelEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
causal_attention_mask: torch.Tensor,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.FloatTensor]:
) -> tuple[torch.FloatTensor]:
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@ -5,7 +5,7 @@
# modular_multimodal1.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from functools import partial
from typing import Callable, Optional, Tuple, Union
from typing import Callable, Optional, Union

import torch
from torch import nn
@ -223,12 +223,12 @@ class Multimodal1TextAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)

@ -290,9 +290,9 @@ class Multimodal1TextDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states

hidden_states = self.input_layernorm(hidden_states)
@ -494,7 +494,7 @@ class Multimodal1TextModel(Multimodal1TextPreTrainedModel):
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
**flash_attn_kwargs: Unpack[FlashAttentionKwargs],
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -5,7 +5,7 @@
# modular_multimodal2.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨

from typing import Optional, Tuple, Union
from typing import Optional, Union

import torch
from torch import nn
@ -65,7 +65,7 @@ class Multimodal2VisionAttention(nn.Module):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""Input shape: Batch x Time x Channel"""

bsz, tgt_len, embed_dim = hidden_states.size()
@ -152,7 +152,7 @@ class Multimodal2VisionSdpaAttention(Multimodal2VisionAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
if output_attentions:
# TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
logger.warning_once(
@ -233,7 +233,7 @@ class Multimodal2VisionFlashAttention2(Multimodal2VisionAttention):
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
output_attentions = False

batch_size, q_len, _ = hidden_states.size()
@ -334,7 +334,7 @@ class Multimodal2VisionEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
causal_attention_mask: torch.Tensor,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.FloatTensor]:
) -> tuple[torch.FloatTensor]:
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@ -392,7 +392,7 @@ class Multimodal2VisionEncoder(nn.Module):
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutput]:
) -> Union[tuple, BaseModelOutput]:
r"""
Args:
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
@ -587,7 +587,7 @@ class Multimodal2VisionTransformer(nn.Module):
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = False,
) -> Union[Tuple, BaseModelOutputWithPooling]:
) -> Union[tuple, BaseModelOutputWithPooling]:
r"""
Returns:

@ -671,7 +671,7 @@ class Multimodal2VisionModel(Multimodal2VisionPreTrainedModel):
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: bool = False,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPooling]:
) -> Union[tuple, BaseModelOutputWithPooling]:
r"""
Returns:
@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_my_new_model2.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, Optional, Union

import torch
from torch import nn
@ -222,12 +222,12 @@ class MyNewModel2Attention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)

@ -289,9 +289,9 @@ class MyNewModel2DecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states

hidden_states = self.input_layernorm(hidden_states)
@ -485,7 +485,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
@ -493,7 +493,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs, # NOOP kwarg for now
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -753,14 +753,14 @@ class MyNewModel2ForSequenceClassification(MyNewModel2PreTrainedModel):
input_ids: Optional[torch.LongTensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, SequenceClassifierOutputWithPast]:
) -> Union[tuple, SequenceClassifierOutputWithPast]:
r"""
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -5,7 +5,7 @@
# modular_new_task_model.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from dataclasses import dataclass
from typing import ClassVar, List, Optional, Tuple, Union
from typing import ClassVar, Optional, Union

import torch
from torch import nn
@ -61,9 +61,9 @@ class NewTaskModelCausalLMOutputWithPast(ModelOutput):

loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None
hidden_states: Optional[tuple[torch.FloatTensor]] = None
attentions: Optional[tuple[torch.FloatTensor]] = None
image_hidden_states: Optional[torch.FloatTensor] = None


@ -337,7 +337,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
pixel_values: torch.FloatTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None,
past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None,
token_type_ids: Optional[torch.LongTensor] = None,
cache_position: Optional[torch.LongTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
@ -347,7 +347,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
num_logits_to_keep: int = 0,
) -> Union[Tuple, NewTaskModelCausalLMOutputWithPast]:
) -> Union[tuple, NewTaskModelCausalLMOutputWithPast]:
r"""
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
@ -6,7 +6,7 @@
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
import math
import os
from typing import List, Optional, Tuple, Union
from typing import Optional, Union

import torch
import torch.nn as nn
@ -139,9 +139,9 @@ class RobertaSelfAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
mixed_query_layer = self.query(hidden_states)

# If this is instantiated as a cross-attention module, the keys
@ -248,9 +248,9 @@ class RobertaSdpaSelfAttention(RobertaSelfAttention):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
if self.position_embedding_type != "absolute" or output_attentions or head_mask is not None:
# TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented.
logger.warning_once(
@ -389,9 +389,9 @@ class RobertaAttention(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
self_outputs = self.self(
hidden_states,
attention_mask,
@ -457,9 +457,9 @@ class RobertaLayer(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
output_attentions: Optional[bool] = False,
) -> Tuple[torch.Tensor]:
) -> tuple[torch.Tensor]:
# decoder uni-directional self-attention cached key/values tuple is at positions 1,2
self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
self_attention_outputs = self.attention(
@ -535,12 +535,12 @@ class RobertaEncoder(nn.Module):
head_mask: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -861,12 +861,12 @@ class RobertaModel(RobertaPreTrainedModel):
inputs_embeds: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[list[torch.FloatTensor]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
r"""
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -4,7 +4,7 @@
# the file from the modular. If any change should be done, please apply the change to the
# modular_super.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, Optional, Union

import torch
from torch import nn
@ -222,12 +222,12 @@ class SuperAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)

@ -289,9 +289,9 @@ class SuperDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states

hidden_states = self.input_layernorm(hidden_states)
@ -485,14 +485,14 @@ class SuperModel(SuperPreTrainedModel):
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
) -> Union[Tuple, BaseModelOutputWithPast]:
) -> Union[tuple, BaseModelOutputWithPast]:
out = super().forward(
input_ids,
attention_mask,
@ -5,7 +5,7 @@
# modular_switch_function.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Note that llama and cohere have different definitions for rotate_half
from typing import Callable, Optional, Tuple
from typing import Callable, Optional

import torch
from torch import nn
@ -123,12 +123,12 @@ class SwitchFunctionAttention(nn.Module):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)

@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Union
from typing import Optional, Union

import torch

@ -18,10 +18,10 @@ class DummyBertModel(BertModel):
inputs_embeds: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[list[torch.FloatTensor]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
return super().forward(input_ids)
@ -1,4 +1,4 @@
from typing import ClassVar, List, Optional, Union
from typing import ClassVar, Optional, Union

import torch
import torch.utils.checkpoint
@ -29,7 +29,7 @@ class NewTaskModelForNewTask(PaliGemmaForConditionalGeneration):
pixel_values: torch.FloatTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None,
past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None,
token_type_ids: Optional[torch.LongTensor] = None,
cache_position: Optional[torch.LongTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Union
from typing import Optional, Union

import torch

@ -15,14 +15,14 @@ class SuperModel(LlamaModel):
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
) -> Union[Tuple, CausalLMOutputWithPast]:
) -> Union[tuple, CausalLMOutputWithPast]:
out = super().forward(
input_ids,
attention_mask,
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -18,9 +17,10 @@
import logging
import os
import sys
from collections.abc import Mapping
from dataclasses import dataclass, field
from functools import partial
from typing import Any, Dict, List, Mapping, Optional
from typing import Any, Optional

import albumentations as A
import numpy as np
@ -200,7 +200,7 @@ class Evaluator:
def reset_metric(self):
self.metric.reset()

def postprocess_target_batch(self, target_batch) -> List[Dict[str, torch.Tensor]]:
def postprocess_target_batch(self, target_batch) -> list[dict[str, torch.Tensor]]:
"""Collect targets in a form of list of dictionaries with keys "masks", "labels"."""
batch_masks = target_batch[0]
batch_labels = target_batch[1]
@ -214,13 +214,13 @@ class Evaluator:
)
return post_processed_targets

def get_target_sizes(self, post_processed_targets) -> List[List[int]]:
def get_target_sizes(self, post_processed_targets) -> list[list[int]]:
target_sizes = []
for target in post_processed_targets:
target_sizes.append(target["masks"].shape[-2:])
return target_sizes

def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> List[Dict[str, torch.Tensor]]:
def postprocess_prediction_batch(self, prediction_batch, target_sizes) -> list[dict[str, torch.Tensor]]:
"""Collect predictions in a form of list of dictionaries with keys "masks", "labels", "scores"."""

model_output = ModelOutput(class_queries_logits=prediction_batch[0], masks_queries_logits=prediction_batch[1])
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -21,9 +20,10 @@ import logging
import math
import os
import sys
from collections.abc import Mapping
from functools import partial
from pathlib import Path
from typing import Any, Mapping
from typing import Any

import albumentations as A
import datasets
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -551,7 +550,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)
else:
@ -571,7 +570,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -518,7 +517,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)
else:
@ -538,7 +537,7 @@ def main():
covariance_matrix=1e-5 * sigma,
)
new_token_embeddings = torch.stack(
tuple((dist.sample() for _ in range(len(special_tokens)))),
tuple(dist.sample() for _ in range(len(special_tokens))),
dim=0,
)
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -18,9 +17,10 @@
import logging
import os
import sys
from collections.abc import Mapping
from dataclasses import dataclass, field
from functools import partial
from typing import Any, List, Mapping, Optional, Tuple, Union
from typing import Any, Optional, Union

import albumentations as A
import numpy as np
@ -60,7 +60,7 @@ class ModelOutput:


def format_image_annotations_as_coco(
image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]]
image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]]
) -> dict:
"""Format one set of image annotations to the COCO format

@ -94,7 +94,7 @@ def format_image_annotations_as_coco(
}


def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
"""
Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.
@ -148,7 +148,7 @@ def augment_and_transform_batch(
return result


def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]:
def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]:
data = {}
data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch])
data["labels"] = [x["labels"] for x in batch]
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -19,9 +18,10 @@ import json
import logging
import math
import os
from collections.abc import Mapping
from functools import partial
from pathlib import Path
from typing import Any, List, Mapping, Tuple, Union
from typing import Any, Union

import albumentations as A
import datasets
@ -61,7 +61,7 @@ require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/sema

# Copied from examples/pytorch/object-detection/run_object_detection.format_image_annotations_as_coco
def format_image_annotations_as_coco(
image_id: str, categories: List[int], areas: List[float], bboxes: List[Tuple[float]]
image_id: str, categories: list[int], areas: list[float], bboxes: list[tuple[float]]
) -> dict:
"""Format one set of image annotations to the COCO format

@ -96,7 +96,7 @@ def format_image_annotations_as_coco(


# Copied from examples/pytorch/object-detection/run_object_detection.convert_bbox_yolo_to_pascal
def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
"""
Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.
@ -152,7 +152,7 @@ def augment_and_transform_batch(


# Copied from examples/pytorch/object-detection/run_object_detection.collate_fn
def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Tensor, List[Any]]]:
def collate_fn(batch: list[BatchFeature]) -> Mapping[str, Union[torch.Tensor, list[Any]]]:
data = {}
data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch])
data["labels"] = [x["labels"] for x in batch]
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -33,7 +32,7 @@ def get_results(output_dir):
results = {}
path = os.path.join(output_dir, "all_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
results = json.load(f)
else:
raise ValueError(f"can't find {path}")
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -22,7 +21,7 @@ import logging
import os
import sys
from dataclasses import dataclass, field
from typing import List, Optional, Tuple
from typing import Optional

import datasets
import evaluate
@ -469,7 +468,7 @@ def main():
question_column: str,
context_column: str,
answer_column: str,
) -> Tuple[List[str], List[str]]:
) -> tuple[list[str], list[str]]:
questions = examples[question_column]
contexts = examples[context_column]
answers = examples[answer_column]
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -18,7 +17,7 @@ A subclass of `Trainer` specific to Question-Answering tasks

import math
import time
from typing import Dict, List, Optional
from typing import Optional

from torch.utils.data import Dataset

@ -42,10 +41,10 @@ class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
self,
eval_dataset: Optional[Dataset] = None,
eval_examples=None,
ignore_keys: Optional[List[str]] = None,
ignore_keys: Optional[list[str]] = None,
metric_key_prefix: str = "eval",
**gen_kwargs,
) -> Dict[str, float]:
) -> dict[str, float]:
gen_kwargs = gen_kwargs.copy()

# Use legacy argument setting if a) the option is not explicitly passed; and b) the argument is set in the
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -20,7 +19,7 @@ import collections
import json
import logging
import os
from typing import Optional, Tuple
from typing import Optional

import numpy as np
from tqdm.auto import tqdm
@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
def postprocess_qa_predictions(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@ -223,7 +222,7 @@ def postprocess_qa_predictions(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")

prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@ -253,7 +252,7 @@ def postprocess_qa_predictions(
def postprocess_qa_predictions_with_beam_search(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")

prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
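Swapping `EnvironmentError` for `OSError` in the hunks above changes nothing at runtime: PEP 3151 collapsed the OS exception hierarchy in Python 3.3, leaving `EnvironmentError` and `IOError` as plain aliases of `OSError`. A quick demonstration:

# EnvironmentError and IOError survive only as aliases of OSError (PEP 3151).
assert EnvironmentError is OSError
assert IOError is OSError

try:
    raise OSError("some_dir is not a directory.")
except EnvironmentError as err:  # still caught: it is the very same class
    print(type(err).__name__)  # -> OSError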
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -258,7 +257,7 @@ def main():
else:
repo_id = data_args.dataset_name
filename = "id2label.json"
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: str(k) for k, v in id2label.items()}
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -316,7 +315,7 @@ def main():
else:
repo_id = args.dataset_name
filename = "id2label.json"
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset"), "r"))
id2label = json.load(open(hf_hub_download(repo_id, filename, repo_type="dataset")))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -20,7 +19,7 @@ import math
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Union
from typing import Optional, Union

import datasets
import torch
@ -328,7 +327,7 @@ class DataCollatorForWav2Vec2Pretraining:
mask_time_prob: Optional[float] = 0.65
mask_time_length: Optional[int] = 10

def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# reformat list to dict and set to pytorch format
batch = self.feature_extractor.pad(
features,
@ -716,7 +715,7 @@ def main():
}
log_str = ""
for k, v in train_logs.items():
log_str += "| {}: {:.3e}".format(k, v.item())
log_str += f"| {k}: {v.item():.3e}"

if accelerator.is_local_main_process:
progress_bar.write(log_str)
@ -773,7 +772,7 @@ def main():

log_str = ""
for k, v in val_logs.items():
log_str += "| {}: {:.3e}".format(k, v.item())
log_str += f"| {k}: {v.item():.3e}"

if accelerator.is_local_main_process:
progress_bar.write(log_str)
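The logging hunks above convert `str.format` calls into f-strings; the format spec after the colon (`:.3e`, scientific notation with three decimals) carries over unchanged. A minimal sketch with a hypothetical metrics dict (the real code formats tensor values via `v.item()`):

train_logs = {"loss": 0.4231, "lr": 5e-05}

log_str = ""
for k, v in train_logs.items():
    # f-string equivalent of: log_str += "| {}: {:.3e}".format(k, v)
    log_str += f"| {k}: {v:.3e}"

print(log_str)  # | loss: 4.231e-01| lr: 5.000e-05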
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -24,7 +23,7 @@ import re
import sys
import warnings
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union
from typing import Optional, Union

import datasets
import evaluate
@ -211,11 +210,11 @@ class DataTrainingArguments:
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
chars_to_ignore: Optional[list[str]] = list_field(
default=None,
metadata={"help": "A list of characters to remove from the transcripts."},
)
eval_metrics: List[str] = list_field(
eval_metrics: list[str] = list_field(
default=["wer"],
metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
)
@ -318,7 +317,7 @@ class DataCollatorCTCWithPadding:
pad_to_multiple_of_labels: Optional[int] = None
feature_extractor_input_name: Optional[str] = "input_values"

def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
input_features = [
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -24,7 +23,7 @@ import re
import sys
import warnings
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union
from typing import Optional, Union

import datasets
import evaluate
@ -201,11 +200,11 @@ class DataTrainingArguments:
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
chars_to_ignore: Optional[list[str]] = list_field(
default=None,
metadata={"help": "A list of characters to remove from the transcripts."},
)
eval_metrics: List[str] = list_field(
eval_metrics: list[str] = list_field(
default=["wer"],
metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
)
@ -300,7 +299,7 @@ class DataCollatorCTCWithPadding:
pad_to_multiple_of: Optional[int] = None
pad_to_multiple_of_labels: Optional[int] = None

def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
input_features = [{"input_values": feature["input_values"]} for feature in features]
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -23,7 +22,7 @@ import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from typing import Any, Optional, Union

import datasets
import evaluate
@ -110,11 +109,11 @@ class ModelArguments:
freeze_encoder: bool = field(
default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
)
forced_decoder_ids: List[List[int]] = field(
forced_decoder_ids: list[list[int]] = field(
default=None,
metadata={"help": "Deprecated. Please use the `language` and `task` arguments instead."},
)
suppress_tokens: List[int] = field(
suppress_tokens: list[int] = field(
default=None,
metadata={
"help": (
@ -247,7 +246,7 @@ class DataCollatorSpeechSeq2SeqWithPadding:
decoder_start_token_id: int
forward_attention_mask: bool

def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
model_input_name = self.processor.model_input_names[0]
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -51,7 +50,7 @@ def get_results(output_dir):
results = {}
path = os.path.join(output_dir, "all_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
results = json.load(f)
else:
raise ValueError(f"can't find {path}")
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -87,7 +86,7 @@ def get_results(output_dir):
results = {}
path = os.path.join(output_dir, "all_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
results = json.load(f)
else:
raise ValueError(f"can't find {path}")
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -21,7 +20,7 @@ import os
import random
import sys
from dataclasses import dataclass, field
from typing import List, Optional
from typing import Optional

import datasets
import evaluate
@ -256,7 +255,7 @@ class ModelArguments:
)


def get_label_list(raw_dataset, split="train") -> List[str]:
def get_label_list(raw_dataset, split="train") -> list[str]:
"""Get the list of labels from a multi-label dataset"""

if isinstance(raw_dataset[split]["label"][0], list):
@ -537,7 +536,7 @@ def main():
model.config.id2label = {id: label for label, id in label_to_id.items()}
elif not is_regression:  # classification, but not training
logger.info("using label infos in the model config")
logger.info("label2id: {}".format(model.config.label2id))
logger.info(f"label2id: {model.config.label2id}")
label_to_id = model.config.label2id
else:  # regression
label_to_id = None
@ -549,7 +548,7 @@ def main():
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)

def multi_labels_to_ids(labels: List[str]) -> List[float]:
def multi_labels_to_ids(labels: list[str]) -> list[float]:
ids = [0.0] * len(label_to_id)  # BCELoss requires float as target type
for label in labels:
ids[label_to_id[label]] = 1.0
@ -735,7 +734,7 @@ def main():
else:
item = label_list[item]
writer.write(f"{index}\t{item}\n")
logger.info("Predict results saved at {}".format(output_predict_file))
logger.info(f"Predict results saved at {output_predict_file}")
kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"}

if training_args.push_to_hub:
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
@ -19,7 +18,6 @@
import argparse
import inspect
import logging
from typing import Tuple

import torch
from accelerate import PartialState
@ -271,8 +269,8 @@ class _ModelFallbackWrapper(GenerationMixin):
)

def _reorder_cache(
self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
) -> Tuple[Tuple[torch.Tensor]]:
self, past_key_values: tuple[tuple[torch.Tensor]], beam_idx: torch.Tensor
) -> tuple[tuple[torch.Tensor]]:
"""
This function is used to re-order the `past_key_values` cache if [`~PretrainedModel.beam_search`] or
[`~PretrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 University of Cambridge, Tencent AI Lab, DeepMind and The University of Hong Kong Authors and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Some files were not shown because too many files have changed in this diff.