Migrate metric to Evaluate in Pytorch examples (#18369)

* Migrate metric to Evaluate in pytorch examples

* Remove unused imports
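
Every script in this commit swaps `datasets.load_metric(...)` / `load_metric(...)` for `evaluate.load(...)`. A minimal before/after sketch of the pattern, using the "accuracy" metric purely as an illustration (it is one of several metric names that appear in the diff below):

import evaluate

# Before (deprecated): from datasets import load_metric; metric = load_metric("accuracy")
# After: load the same metric through the evaluate package
metric = evaluate.load("accuracy")

# The compute interface stays the same: pass predictions and references
result = metric.compute(predictions=[0, 1, 1, 0], references=[0, 1, 0, 0])
print(result)  # {'accuracy': 0.75}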
atturaioe 2022-08-01 14:40:25 +03:00 committed by GitHub
parent 25ec12eaf7
commit 1f84399171
25 changed files with 72 additions and 49 deletions

View File

@@ -23,3 +23,4 @@ torchvision
 jiwer
 librosa
 torch < 1.12
+evaluate

View File

@@ -26,6 +26,7 @@ import datasets
 import numpy as np
 from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -315,7 +316,7 @@ def main():
         id2label[str(i)] = label
 
     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
     # `predictions` and `label_ids` fields) and has to return a dictionary string to float.

View File

@@ -19,7 +19,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional
 
-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -34,6 +33,7 @@ from torchvision.transforms import (
     ToTensor,
 )
 
+import evaluate
 import transformers
 from transformers import (
     MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@@ -252,7 +252,7 @@ def main():
         id2label[str(i)] = label
 
     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

View File

@@ -22,7 +22,7 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from torchvision.transforms import (
     CenterCrop,
@@ -35,6 +35,7 @@ from torchvision.transforms import (
 )
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -415,7 +416,7 @@ def main():
         accelerator.init_trackers("image_classification_no_trainer", experiment_config)
 
     # Get the metric function
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

View File

@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -492,7 +493,7 @@ def main():
                 logits = logits[0]
             return logits.argmax(dim=-1)
 
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
         def compute_metrics(eval_preds):
             preds, labels = eval_preds

View File

@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -515,7 +516,7 @@ def main():
                 logits = logits[0]
             return logits.argmax(dim=-1)
 
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
         def compute_metrics(eval_preds):
             preds, labels = eval_preds

View File

@@ -31,10 +31,11 @@ from typing import Optional, Union
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -514,7 +515,7 @@ def main():
         accelerator.init_trackers("swag_no_trainer", experiment_config)
 
     # Metrics
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

View File

@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -593,7 +594,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)

View File

@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -625,7 +626,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)

View File

@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -680,7 +681,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
 
     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
        """

View File

@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -696,7 +697,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
 
     # Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor
     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):

View File

@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import List, Optional, Tuple
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_seq2seq_qa import QuestionAnsweringSeq2SeqTrainer
 from transformers import (
@@ -581,7 +582,7 @@ def main():
         pad_to_multiple_of=8 if training_args.fp16 else None,
     )
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)

View File

@@ -21,7 +21,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional
 
-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -30,6 +29,7 @@ from torch import nn
 from torchvision import transforms
 from torchvision.transforms import functional
 
+import evaluate
 import transformers
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -337,7 +337,7 @@ def main():
     label2id = {v: str(k) for k, v in id2label.items()}
 
     # Load the mean IoU metric from the datasets package
-    metric = datasets.load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

View File

@@ -24,13 +24,14 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from PIL import Image
 from torch.utils.data import DataLoader
 from torchvision import transforms
 from torchvision.transforms import functional
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -500,7 +501,7 @@ def main():
         args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
 
     # Instantiate metric
-    metric = load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")
 
     # We need to initialize the trackers we use, and also store our configuration.
     # We initialize the trackers only on main process because `accelerator.log`

View File

@@ -28,8 +28,9 @@ from typing import Dict, List, Optional, Union
 import datasets
 import numpy as np
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -643,7 +644,7 @@ def main():
     # instantiate a data collator and the trainer
 
     # Define evaluation metrics during training, *i.e.* word error rate, character error rate
-    eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+    eval_metrics = {metric: evaluate.load(metric) for metric in data_args.eval_metrics}
 
     # for large datasets it is advised to run the preprocessing on a
     # single machine first with ``args.preprocessing_only`` since there will mostly likely

View File

@@ -27,8 +27,9 @@ from typing import Any, Dict, List, Optional, Union
 
 import datasets
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -425,7 +426,7 @@ def main():
         return
 
     # 8. Load Metric
-    metric = load_metric("wer")
+    metric = evaluate.load("wer")
 
     def compute_metrics(pred):
         pred_ids = pred.predictions

View File

@@ -27,8 +27,9 @@ from typing import Optional
 import datasets
 import nltk  # Here to have a nice missing dependency error message early on
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from filelock import FileLock
 from transformers import (
@@ -598,7 +599,7 @@ def main():
     )
 
     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]

View File

@@ -30,10 +30,11 @@ import datasets
 import nltk
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -583,7 +584,7 @@ def main():
         accelerator.init_trackers("summarization_no_trainer", experiment_config)
 
     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

View File

@@ -25,8 +25,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -480,9 +481,9 @@ def main():
 
     # Get the metric function
     if data_args.task_name is not None:
-        metric = load_metric("glue", data_args.task_name)
+        metric = evaluate.load("glue", data_args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

View File

@@ -23,10 +23,11 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -466,9 +467,9 @@ def main():
 
     # Get the metric function
    if args.task_name is not None:
-        metric = load_metric("glue", args.task_name)
+        metric = evaluate.load("glue", args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

View File

@@ -26,8 +26,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -349,7 +350,7 @@ def main():
     )
 
     # Get the metric function
-    metric = load_metric("xnli")
+    metric = evaluate.load("xnli")
 
     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

View File

@@ -27,8 +27,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -504,7 +505,7 @@ def main():
     data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
 
     # Metrics
-    metric = load_metric("seqeval")
+    metric = evaluate.load("seqeval")
 
     def compute_metrics(p):
         predictions, labels = p

View File

@@ -28,10 +28,11 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -580,7 +581,7 @@ def main():
         accelerator.init_trackers("ner_no_trainer", experiment_config)
 
     # Metrics
-    metric = load_metric("seqeval")
+    metric = evaluate.load("seqeval")
 
     def get_labels(predictions, references):
         # Transform predictions and references tensos to numpy arrays

View File

@@ -26,8 +26,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -522,7 +523,7 @@ def main():
     )
 
     # Metric
-    metric = load_metric("sacrebleu")
+    metric = evaluate.load("sacrebleu")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]

View File

@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -562,7 +563,7 @@ def main():
         experiment_config["lr_scheduler_type"] = experiment_config["lr_scheduler_type"].value
         accelerator.init_trackers("translation_no_trainer", experiment_config)
 
-    metric = load_metric("sacrebleu")
+    metric = evaluate.load("sacrebleu")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]
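
In the `*_no_trainer` scripts touched above, the metric object is typically filled batch by batch inside the evaluation loop and `compute()` is called once at the end. A minimal sketch of that usage with the evaluate API (the toy batches below are illustrative, not taken from this diff):

import evaluate

metric = evaluate.load("accuracy")

# Hypothetical evaluation loop: accumulate predictions per batch, compute once afterwards
for batch_predictions, batch_references in [([0, 1], [0, 1]), ([1, 1], [1, 0])]:
    metric.add_batch(predictions=batch_predictions, references=batch_references)

print(metric.compute())  # {'accuracy': 0.75}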