mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-01 02:31:11 +06:00

* Change the way tracing happens, enabling dynamic axes out of the box * Update the tests and modeling xlnet * Add the non recoding of leaf modules to avoid recording more values for the methods to record than what will be seen at tracing time (which would otherwise desynchronize the recorded values and the values that need to be given to the proxies during tracing, causing errors). * Comments and making tracing work for gpt-j and xlnet * Refactore things related to num_choices (and batch_size, sequence_length) * Update fx to work on PyTorch 1.10 * Postpone autowrap_function feature usage for later * Add copyrights * Remove unnecessary file * Fix issue with add_new_model_like * Apply suggestions
381 lines
16 KiB
Python
381 lines
16 KiB
Python
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
|
# Copyright 2021 NVIDIA Corporation. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
""" Testing suite for the PyTorch MegatronBERT model. """
|
|
|
|
|
|
import math
|
|
import os
|
|
import unittest
|
|
|
|
from transformers import MegatronBertConfig, is_torch_available
|
|
from transformers.models.auto import get_values
|
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
|
|
|
from .test_configuration_common import ConfigTester
|
|
from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
|
|
|
|
|
|
if is_torch_available():
|
|
import torch
|
|
|
|
from transformers import (
|
|
MODEL_FOR_PRETRAINING_MAPPING,
|
|
MegatronBertForCausalLM,
|
|
MegatronBertForMaskedLM,
|
|
MegatronBertForMultipleChoice,
|
|
MegatronBertForNextSentencePrediction,
|
|
MegatronBertForPreTraining,
|
|
MegatronBertForQuestionAnswering,
|
|
MegatronBertForSequenceClassification,
|
|
MegatronBertForTokenClassification,
|
|
MegatronBertModel,
|
|
)
|
|
|
|
|
|
class MegatronBertModelTester:
|
|
def __init__(
|
|
self,
|
|
parent,
|
|
batch_size=13,
|
|
seq_length=7,
|
|
is_training=True,
|
|
use_input_mask=True,
|
|
use_token_type_ids=True,
|
|
use_labels=True,
|
|
vocab_size=99,
|
|
hidden_size=64,
|
|
embedding_size=32,
|
|
num_hidden_layers=5,
|
|
num_attention_heads=4,
|
|
intermediate_size=37,
|
|
hidden_act="gelu",
|
|
hidden_dropout_prob=0.1,
|
|
attention_probs_dropout_prob=0.1,
|
|
max_position_embeddings=512,
|
|
type_vocab_size=16,
|
|
type_sequence_label_size=2,
|
|
initializer_range=0.02,
|
|
num_labels=3,
|
|
num_choices=4,
|
|
scope=None,
|
|
):
|
|
self.parent = parent
|
|
self.batch_size = batch_size
|
|
self.seq_length = seq_length
|
|
self.is_training = is_training
|
|
self.use_input_mask = use_input_mask
|
|
self.use_token_type_ids = use_token_type_ids
|
|
self.use_labels = use_labels
|
|
self.vocab_size = vocab_size
|
|
self.hidden_size = hidden_size
|
|
self.embedding_size = embedding_size
|
|
self.num_hidden_layers = num_hidden_layers
|
|
self.num_attention_heads = num_attention_heads
|
|
self.intermediate_size = intermediate_size
|
|
self.hidden_act = hidden_act
|
|
self.hidden_dropout_prob = hidden_dropout_prob
|
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
|
self.max_position_embeddings = max_position_embeddings
|
|
self.type_vocab_size = type_vocab_size
|
|
self.type_sequence_label_size = type_sequence_label_size
|
|
self.initializer_range = initializer_range
|
|
self.num_labels = num_labels
|
|
self.num_choices = num_choices
|
|
self.scope = scope
|
|
|
|
def prepare_config_and_inputs(self):
|
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
|
|
|
input_mask = None
|
|
if self.use_input_mask:
|
|
input_mask = random_attention_mask([self.batch_size, self.seq_length])
|
|
|
|
token_type_ids = None
|
|
if self.use_token_type_ids:
|
|
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
|
|
|
|
sequence_labels = None
|
|
token_labels = None
|
|
choice_labels = None
|
|
if self.use_labels:
|
|
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
|
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
|
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
|
|
|
config = self.get_config()
|
|
|
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
|
|
def get_config(self):
|
|
return MegatronBertConfig(
|
|
vocab_size=self.vocab_size,
|
|
hidden_size=self.hidden_size,
|
|
num_hidden_layers=self.num_hidden_layers,
|
|
num_attention_heads=self.num_attention_heads,
|
|
intermediate_size=self.intermediate_size,
|
|
embedding_size=self.embedding_size,
|
|
hidden_act=self.hidden_act,
|
|
hidden_dropout_prob=self.hidden_dropout_prob,
|
|
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
|
max_position_embeddings=self.max_position_embeddings,
|
|
type_vocab_size=self.type_vocab_size,
|
|
is_decoder=False,
|
|
initializer_range=self.initializer_range,
|
|
)
|
|
|
|
def create_and_check_megatron_bert_model(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
model = MegatronBertModel(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
|
|
result = model(input_ids, token_type_ids=token_type_ids)
|
|
result = model(input_ids)
|
|
|
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
|
self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
|
|
|
|
def create_and_check_megatron_bert_for_masked_lm(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
model = MegatronBertForMaskedLM(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
|
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
|
|
|
def create_and_check_for_causal_lm(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
model = MegatronBertForCausalLM(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
|
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
|
|
|
def create_and_check_megatron_bert_for_next_sequence_prediction(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
model = MegatronBertForNextSentencePrediction(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(
|
|
input_ids,
|
|
attention_mask=input_mask,
|
|
token_type_ids=token_type_ids,
|
|
labels=sequence_labels,
|
|
)
|
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))
|
|
|
|
def create_and_check_megatron_bert_for_pretraining(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
model = MegatronBertForPreTraining(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(
|
|
input_ids,
|
|
attention_mask=input_mask,
|
|
token_type_ids=token_type_ids,
|
|
labels=token_labels,
|
|
next_sentence_label=sequence_labels,
|
|
)
|
|
self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
|
self.parent.assertEqual(result.seq_relationship_logits.shape, (self.batch_size, 2))
|
|
|
|
def create_and_check_megatron_bert_for_question_answering(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
model = MegatronBertForQuestionAnswering(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(
|
|
input_ids,
|
|
attention_mask=input_mask,
|
|
token_type_ids=token_type_ids,
|
|
start_positions=sequence_labels,
|
|
end_positions=sequence_labels,
|
|
)
|
|
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
|
|
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
|
|
|
|
def create_and_check_megatron_bert_for_sequence_classification(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
config.num_labels = self.num_labels
|
|
model = MegatronBertForSequenceClassification(config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
|
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
|
|
|
def create_and_check_megatron_bert_for_token_classification(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
config.num_labels = self.num_labels
|
|
model = MegatronBertForTokenClassification(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
|
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
|
|
|
|
def create_and_check_megatron_bert_for_multiple_choice(
|
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
):
|
|
config.num_choices = self.num_choices
|
|
model = MegatronBertForMultipleChoice(config=config)
|
|
model.to(torch_device)
|
|
model.eval()
|
|
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
|
multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
|
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
|
result = model(
|
|
multiple_choice_inputs_ids,
|
|
attention_mask=multiple_choice_input_mask,
|
|
token_type_ids=multiple_choice_token_type_ids,
|
|
labels=choice_labels,
|
|
)
|
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
|
|
|
|
def prepare_config_and_inputs_for_common(self):
|
|
config_and_inputs = self.prepare_config_and_inputs()
|
|
(
|
|
config,
|
|
input_ids,
|
|
token_type_ids,
|
|
input_mask,
|
|
sequence_labels,
|
|
token_labels,
|
|
choice_labels,
|
|
) = config_and_inputs
|
|
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
|
return config, inputs_dict
|
|
|
|
|
|
@require_torch
|
|
class MegatronBertModelTest(ModelTesterMixin, unittest.TestCase):
|
|
|
|
all_model_classes = (
|
|
(
|
|
MegatronBertModel,
|
|
MegatronBertForMaskedLM,
|
|
MegatronBertForCausalLM,
|
|
MegatronBertForMultipleChoice,
|
|
MegatronBertForNextSentencePrediction,
|
|
MegatronBertForPreTraining,
|
|
MegatronBertForQuestionAnswering,
|
|
MegatronBertForSequenceClassification,
|
|
MegatronBertForTokenClassification,
|
|
)
|
|
if is_torch_available()
|
|
else ()
|
|
)
|
|
fx_compatible = True
|
|
# test_resize_embeddings = False
|
|
test_head_masking = False
|
|
|
|
# special case for ForPreTraining model
|
|
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
|
|
|
if return_labels:
|
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
|
inputs_dict["labels"] = torch.zeros(
|
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
|
)
|
|
inputs_dict["next_sentence_label"] = torch.zeros(
|
|
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
|
)
|
|
return inputs_dict
|
|
|
|
def setUp(self):
|
|
self.model_tester = MegatronBertModelTester(self)
|
|
self.config_tester = ConfigTester(self, config_class=MegatronBertConfig, hidden_size=37)
|
|
|
|
def test_config(self):
|
|
self.config_tester.run_common_tests()
|
|
|
|
def test_megatron_bert_model(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_model(*config_and_inputs)
|
|
|
|
def test_for_masked_lm(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_for_masked_lm(*config_and_inputs)
|
|
|
|
def test_for_multiple_choice(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_for_multiple_choice(*config_and_inputs)
|
|
|
|
def test_for_next_sequence_prediction(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_for_next_sequence_prediction(*config_and_inputs)
|
|
|
|
def test_for_pretraining(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_for_pretraining(*config_and_inputs)
|
|
|
|
def test_for_question_answering(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_for_question_answering(*config_and_inputs)
|
|
|
|
def test_for_sequence_classification(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_for_sequence_classification(*config_and_inputs)
|
|
|
|
def test_for_token_classification(self):
|
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
self.model_tester.create_and_check_megatron_bert_for_token_classification(*config_and_inputs)
|
|
|
|
|
|
def _long_tensor(tok_lst):
|
|
return torch.tensor(
|
|
tok_lst,
|
|
dtype=torch.long,
|
|
device=torch_device,
|
|
)
|
|
|
|
|
|
TOLERANCE = 1e-4
|
|
|
|
|
|
@require_torch
|
|
@require_sentencepiece
|
|
@require_tokenizers
|
|
class MegatronBertModelIntegrationTests(unittest.TestCase):
|
|
@slow
|
|
@unittest.skip("Model is not available.")
|
|
def test_inference_no_head(self):
|
|
directory = "nvidia/megatron-bert-uncased-345m"
|
|
if "MYDIR" in os.environ:
|
|
directory = os.path.join(os.environ["MYDIR"], directory)
|
|
model = MegatronBertModel.from_pretrained(directory)
|
|
model.to(torch_device)
|
|
model.half()
|
|
input_ids = _long_tensor([[101, 7110, 1005, 1056, 2023, 11333, 17413, 1029, 102]])
|
|
with torch.no_grad():
|
|
output = model(input_ids)[0]
|
|
expected_shape = torch.Size((1, 9, 1024))
|
|
self.assertEqual(output.shape, expected_shape)
|
|
|
|
expected = [-0.6040, -0.2517, -0.1025, 0.3420, -0.6758, -0.0017, -0.1089, -0.1990, 0.5728]
|
|
for ii in range(3):
|
|
for jj in range(3):
|
|
a = output[0, ii, jj]
|
|
b = expected[3 * ii + jj]
|
|
msg = "ii={} jj={} a={} b={}".format(ii, jj, a, b)
|
|
self.assertTrue(math.isclose(a, b, rel_tol=TOLERANCE, abs_tol=TOLERANCE), msg=msg)
|