Patch models (#6326)

* TFAlbertFor{TokenClassification, MultipleChoice}

* Patch models

* BERT and TF BERT info
* Update check_repo
Lysandre Debut 2020-08-10 10:39:17 -04:00 committed by GitHub
parent 6028ed92bd
commit b99098abc7
6 changed files with 37 additions and 14 deletions

src/transformers/modeling_bert.py

@@ -933,7 +933,9 @@ class BertForPreTraining(BertPreTrainedModel):
 class BertLMHeadModel(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`."
+        if not config.is_decoder:
+            logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
@@ -1036,9 +1038,12 @@ class BertLMHeadModel(BertPreTrainedModel):
 class BertForMaskedLM(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
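
The practical effect of the change above: constructing these BERT heads with a mismatched is_decoder flag no longer raises an AssertionError, it only logs a hint. A minimal sketch of the new behavior (the tiny config values here are illustrative, not from this commit):

from transformers import BertConfig, BertLMHeadModel

# is_decoder defaults to False, so this used to trip the assert;
# after this patch the model is built and a hint is logged instead.
config = BertConfig(vocab_size=100, hidden_size=32, num_hidden_layers=2,
                    num_attention_heads=2, intermediate_size=64)
model = BertLMHeadModel(config)

# Standalone causal-LM use is still an explicit opt-in:
config.is_decoder = True
decoder = BertLMHeadModel(config)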

src/transformers/modeling_tf_bert.py

@@ -860,9 +860,12 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
 class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
@@ -936,7 +939,9 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
 class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert config.is_decoder, "If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`"
+        if not config.is_decoder:
+            logger.info("If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
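
The TF side gets the same relaxation. A short sketch of using TFBertLMHeadModel as a standalone decoder, again with made-up small config values:

import tensorflow as tf
from transformers import BertConfig, TFBertLMHeadModel

# A decoder-style config; with is_decoder=True no hint is logged.
config = BertConfig(vocab_size=100, hidden_size=32, num_hidden_layers=2,
                    num_attention_heads=2, intermediate_size=64, is_decoder=True)
model = TFBertLMHeadModel(config)

input_ids = tf.constant([[1, 2, 3, 4]])  # (batch, seq_len)
logits = model(input_ids)[0]             # causal LM logits, shape (1, 4, 100)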

tests/test_modeling_tf_albert.py

@@ -32,6 +32,7 @@ if is_tf_available():
         TFAlbertForMultipleChoice,
         TFAlbertForSequenceClassification,
         TFAlbertForQuestionAnswering,
+        TFAlbertForTokenClassification,
         TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
     )
@@ -109,6 +110,7 @@ class TFAlbertModelTester:
         config = AlbertConfig(
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
+            embedding_size=self.embedding_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
             intermediate_size=self.intermediate_size,
@@ -198,6 +200,19 @@ class TFAlbertModelTester:
         result = model(inputs)
         self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
 
+    def create_and_check_albert_for_token_classification(
+        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+    ):
+        config.num_labels = self.num_labels
+        model = TFAlbertForTokenClassification(config=config)
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
+            "token_type_ids": token_type_ids,
+        }
+        result = model(inputs)
+        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
+
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
         (
@@ -223,6 +238,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
             TFAlbertForMaskedLM,
             TFAlbertForSequenceClassification,
             TFAlbertForQuestionAnswering,
+            TFAlbertForTokenClassification,
+            TFAlbertForMultipleChoice,
         )
         if is_tf_available()
         else ()
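
With the import, tester method, and model list updated, the new ALBERT head is exercised end to end. A usage sketch mirroring the new create_and_check_albert_for_token_classification test (the small config values are illustrative):

import tensorflow as tf
from transformers import AlbertConfig, TFAlbertForTokenClassification

# num_labels sizes the per-token classifier head.
config = AlbertConfig(vocab_size=100, embedding_size=16, hidden_size=32,
                      num_hidden_layers=2, num_attention_heads=2,
                      intermediate_size=64, num_labels=5)
model = TFAlbertForTokenClassification(config=config)

input_ids = tf.constant([[5, 6, 7, 8]])
logits = model(input_ids)[0]  # per-token scores, shape (1, 4, 5)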

tests/test_modeling_tf_bert.py

@@ -265,6 +265,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         (
             TFBertModel,
             TFBertForMaskedLM,
+            TFBertLMHeadModel,
             TFBertForNextSentencePrediction,
             TFBertForPreTraining,
             TFBertForQuestionAnswering,

tests/test_modeling_tf_electra.py

@@ -202,6 +202,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
             TFElectraForTokenClassification,
             TFElectraForMultipleChoice,
             TFElectraForSequenceClassification,
+            TFElectraForQuestionAnswering,
         )
         if is_tf_available()
         else ()
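
TFElectraForQuestionAnswering, now covered by the common tests, follows the usual extractive-QA contract of separate start and end logits. A brief sketch under the same illustrative-config caveat:

import tensorflow as tf
from transformers import ElectraConfig, TFElectraForQuestionAnswering

config = ElectraConfig(vocab_size=100, embedding_size=16, hidden_size=32,
                       num_hidden_layers=2, num_attention_heads=2,
                       intermediate_size=64)
model = TFElectraForQuestionAnswering(config=config)

input_ids = tf.constant([[9, 10, 11, 12]])
outputs = model(input_ids)
start_logits, end_logits = outputs[0], outputs[1]  # each of shape (1, 4)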

utils/check_repo.py

@@ -18,12 +18,6 @@ IGNORE_NON_TESTED = [
     "DPRSpanPredictor",  # Building part of bigger (tested) model.
     "ReformerForMaskedLM",  # Needs to be setup as decoder.
     "T5Stack",  # Building part of bigger (tested) model.
-    "TFAlbertForMultipleChoice",  # TODO: fix
-    "TFAlbertForTokenClassification",  # TODO: fix
-    "TFBertLMHeadModel",  # TODO: fix
-    "TFElectraForMultipleChoice",  # Fix is in #6284
-    "TFElectraForQuestionAnswering",  # TODO: fix
-    "TFElectraForSequenceClassification",  # Fix is in #6284
     "TFElectraMainLayer",  # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
     "TFRobertaForMultipleChoice",  # TODO: fix
 ]
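
check_repo.py fails CI when a model class has no corresponding test, and IGNORE_NON_TESTED whitelists known gaps; deleting these six entries is what turns the new tests into an enforced invariant. A simplified sketch of that kind of check (the function below is illustrative, not the actual code in utils/check_repo.py):

# Illustration only: the real utils/check_repo.py check is more involved.
def check_models_are_tested(model_names, tested_model_names, ignore=IGNORE_NON_TESTED):
    # A model must either have a dedicated tester or be explicitly whitelisted.
    failures = [
        name
        for name in model_names
        if name not in tested_model_names and name not in ignore
    ]
    if failures:
        raise Exception(f"The following models have no tests: {failures}")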