Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-31 02:02:21 +06:00
Patch models (#6326)
* TFAlbertFor{TokenClassification, MultipleChoice}
* Patch models
* BERT and TF BERT info s
* Update check_repo
This commit is contained in: parent 6028ed92bd, commit b99098abc7
@@ -933,7 +933,9 @@ class BertForPreTraining(BertPreTrainedModel):
 class BertLMHeadModel(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`."
+
+        if not config.is_decoder:
+            logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`")
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
@@ -1036,9 +1038,12 @@ class BertLMHeadModel(BertPreTrainedModel):
 class BertForMaskedLM(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
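The two hunks above replace hard assertions in the PyTorch BERT heads with informational log messages, so a mismatched `is_decoder` flag no longer aborts model construction. A minimal sketch of the resulting behavior (the top-level imports and logging setup are illustrative assumptions, not part of the diff):

    import logging

    from transformers import BertConfig, BertForMaskedLM, BertLMHeadModel

    # Make INFO-level records visible so the new hints actually show up.
    logging.basicConfig(level=logging.INFO)

    # Before this patch: AssertionError ("add `is_decoder=True`").
    # After it: the model is built and the hint is only logged.
    lm_head = BertLMHeadModel(BertConfig(is_decoder=False))

    # Likewise, a decoder config no longer aborts BertForMaskedLM; it only logs
    # that bi-directional self-attention expects `is_decoder=False`.
    mlm = BertForMaskedLM(BertConfig(is_decoder=True))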
@@ -860,9 +860,12 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
 class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
@@ -936,7 +939,9 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
 class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert config.is_decoder, "If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`"
+
+        if not config.is_decoder:
+            logger.info("If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`")
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
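The TF BERT heads get the same treatment: the `is_decoder` checks in TFBertForMaskedLM and TFBertLMHeadModel now log instead of asserting. Because the hints are emitted at INFO level, they are easy to miss with default logging; a small sketch of one way to surface them (assumes TensorFlow is installed; newer releases also expose transformers.logging.set_verbosity_info()):

    import logging

    from transformers import BertConfig, TFBertLMHeadModel

    # Raise the transformers package logger to INFO so the hint is printed.
    logging.basicConfig(level=logging.INFO)
    logging.getLogger("transformers").setLevel(logging.INFO)

    # No longer raises; only logs "add `is_decoder=True.`".
    model = TFBertLMHeadModel(BertConfig(is_decoder=False))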
@@ -32,6 +32,7 @@ if is_tf_available():
         TFAlbertForMultipleChoice,
         TFAlbertForSequenceClassification,
         TFAlbertForQuestionAnswering,
+        TFAlbertForTokenClassification,
         TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
     )
@@ -109,6 +110,7 @@ class TFAlbertModelTester:
         config = AlbertConfig(
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
+            embedding_size=self.embedding_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
             intermediate_size=self.intermediate_size,
@@ -198,6 +200,19 @@ class TFAlbertModelTester:
         result = model(inputs)
         self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
 
+    def create_and_check_albert_for_token_classification(
+        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+    ):
+        config.num_labels = self.num_labels
+        model = TFAlbertForTokenClassification(config=config)
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
+            "token_type_ids": token_type_ids,
+        }
+        result = model(inputs)
+        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
+
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
         (
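The new tester method above checks that TFAlbertForTokenClassification returns one row of logits per token. A standalone sketch of the same check outside the test harness (the tiny config values are illustrative assumptions in the spirit of the tester, not taken from the diff):

    import tensorflow as tf

    from transformers import AlbertConfig, TFAlbertForTokenClassification

    config = AlbertConfig(
        vocab_size=99, hidden_size=32, embedding_size=16, num_hidden_layers=2,
        num_attention_heads=4, intermediate_size=37, num_labels=3,
    )
    model = TFAlbertForTokenClassification(config=config)

    batch_size, seq_length = 2, 7
    inputs = {
        "input_ids": tf.random.uniform((batch_size, seq_length), maxval=config.vocab_size, dtype=tf.int32),
        "attention_mask": tf.ones((batch_size, seq_length), dtype=tf.int32),
    }
    result = model(inputs)
    # Same shape the test asserts: (batch_size, seq_length, num_labels)
    print(result["logits"].shape)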
@@ -223,6 +238,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
             TFAlbertForMaskedLM,
             TFAlbertForSequenceClassification,
             TFAlbertForQuestionAnswering,
+            TFAlbertForTokenClassification,
+            TFAlbertForMultipleChoice,
         )
         if is_tf_available()
         else ()
@@ -265,6 +265,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         (
             TFBertModel,
             TFBertForMaskedLM,
+            TFBertLMHeadModel,
             TFBertForNextSentencePrediction,
             TFBertForPreTraining,
             TFBertForQuestionAnswering,
@@ -202,6 +202,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
             TFElectraForTokenClassification,
             TFElectraForMultipleChoice,
             TFElectraForSequenceClassification,
+            TFElectraForQuestionAnswering,
         )
         if is_tf_available()
         else ()
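With TFElectraForQuestionAnswering now registered in the Electra test suite, its span-extraction head is exercised by the shared tests as well. A hedged usage sketch of that head (again with made-up tiny config values; tuple indexing is used so it works whether the model returns a plain tuple or a model-output object):

    import tensorflow as tf

    from transformers import ElectraConfig, TFElectraForQuestionAnswering

    config = ElectraConfig(
        vocab_size=99, hidden_size=32, num_hidden_layers=2,
        num_attention_heads=4, intermediate_size=37,
    )
    model = TFElectraForQuestionAnswering(config=config)

    input_ids = tf.random.uniform((2, 7), maxval=config.vocab_size, dtype=tf.int32)
    outputs = model({"input_ids": input_ids})
    start_logits, end_logits = outputs[0], outputs[1]
    # Both are (batch_size, seq_length) scores for answer span boundaries.
    print(start_logits.shape, end_logits.shape)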
@@ -18,12 +18,6 @@ IGNORE_NON_TESTED = [
     "DPRSpanPredictor",  # Building part of bigger (tested) model.
     "ReformerForMaskedLM",  # Needs to be setup as decoder.
     "T5Stack",  # Building part of bigger (tested) model.
-    "TFAlbertForMultipleChoice",  # TODO: fix
-    "TFAlbertForTokenClassification",  # TODO: fix
-    "TFBertLMHeadModel",  # TODO: fix
-    "TFElectraForMultipleChoice",  # Fix is in #6284
-    "TFElectraForQuestionAnswering",  # TODO: fix
-    "TFElectraForSequenceClassification",  # Fix is in #6284
     "TFElectraMainLayer",  # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
     "TFRobertaForMultipleChoice",  # TODO: fix
 ]
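Dropping those six entries from IGNORE_NON_TESTED only works because the corresponding model classes now appear in a tester's all_model_classes (the additions above). A rough sketch of how such an ignore list is typically consumed by a repository-consistency check (a conceptual stand-in, not the actual utils/check_repo.py code):

    def find_untested_models(model_classes, tested_classes, ignore_non_tested):
        """Return model class names that are neither tested nor explicitly whitelisted."""
        failures = []
        for name in model_classes:
            if name in ignore_non_tested:
                continue  # e.g. building blocks of bigger, already-tested models
            if name not in tested_classes:
                failures.append(name)
        return failures

    # Once "TFBertLMHeadModel" leaves the ignore list, it must be covered by a
    # test suite or the check reports it as a failure.
    assert find_untested_models(
        model_classes={"TFBertLMHeadModel"},
        tested_classes={"TFBertLMHeadModel"},
        ignore_non_tested=set(),
    ) == []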