RoBERTa doesn't print a warning when no special tokens are passed.

This commit is contained in:
Lysandre 2019-10-22 13:46:48 -04:00
parent 1cfd974868
commit 44286b94d3
2 changed files with 0 additions and 28 deletions

View File

@ -169,18 +169,6 @@ class RobertaModel(BertModel):
self.embeddings = RobertaEmbeddings(config)
self.init_weights()
def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
if input_ids[:, 0].sum().item() != 0:
logger.warning("A sequence with no special tokens has been passed to the RoBERTa model. "
"This model requires special tokens in order to work. "
"Please specify add_special_tokens=True in your tokenize.encode()"
"or tokenizer.convert_tokens_to_ids().")
return super(RobertaModel, self).forward(input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask)
@add_start_docstrings("""RoBERTa Model with a `language modeling` head on top. """,
ROBERTA_START_DOCSTRING, ROBERTA_INPUTS_DOCSTRING)

View File

@ -65,22 +65,6 @@ class TFRobertaMainLayer(TFBertMainLayer):
super(TFRobertaMainLayer, self).__init__(config, **kwargs)
self.embeddings = TFRobertaEmbeddings(config, name='embeddings')
def call(self, inputs, **kwargs):
# Check that input_ids starts with control token
if isinstance(inputs, (tuple, list)):
input_ids = inputs[0]
elif isinstance(inputs, dict):
input_ids = inputs.get('input_ids')
else:
input_ids = inputs
if tf.not_equal(tf.reduce_sum(input_ids[:, 0]), 0):
tf.print("A sequence with no special tokens has been passed to the RoBERTa model. "
"This model requires special tokens in order to work. "
"Please specify add_special_tokens=True in your encoding.")
return super(TFRobertaMainLayer, self).call(inputs, **kwargs)
class TFRobertaPreTrainedModel(TFPreTrainedModel):
""" An abstract class to handle weights initialization and