Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-04 05:10:06 +06:00
Switch return_dict to True by default. (#8530)
* Use the CI to identify failing tests
* Remove from all examples and tests
* More default switch
* Fixes
* More test fixes
* More fixes
* Last fixes hopefully
* Use the CI to identify failing tests
* Remove from all examples and tests
* More default switch
* Fixes
* More test fixes
* More fixes
* Last fixes hopefully
* Run on the real suite
* Fix slow tests
This commit is contained in:
parent 0d0a0785fd
commit 1073a2bde5
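In short: `PretrainedConfig.return_dict` now defaults to `True`, so every forward pass returns a `ModelOutput` with named fields instead of a plain tuple, and the explicit `return_dict=True` arguments scattered through the docs, examples, and tests below become redundant and are removed. A minimal sketch of the new default behavior (the checkpoint and input strings are borrowed from the doc examples in this diff; any model would do):

```python
from transformers import BertForSequenceClassification, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# After this commit, outputs is a ModelOutput: named attribute access
# works without passing return_dict=True at every call site.
outputs = model(**inputs)
logits = outputs.logits

# Callers that still need the old tuple behavior can opt out per call...
tuple_outputs = model(**inputs, return_dict=False)
# ...or convert a ModelOutput back to a tuple, as the updated SQuAD
# evaluation loop further down in this diff does.
tuple_outputs = outputs.to_tuple()
```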
@@ -40,7 +40,7 @@ Usage:
 labels = tokenizer('This is a short summary', return_tensors="pt").input_ids

 # train...
-loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels, return_dict=True).loss
+loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
 loss.backward()


@@ -64,7 +64,7 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
 input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
 labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
 # the forward function automatically creates the correct decoder_input_ids
-loss = model(input_ids=input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids=input_ids, labels=labels).loss

 - Supervised training

@@ -77,7 +77,7 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
 input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
 labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
 # the forward function automatically creates the correct decoder_input_ids
-loss = model(input_ids=input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids=input_ids, labels=labels).loss


 T5Config
@@ -89,7 +89,7 @@ each other. The process is the following:
 >>> import torch

 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
->>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
+>>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")

 >>> classes = ["not paraphrase", "is paraphrase"]

@@ -122,7 +122,7 @@ each other. The process is the following:
 >>> import tensorflow as tf

 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
->>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
+>>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")

 >>> classes = ["not paraphrase", "is paraphrase"]

@@ -211,7 +211,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
 >>> import torch

 >>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
->>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
+>>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

 >>> text = r"""
 ... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
@@ -253,7 +253,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
 >>> import tensorflow as tf

 >>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
->>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
+>>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

 >>> text = r"""
 ... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
@@ -373,7 +373,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
 >>> import torch

 >>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
->>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased")

 >>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."

@@ -389,7 +389,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
 >>> import tensorflow as tf

 >>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
->>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased")

 >>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."

@@ -437,7 +437,7 @@ of tokens.
 >>> from torch.nn import functional as F

 >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
->>> model = AutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("gpt2")

 >>> sequence = f"Hugging Face is based in DUMBO, New York City, and "

@@ -461,7 +461,7 @@ of tokens.
 >>> import tensorflow as tf

 >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
->>> model = TFAutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("gpt2")

 >>> sequence = f"Hugging Face is based in DUMBO, New York City, and "

@@ -520,7 +520,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
 >>> ## PYTORCH CODE
 >>> from transformers import AutoModelWithLMHead, AutoTokenizer

->>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased")
 >>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")

 >>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
@@ -545,7 +545,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
 >>> ## TENSORFLOW CODE
 >>> from transformers import TFAutoModelWithLMHead, AutoTokenizer

->>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased")
 >>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")

 >>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
@@ -664,7 +664,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
 >>> from transformers import AutoModelForTokenClassification, AutoTokenizer
 >>> import torch

->>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
+>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

 >>> label_list = [
@@ -692,7 +692,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
 >>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer
 >>> import tensorflow as tf

->>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
+>>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

 >>> label_list = [
@@ -790,7 +790,7 @@ CNN / Daily Mail), it yields very good results.
 >>> ## PYTORCH CODE
 >>> from transformers import AutoModelWithLMHead, AutoTokenizer

->>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")

 >>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
@@ -799,7 +799,7 @@ CNN / Daily Mail), it yields very good results.
 >>> ## TENSORFLOW CODE
 >>> from transformers import TFAutoModelWithLMHead, AutoTokenizer

->>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")

 >>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
@@ -843,7 +843,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
 >>> ## PYTORCH CODE
 >>> from transformers import AutoModelWithLMHead, AutoTokenizer

->>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")

 >>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="pt")
@@ -851,7 +851,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
 >>> ## TENSORFLOW CODE
 >>> from transformers import TFAutoModelWithLMHead, AutoTokenizer

->>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")

 >>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="tf")
@@ -39,7 +39,7 @@ head on top of the encoder with an output size of 2. Models are initialized in `
 .. code-block:: python

 from transformers import BertForSequenceClassification
-model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True)
+model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
 model.train()

 This is useful because it allows us to make use of the pre-trained BERT encoder and easily train it on whatever
@@ -210,7 +210,6 @@
 " visual_feats=features,\n",
 " visual_pos=normalized_boxes,\n",
 " token_type_ids=inputs.token_type_ids,\n",
-" return_dict=True,\n",
 " output_attentions=False,\n",
 " )\n",
 " output_vqa = lxmert_vqa(\n",
@@ -219,7 +218,6 @@
 " visual_feats=features,\n",
 " visual_pos=normalized_boxes,\n",
 " token_type_ids=inputs.token_type_ids,\n",
-" return_dict=True,\n",
 " output_attentions=False,\n",
 " )\n",
 " # get prediction\n",
@@ -321,7 +321,7 @@ def evaluate(args, model, tokenizer, prefix=""):
 eval_feature = features[feature_index.item()]
 unique_id = int(eval_feature.unique_id)

-output = [to_list(output[i]) for output in outputs]
+output = [to_list(output[i]) for output in outputs.to_tuple()]

 # Some models (XLNet, XLM) use 5 arguments for their predictions, while the other "simpler"
 # models only use two.
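The hunk above is one of the few places in this diff where the new default requires new code rather than just deleting an argument: `ModelOutput` is dict-like, so iterating over it yields field names rather than tensors, and loops that used to walk a tuple of model outputs must now go through `to_tuple()`. A hedged sketch of the pattern (`to_list` and the loop come from the hunk; the QA model and the `start_logits` field are illustrative):

```python
outputs = model(**inputs)  # a ModelOutput after this commit

# Named access is the new idiomatic style:
start_logits = outputs.start_logits

# Iteration and unpacking that expect a tuple of tensors must convert first,
# because iterating a dict-like ModelOutput yields its keys:
output = [to_list(output[i]) for output in outputs.to_tuple()]
```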
@@ -95,7 +95,7 @@ def evaluate_batch_retrieval(args, rag_model, questions):
 truncation=True,
 )["input_ids"].to(args.device)

-question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids, return_dict=True)
+question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids)
 question_enc_pool_output = question_enc_outputs.pooler_output

 result = rag_model.retriever(
@@ -204,7 +204,6 @@ class GenerativeQAModule(BaseTransformer):
 decoder_input_ids=decoder_input_ids,
 use_cache=False,
 labels=lm_labels,
-return_dict=True,
 **rag_kwargs,
 )

@@ -47,7 +47,7 @@ def embed(documents: dict, ctx_encoder: DPRContextEncoder, ctx_tokenizer: DPRCon
 input_ids = ctx_tokenizer(
 documents["title"], documents["text"], truncation=True, padding="longest", return_tensors="pt"
 )["input_ids"]
-embeddings = ctx_encoder(input_ids.to(device=device), return_dict=True).pooler_output
+embeddings = ctx_encoder(input_ids.to(device=device)).pooler_output
 return {"embeddings": embeddings.detach().cpu().numpy()}


@@ -153,7 +153,6 @@ class SummarizationDistiller(SummarizationModule):
 output_hidden_states=self.do_calc_hidden_loss,
 output_attentions=False,
 use_cache=False,
-return_dict=True,
 )
 lm_logits = student_outputs.logits

@@ -179,7 +178,6 @@ class SummarizationDistiller(SummarizationModule):
 input_ids,
 attention_mask=src_mask,
 output_hidden_states=self.do_calc_hidden_loss,
-return_dict=True,
 )
 if self.different_base_models:
 teacher_enc_outputs = all_teacher_encoder_outputs.last_hidden_state
@@ -199,7 +197,6 @@ class SummarizationDistiller(SummarizationModule):
 decoder_input_ids=decoder_input_ids,
 output_hidden_states=self.do_calc_hidden_loss,
 use_cache=False, # since we are not passing labels, never let this default to True
-return_dict=True,
 )
 dec_mask = decoder_input_ids.ne(pad_token_id)
 loss_ce = self.calc_ce_loss(dec_mask, lm_logits, teacher_outputs.logits)
@@ -185,7 +185,7 @@ class TestSummarizationDistiller(TestCasePlus):

 @require_torch_non_multi_gpu_but_fix_me
 def test_loss_fn(self):
-model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
+model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY)
 input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
 target_ids = torch.tensor([[0, 4, 8, 2], [0, 8, 2, 1]], dtype=torch.long, device=model.device)
 decoder_input_ids = target_ids[:, :-1].contiguous() # Why this line?
@@ -23,7 +23,7 @@ target_str = "us rejects charges against its ambassador in bolivia"
 input_ids = tokenizer(input_str, return_tensors="pt").input_ids
 labels = tokenizer(target_str, return_tensors="pt").input_ids

-loss = model(input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids, labels=labels).loss
 ```

 ### Citation
@@ -26,7 +26,7 @@ target_str = "us rejects charges against its ambassador in bolivia"
 input_ids = tokenizer(input_str, return_tensors="pt").input_ids
 labels = tokenizer(target_str, return_tensors="pt").input_ids

-loss = model(input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids, labels=labels).loss
 ```

 Note that since this model is a multi-lingual model it can be fine-tuned on all kinds of other languages.
@@ -45,7 +45,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import numpy as np
 tokenizer = AutoTokenizer.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code')
-model = AutoModelForSequenceClassification.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code', return_dict=True)
+model = AutoModelForSequenceClassification.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code')

 inputs = tokenizer("your code here", return_tensors="pt", truncation=True, padding='max_length')
 labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
@@ -13,7 +13,7 @@ sentences = ["Hello World", "Hallo Welt"]
 encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=64, return_tensors='pt')

 with torch.no_grad():
-model_output = model(**encoded_input, return_dict=True)
+model_output = model(**encoded_input)

 embeddings = model_output.pooler_output
 embeddings = torch.nn.functional.normalize(embeddings)
@@ -59,7 +59,7 @@ print(f"num of params {tiny_model.num_parameters()}")

 # Test
 batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
-outputs = tiny_model(**batch, return_dict=True)
+outputs = tiny_model(**batch)

 print("test output:", len(outputs.logits[0]))

@@ -30,7 +30,7 @@ print(f"num of params {tiny_model.num_parameters()}")

 # Test
 batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
-outputs = tiny_model(**batch, return_dict=True)
+outputs = tiny_model(**batch)

 print("test output:", len(outputs.logits[0]))

@@ -55,7 +55,7 @@ class PretrainedConfig(object):
 Whether or not the model should returns all attentions.
 use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
 Whether or not the model should return the last key/values attentions (not used by all models).
-return_dict (:obj:`bool`, `optional`, defaults to :obj:`False`):
+return_dict (:obj:`bool`, `optional`, defaults to :obj:`True`):
 Whether or not the model should return a :class:`~transformers.file_utils.ModelOutput` instead of a plain
 tuple.
 is_encoder_decoder (:obj:`bool`, `optional`, defaults to :obj:`False`):
@@ -163,7 +163,7 @@ class PretrainedConfig(object):

 def __init__(self, **kwargs):
 # Attributes with defaults
-self.return_dict = kwargs.pop("return_dict", False)
+self.return_dict = kwargs.pop("return_dict", True)
 self.output_hidden_states = kwargs.pop("output_hidden_states", False)
 self.output_attentions = kwargs.pop("output_attentions", False)
 self.use_cache = kwargs.pop("use_cache", True) # Not used by all models
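The two `PretrainedConfig` hunks above are the root of the whole diff: the docstring and the `__init__` default for `return_dict` both flip from `False` to `True`. Downstream code that depends on tuple outputs can still restore the old behavior at load time; a minimal sketch, assuming a standard BERT checkpoint:

```python
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# Unrecognized from_pretrained kwargs are forwarded into the config,
# so this overrides the new default for the whole model:
model = BertModel.from_pretrained("bert-base-uncased", return_dict=False)

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)  # a plain tuple again

# The flag can also be toggled later on the live config:
model.config.return_dict = True
```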
@@ -559,7 +559,7 @@ PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
 >>> import torch

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
@@ -576,7 +576,7 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
 >>> import torch

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
 >>> inputs = tokenizer(question, text, return_tensors='pt')
@@ -596,7 +596,7 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
 >>> import torch

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
@@ -612,7 +612,7 @@ PT_MASKED_LM_SAMPLE = r"""
 >>> import torch

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
 >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
@@ -629,7 +629,7 @@ PT_BASE_MODEL_SAMPLE = r"""
 >>> import torch

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -644,7 +644,7 @@ PT_MULTIPLE_CHOICE_SAMPLE = r"""
 >>> import torch

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> choice0 = "It is eaten with a fork and a knife."
@@ -666,7 +666,7 @@ PT_CAUSAL_LM_SAMPLE = r"""
 >>> from transformers import {tokenizer_class}, {model_class}

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs, labels=inputs["input_ids"])
@@ -681,7 +681,7 @@ TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
 >>> import tensorflow as tf

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> input_ids = inputs["input_ids"]
@@ -699,7 +699,7 @@ TF_QUESTION_ANSWERING_SAMPLE = r"""
 >>> import tensorflow as tf

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
 >>> input_dict = tokenizer(question, text, return_tensors='tf')
@@ -718,7 +718,7 @@ TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
 >>> import tensorflow as tf

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> inputs["labels"] = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
@@ -735,7 +735,7 @@ TF_MASKED_LM_SAMPLE = r"""
 >>> import tensorflow as tf

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf")
 >>> inputs["labels"] = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
@@ -752,7 +752,7 @@ TF_BASE_MODEL_SAMPLE = r"""
 >>> import tensorflow as tf

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> outputs = model(inputs)
@@ -767,7 +767,7 @@ TF_MULTIPLE_CHOICE_SAMPLE = r"""
 >>> import tensorflow as tf

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> choice0 = "It is eaten with a fork and a knife."
@@ -788,7 +788,7 @@ TF_CAUSAL_LM_SAMPLE = r"""
 >>> import tensorflow as tf

 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> outputs = model(inputs)
@@ -416,7 +416,7 @@ class AlbertTransformer(nn.Module):
 head_mask=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):
 hidden_states = self.embedding_hidden_mapping_in(hidden_states)

@@ -764,7 +764,7 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
 >>> import torch

 >>> tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
->>> model = AlbertForPreTraining.from_pretrained('albert-base-v2', return_dict=True)
+>>> model = AlbertForPreTraining.from_pretrained('albert-base-v2')

 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
 >>> outputs = model(input_ids)
@@ -329,7 +329,7 @@ class BartEncoder(nn.Module):
 self.layer_norm = LayerNorm(config.d_model) if config.add_final_layer_norm else None

 def forward(
-self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=False
+self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=True
 ):
 """
 Args:
@@ -528,7 +528,7 @@ class BartDecoder(nn.Module):
 use_cache=False,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 **unused,
 ):
 """
@@ -446,7 +446,7 @@ class BertEncoder(nn.Module):
 encoder_attention_mask=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):
 all_hidden_states = () if output_hidden_states else None
 all_self_attentions = () if output_attentions else None
@@ -920,7 +920,7 @@ class BertForPreTraining(BertPreTrainedModel):
 >>> import torch

 >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
->>> model = BertForPreTraining.from_pretrained('bert-base-uncased', return_dict=True)
+>>> model = BertForPreTraining.from_pretrained('bert-base-uncased')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -1036,7 +1036,7 @@ class BertLMHeadModel(BertPreTrainedModel):
 >>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
 >>> config = BertConfig.from_pretrained("bert-base-cased")
 >>> config.is_decoder = True
->>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config, return_dict=True)
+>>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config)

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -1250,7 +1250,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
 >>> import torch

 >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
->>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased', return_dict=True)
+>>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')

 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
@@ -463,7 +463,7 @@ class BertGenerationDecoder(BertGenerationPreTrainedModel):
 >>> tokenizer = BertGenerationTokenizer.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder')
 >>> config = BertGenerationConfig.from_pretrained("google/bert_for_seq_generation_L-24_bbc_encoder")
 >>> config.is_decoder = True
->>> model = BertGenerationDecoder.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder', config=config, return_dict=True)
+>>> model = BertGenerationDecoder.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder', config=config)

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -384,7 +384,7 @@ class DebertaEncoder(nn.Module):
 output_attentions=False,
 query_states=None,
 relative_pos=None,
-return_dict=False,
+return_dict=True,
 ):
 attention_mask = self.get_attention_mask(attention_mask)
 relative_pos = self.get_rel_pos(hidden_states, query_states, relative_pos)
@@ -885,7 +885,7 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
 >>> import torch

 >>> tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
->>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased', return_dict=True)
+>>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased')

 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> choice0 = "It is eaten with a fork and a knife."
@@ -455,7 +455,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):

 >>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
 >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
->>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', return_dict=True)
+>>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
 >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
 >>> embeddings = model(input_ids).pooler_output
 """
@@ -533,7 +533,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):

 >>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
 >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
->>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', return_dict=True)
+>>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
 >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
 >>> embeddings = model(input_ids).pooler_output
 """
@@ -609,7 +609,7 @@ class DPRReader(DPRPretrainedReader):

 >>> from transformers import DPRReader, DPRReaderTokenizer
 >>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
->>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', return_dict=True)
+>>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')
 >>> encoded_inputs = tokenizer(
 ... questions=["What is love ?"],
 ... titles=["Haddaway"],
@@ -442,7 +442,7 @@ class ElectraEncoder(nn.Module):
 encoder_attention_mask=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):
 all_hidden_states = () if output_hidden_states else None
 all_self_attentions = () if output_attentions else None
@@ -370,7 +370,7 @@ class EncoderDecoderModel(PreTrainedModel):
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)

 >>> # training
->>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids, return_dict=True)
+>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
 >>> loss, logits = outputs.loss, outputs.logits

 >>> # save and load from pretrained
@@ -434,7 +434,7 @@ class FSMTEncoder(nn.Module):
 ) # type: List[EncoderLayer]

 def forward(
-self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=False
+self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=True
 ):
 """
 Args:
@@ -617,7 +617,7 @@ class FSMTDecoder(nn.Module):
 use_cache=False,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 **unused,
 ):
 """
@@ -619,7 +619,7 @@ class FunnelEncoder(nn.Module):
 token_type_ids=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):
 # The pooling is not implemented on long tensors, so we convert this mask.
 attention_mask = attention_mask.type_as(inputs_embeds)
@@ -698,7 +698,7 @@ class FunnelDecoder(nn.Module):
 token_type_ids=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):
 upsampled_hidden = upsample(
 final_hidden,
@@ -1111,7 +1111,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
 >>> import torch

 >>> tokenizer = FunnelTokenizer.from_pretrained('funnel-transformer/small')
->>> model = FunnelForPreTraining.from_pretrained('funnel-transformer/small', return_dict=True)
+>>> model = FunnelForPreTraining.from_pretrained('funnel-transformer/small')

 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors= "pt")
 >>> logits = model(**inputs).logits
@@ -911,7 +911,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
 >>> from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel

 >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
->>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2, return_dict=True)
+>>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2')

 >>> # Add a [CLS] to the vocabulary (we should train it also!)
 >>> num_added_tokens = tokenizer.add_special_tokens({'cls_token': '[CLS]'})
@@ -376,7 +376,7 @@ class LayoutLMEncoder(nn.Module):
 encoder_attention_mask=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):
 all_hidden_states = () if output_hidden_states else None
 all_self_attentions = () if output_attentions else None
@@ -1050,7 +1050,7 @@ class LongformerEncoder(nn.Module):
 attention_mask=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):

 is_index_masked = attention_mask < 0
@@ -1388,7 +1388,7 @@ class LongformerModel(LongformerPreTrainedModel):
 >>> import torch
 >>> from transformers import LongformerModel, LongformerTokenizer

->>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096', return_dict=True)
+>>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
 >>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')

 >>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
@@ -1526,7 +1526,7 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
 >>> import torch
 >>> from transformers import LongformerForMaskedLM, LongformerTokenizer

->>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096', return_dict=True)
+>>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096')
 >>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')

 >>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
@ -1742,7 +1742,7 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel):
|
|||||||
>>> import torch
|
>>> import torch
|
||||||
|
|
||||||
>>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
|
>>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
|
||||||
>>> model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa", return_dict=True)
|
>>> model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
|
||||||
|
|
||||||
>>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
>>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||||
>>> encoding = tokenizer(question, text, return_tensors="pt")
|
>>> encoding = tokenizer(question, text, return_tensors="pt")
|
||||||
|
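For downstream code, the practical effect of these docstring updates is that a bare forward call now returns a ModelOutput object rather than a plain tuple, and the old tuple form stays available on request. A minimal sketch of both behaviors, assuming the same Longformer checkpoint as above (illustrative, not part of the commit):

import torch
from transformers import LongformerModel, LongformerTokenizer

tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
model = LongformerModel.from_pretrained('allenai/longformer-base-4096')

inputs = tokenizer("Hello world!", return_tensors="pt")
outputs = model(**inputs)                    # a ModelOutput by default now
hidden = outputs.last_hidden_state           # fields are read by name
legacy = model(**inputs, return_dict=False)  # opt back into the old tuple
assert torch.equal(legacy[0], hidden)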
@@ -558,7 +558,7 @@ class MobileBertEncoder(nn.Module):
         encoder_attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None
@@ -1006,7 +1006,7 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
             >>> import torch

             >>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
-            >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased", return_dict=True)
+            >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")

             >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
             >>> outputs = model(input_ids)
@@ -1216,7 +1216,7 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
             >>> import torch

             >>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
-            >>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased', return_dict=True)
+            >>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')

             >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
             >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
@@ -670,7 +670,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
             >>> import torch

             >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-            >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt', return_dict=True)
+            >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
             >>> tokenizer.add_special_tokens({'cls_token': '[CLS]'}) # Add a [CLS] to the vocabulary (we should train it also!)
             >>> model.resize_token_embeddings(len(tokenizer))

@@ -1180,7 +1180,7 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel):
             >>> import torch

             >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
-            >>> model = ProphetNetEncoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', return_dict=True)
+            >>> model = ProphetNetEncoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone')
             >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
             >>> outputs = model(**inputs)

@@ -1319,7 +1319,7 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel):
             >>> import torch

             >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
-            >>> model = ProphetNetDecoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', add_cross_attention=False, return_dict=True)
+            >>> model = ProphetNetDecoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', add_cross_attention=False)
             >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
             >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
             >>> outputs = model(**inputs)
@@ -1621,7 +1621,7 @@ class ProphetNetModel(ProphetNetPreTrainedModel):

             >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
             >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

             >>> last_hidden_states = outputs.last_hidden_state # main stream hidden states
             >>> last_hidden_states_ngram = outputs.last_hidden_state_ngram # predict hidden states
@@ -1731,7 +1731,7 @@ class ProphetNetForConditionalGeneration(ProphetNetPreTrainedModel):

             >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
             >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

             >>> logits_next_token = outputs.logits # logits to predict next token as usual
             >>> logits_ngram_next_tokens = outputs.logits_ngram # logits to predict 2nd, 3rd, ... next tokens
@@ -1940,10 +1940,10 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
             >>> import torch

             >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
-            >>> model = ProphetNetForCausalLM.from_pretrained('patrickvonplaten/prophetnet-decoder-clm-large-uncased', return_dict=True)
+            >>> model = ProphetNetForCausalLM.from_pretrained('patrickvonplaten/prophetnet-decoder-clm-large-uncased')
             >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
             >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
-            >>> outputs = model(**inputs, return_dict=True)
+            >>> outputs = model(**inputs)

             >>> logits = outputs.logits

@@ -1962,7 +1962,7 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
             ... )
             >>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids
             >>> labels = tokenizer_dec("us rejects charges against its ambassador in bolivia", return_tensors="pt").input_ids
-            >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:], return_dict=True)
+            >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:])

             >>> loss = outputs.loss
         """
@@ -558,9 +558,7 @@ class RagModel(RagPreTrainedModel):
         if encoder_outputs is None:

             if has_to_retrieve:
-                question_enc_outputs = self.question_encoder(
-                    input_ids, attention_mask=attention_mask, return_dict=True
-                )
+                question_enc_outputs = self.question_encoder(input_ids, attention_mask=attention_mask)
                 question_encoder_last_hidden_state = question_enc_outputs[0] # hidden states of question encoder

                 retriever_outputs = self.retriever(
@@ -620,7 +618,6 @@ class RagModel(RagPreTrainedModel):
             decoder_attention_mask=decoder_attention_mask,
             past_key_values=past_key_values,
             use_cache=use_cache,
-            return_dict=True,
         )

         if not has_to_retrieve:
@@ -1364,7 +1361,7 @@ class RagTokenForGeneration(RagPreTrainedModel):
         batch_size = context_input_ids.shape[0] // n_docs

         encoder = self.rag.generator.get_encoder()
-        encoder_outputs = encoder(input_ids=context_input_ids, attention_mask=context_attention_mask, return_dict=True)
+        encoder_outputs = encoder(input_ids=context_input_ids, attention_mask=context_attention_mask)

         input_ids = torch.full(
             (batch_size * num_beams, 1),
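Dropping the explicit `return_dict=True` on the question-encoder call is safe because a ModelOutput still supports positional indexing, so the `question_enc_outputs[0]` access above keeps working unchanged. A self-contained sketch of that behavior (illustrative, not part of the commit):

import torch
from transformers.modeling_outputs import BaseModelOutput

out = BaseModelOutput(last_hidden_state=torch.zeros(1, 4, 8))
assert out[0] is out.last_hidden_state     # positional indexing still works
assert out["last_hidden_state"] is out[0]  # so does key-style access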
@@ -390,7 +390,7 @@ class RobertaEncoder(nn.Module):
         encoder_attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None
@@ -770,7 +770,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel):
             >>> import torch

             >>> tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
-            >>> config = RobertaConfig.from_pretrained("roberta-base", return_dict=True)
+            >>> config = RobertaConfig.from_pretrained("roberta-base")
             >>> config.is_decoder = True
             >>> model = RobertaForCausalLM.from_pretrained('roberta-base', config=config)

@@ -314,7 +314,7 @@ class SqueezeBertEncoder(nn.Module):
         head_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):

         if head_mask is None:
@@ -534,7 +534,7 @@ class T5Block(nn.Module):
         past_key_value=None,
         use_cache=False,
         output_attentions=False,
-        return_dict=False,
+        return_dict=True,
     ):

         if past_key_value is not None:
@@ -1022,7 +1022,7 @@ class T5Model(T5PreTrainedModel):

             >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
             >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

             >>> last_hidden_states = outputs.last_hidden_state
         """
@@ -1177,7 +1177,7 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
             >>> from transformers import T5Tokenizer, T5ForConditionalGeneration

             >>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
-            >>> model = T5ForConditionalGeneration.from_pretrained('t5-small', return_dict=True)
+            >>> model = T5ForConditionalGeneration.from_pretrained('t5-small')

             >>> input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
             >>> labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2> </s>', return_tensors='pt').input_ids
@@ -1063,7 +1063,7 @@ class TFBartForConditionalGeneration(TFPretrainedBartModel):
             TXT = "My friends are <mask> but they eat too many carbs."
             model = TFBartForConditionalGeneration.from_pretrained(mname)
             batch = tokenizer([TXT], return_tensors='tf')
-            logits = model(inputs=batch.input_ids, return_dict=True).logits
+            logits = model(inputs=batch.input_ids).logits
             probs = tf.nn.softmax(logits[0])
             # probs[5] is associated with the mask token
         """
@@ -466,7 +466,7 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):

             >>> from transformers import TFDPRContextEncoder, DPRContextEncoderTokenizer
             >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
-            >>> model = TFDPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', return_dict=True, from_pt=True)
+            >>> model = TFDPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', from_pt=True)
             >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"]
             >>> embeddings = model(input_ids).pooler_output
         """
@@ -565,7 +565,7 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):

             >>> from transformers import TFDPRQuestionEncoder, DPRQuestionEncoderTokenizer
             >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
-            >>> model = TFDPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', return_dict=True, from_pt=True)
+            >>> model = TFDPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', from_pt=True)
             >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"]
             >>> embeddings = model(input_ids).pooler_output
         """
@@ -663,7 +663,7 @@ class TFDPRReader(TFDPRPretrainedReader):

             >>> from transformers import TFDPRReader, DPRReaderTokenizer
             >>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
-            >>> model = TFDPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', return_dict=True, from_pt=True)
+            >>> model = TFDPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', from_pt=True)
             >>> encoded_inputs = tokenizer(
             ...     questions=["What is love ?"],
             ...     titles=["Haddaway"],
@@ -634,7 +634,7 @@ class TFFunnelEncoder(tf.keras.layers.Layer):
         token_type_ids=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
         training=False,
     ):
         # The pooling is not implemented on long tensors, so we convert this mask.
@@ -719,7 +719,7 @@ class TFFunnelDecoder(tf.keras.layers.Layer):
         token_type_ids=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
         training=False,
     ):
         upsampled_hidden = upsample(
@@ -1275,6 +1275,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
             obj_labels = inputs.pop("obj_labels", obj_labels)
             matched_label = inputs.pop("matched_label", matched_label)
             ans = inputs.pop("ans", ans)
+        return_dict = return_dict if return_dict is not None else self.lxmert.return_dict

         lxmert_output = self.lxmert(
             inputs,
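The line added to TFLxmertForPreTraining follows the usual fallback convention: a per-call `return_dict` wins, otherwise the config default applies. Roughly (a sketch of the pattern, names illustrative):

def resolve_return_dict(return_dict, config):
    # an explicit per-call value takes precedence; None falls back to the config
    return return_dict if return_dict is not None else config.return_dict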
@@ -1022,7 +1022,7 @@ class TFT5Model(TFT5PreTrainedModel):

             >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="tf").input_ids # Batch size 1
             >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1
-            >>> outputs = model(input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+            >>> outputs = model(input_ids, decoder_input_ids=decoder_input_ids)


         """
@@ -1219,7 +1219,7 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling

             >>> from transformers import T5Tokenizer, TFT5ForConditionalGeneration

-            >>> tokenizer = T5Tokenizer.from_pretrained('t5-small', return_dict=True)
+            >>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
             >>> model = TFT5ForConditionalGeneration.from_pretrained('t5-small')

             >>> inputs = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='tf').input_ids
@@ -1020,7 +1020,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
             >>> import torch

             >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-            >>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048', return_dict=True)
+            >>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')

             >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
             >>> start_positions = torch.tensor([1])
@@ -46,7 +46,7 @@ class XLMProphetNetEncoder(ProphetNetEncoder):
             >>> import torch

             >>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased')
-            >>> model = XLMProphetNetEncoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone', return_dict=True)
+            >>> model = XLMProphetNetEncoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone')
             >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
             >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
             >>> outputs = model(**inputs)
@@ -68,7 +68,7 @@ class XLMProphetNetDecoder(ProphetNetDecoder):
             >>> import torch

             >>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased')
-            >>> model = XLMProphetNetDecoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone', add_cross_attention=False, return_dict=True)
+            >>> model = XLMProphetNetDecoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone', add_cross_attention=False)
             >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
             >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
             >>> outputs = model(**inputs)
@@ -93,7 +93,7 @@ class XLMProphetNetModel(ProphetNetModel):

             >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
             >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

             >>> last_hidden_states = outputs.last_hidden_state # main stream hidden states
             >>> last_hidden_states_ngram = outputs.last_hidden_state_ngram # predict hidden states
@@ -116,7 +116,7 @@ class XLMProphetNetForConditionalGeneration(ProphetNetForConditionalGeneration):

             >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
             >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+            >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

             >>> logits_next_token = outputs.logits # logits to predict next token as usual
             >>> logits_ngram_next_tokens = outputs.logits_ngram # logits to predict 2nd, 3rd, ... next tokens
@@ -136,7 +136,7 @@ class XLMProphetNetForCausalLM(ProphetNetForCausalLM):
             >>> import torch

             >>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased')
-            >>> model = XLMProphetNetForCausalLM.from_pretrained('patrickvonplaten/xprophetnet-decoder-clm-large-uncased', return_dict=True)
+            >>> model = XLMProphetNetForCausalLM.from_pretrained('patrickvonplaten/xprophetnet-decoder-clm-large-uncased')
             >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
             >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
             >>> outputs = model(**inputs)
@@ -158,7 +158,7 @@ class XLMProphetNetForCausalLM(ProphetNetForCausalLM):
             ... )
             >>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids
             >>> labels = tokenizer_dec("us rejects charges against its ambassador in bolivia", return_tensors="pt").input_ids
-            >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:], return_dict=True)
+            >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:])

             >>> loss = outputs.loss
         """
@@ -1381,7 +1381,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
             >>> import torch

             >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-            >>> model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased', return_dict=True)
+            >>> model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')

             >>> # We show how to setup inputs to predict a next token using a bi-directional context.
             >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)).unsqueeze(0) # We will predict the masked token
@@ -1916,7 +1916,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
             >>> import torch

             >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
-            >>> model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased', return_dict=True)
+            >>> model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')

             >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
             >>> start_positions = torch.tensor([1])
@@ -118,7 +118,6 @@ class {{cookiecutter.camelcase_modelname}}ModelTester:
             type_vocab_size=self.type_vocab_size,
             is_decoder=False,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )

         return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
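The testers can drop their explicit `return_dict=True` arguments because the flag is now the configuration default, so a freshly constructed config already carries it. A quick check (illustrative, not part of the commit):

from transformers import BertConfig

config = BertConfig()
assert config.return_dict is True  # the new default, no kwarg needed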
@@ -118,7 +118,7 @@ class GenerationTesterMixin:
     @staticmethod
     def _get_encoder_outputs(model, input_ids, attention_mask, num_interleave=1):
         encoder = model.get_encoder()
-        encoder_outputs = encoder(input_ids, attention_mask=attention_mask, return_dict=True)
+        encoder_outputs = encoder(input_ids, attention_mask=attention_mask)
         encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.repeat_interleave(
             num_interleave, dim=0
         )
@@ -344,6 +344,7 @@ class GenerationTesterMixin:
     def test_beam_sample_generate(self):
         for model_class in self.all_generative_model_classes:
             config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+            print("Return dict", config.return_dict)
             logits_warper_kwargs, logits_warper = self._get_warper_and_kwargs(num_beams=1)

             model = model_class(config).to(torch_device)
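`_get_encoder_outputs` can expand the encoder states in place because a ModelOutput also allows item assignment to an existing field, which is what the `encoder_outputs["last_hidden_state"] = ...` line relies on. Sketch (illustrative):

import torch
from transformers.modeling_outputs import BaseModelOutput

enc = BaseModelOutput(last_hidden_state=torch.zeros(2, 3, 4))
enc["last_hidden_state"] = enc.last_hidden_state.repeat_interleave(2, dim=0)
assert enc.last_hidden_state.shape == (4, 3, 4)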
@@ -102,7 +102,6 @@ class AlbertModelTester:
             type_vocab_size=self.type_vocab_size,
             initializer_range=self.initializer_range,
             num_hidden_groups=self.num_hidden_groups,
-            return_dict=True,
         )

         return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -259,7 +259,6 @@ class BartHeadTests(unittest.TestCase):
             eos_token_id=2,
             pad_token_id=1,
             bos_token_id=0,
-            return_dict=True,
         )
         return config, input_ids, batch_size

@@ -310,7 +309,6 @@ class BartHeadTests(unittest.TestCase):
             encoder_ffn_dim=8,
             decoder_ffn_dim=8,
             max_position_embeddings=48,
-            return_dict=True,
         )
         lm_model = BartForConditionalGeneration(config).to(torch_device)
         context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device)
@@ -713,6 +711,6 @@ class FastIntegrationTests(unittest.TestCase):
             padding="longest",
             truncation=True,
         )
-        features = self.xsum_1_1_model.get_encoder()(**batch, return_dict=True).last_hidden_state
+        features = self.xsum_1_1_model.get_encoder()(**batch).last_hidden_state
         expected = [[-0.0828, -0.0251, -0.0674], [0.1277, 0.3311, -0.0255], [0.2613, -0.0840, -0.2763]]
         assert_tensors_close(features[0, :3, :3], torch.tensor(expected), atol=1e-3)
@@ -124,7 +124,6 @@ class BertModelTester:
             type_vocab_size=self.type_vocab_size,
             is_decoder=False,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )

         return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -89,7 +89,6 @@ class BertGenerationEncoderTester:
             max_position_embeddings=self.max_position_embeddings,
             is_decoder=False,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )

         return config, input_ids, input_mask, token_labels
@@ -31,7 +31,7 @@ if is_torch_available():
 class CamembertModelIntegrationTest(unittest.TestCase):
     @slow
     def test_output_embeds_base_model(self):
-        model = CamembertModel.from_pretrained("camembert-base", return_dict=True)
+        model = CamembertModel.from_pretrained("camembert-base")
         model.to(torch_device)

         input_ids = torch.tensor(
@@ -657,7 +657,7 @@ class ModelTesterMixin:
         model.eval()

         with torch.no_grad():
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class), return_dict=True)
+            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
         hidden_states = outputs["hidden_states"] if "hidden_states" in outputs else outputs[-1]

         expected_num_layers = getattr(
@@ -94,7 +94,6 @@ class CTRLModelTester:
             n_ctx=self.max_position_embeddings,
             # type_vocab_size=self.type_vocab_size,
             # initializer_range=self.initializer_range,
-            return_dict=True,
         )

         head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
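The common tester's `"hidden_states" in outputs` check works because a ModelOutput behaves like a mapping whose keys are only the fields that were actually set; fields left as None are absent. Sketch (illustrative):

import torch
from transformers.modeling_outputs import BaseModelOutput

out = BaseModelOutput(last_hidden_state=torch.zeros(1, 2, 3))
assert "last_hidden_state" in out
assert "hidden_states" not in out  # unset (None) fields are not keys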
@@ -148,7 +148,7 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
         return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

     def check_loss_output(self, result):
-        self.parent.assertListEqual(list(result["loss"].size()), [])
+        self.parent.assertListEqual(list(result.loss.size()), [])

     def create_and_check_deberta_model(
         self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -160,11 +160,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
         sequence_output = model(input_ids, token_type_ids=token_type_ids)[0]
         sequence_output = model(input_ids)[0]

-        result = {
-            "sequence_output": sequence_output,
-        }
         self.parent.assertListEqual(
-            list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
+            list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size]
         )

     def create_and_check_deberta_for_sequence_classification(
@@ -174,14 +171,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
         model = DebertaForSequenceClassification(config)
         model.to(torch_device)
         model.eval()
-        loss, logits = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
-        )
-        result = {
-            "loss": loss,
-            "logits": logits,
-        }
-        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
+        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
+        self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
         self.check_loss_output(result)

     def prepare_config_and_inputs_for_common(self):
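With the forward pass returning a structured output, the DeBERTa tests read `result.loss` and `result.logits` directly instead of repacking tensors into ad-hoc dicts. A sketch of the output type they now exercise, with made-up shapes (illustrative):

import torch
from transformers.modeling_outputs import SequenceClassifierOutput

result = SequenceClassifierOutput(loss=torch.tensor(0.5), logits=torch.zeros(13, 2))
assert list(result.logits.size()) == [13, 2]  # (batch_size, num_labels)
assert list(result.loss.size()) == []         # scalar loss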
@@ -110,7 +110,6 @@ if is_torch_available():
             attention_dropout=self.attention_probs_dropout_prob,
             max_position_embeddings=self.max_position_embeddings,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )

         return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -117,7 +117,6 @@ class DPRModelTester:
             type_vocab_size=self.type_vocab_size,
             is_decoder=False,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )
         config = DPRConfig(projection_dim=self.projection_dim, **config.to_dict())

@@ -101,7 +101,6 @@ class ElectraModelTester:
             type_vocab_size=self.type_vocab_size,
             is_decoder=False,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )

         return (
@@ -85,7 +85,6 @@ class EncoderDecoderMixin:
             decoder_input_ids=decoder_input_ids,
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
-            return_dict=True,
         )

         self.assertEqual(
@@ -117,7 +116,6 @@ class EncoderDecoderMixin:
             decoder_input_ids=decoder_input_ids,
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
-            return_dict=True,
         )
         self.assertEqual(
             outputs_encoder_decoder["logits"].shape, (decoder_input_ids.shape + (decoder_config.vocab_size,))
@@ -132,7 +130,6 @@ class EncoderDecoderMixin:
             decoder_input_ids=decoder_input_ids,
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
-            return_dict=True,
         )

         self.assertEqual(
@@ -278,7 +275,6 @@ class EncoderDecoderMixin:
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
             labels=labels,
-            return_dict=True,
         )

         loss = outputs_encoder_decoder["loss"]
@@ -313,7 +309,6 @@ class EncoderDecoderMixin:
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
             output_attentions=True,
-            return_dict=True,
         )

         encoder_attentions = outputs_encoder_decoder["encoder_attentions"]
@@ -113,7 +113,6 @@ class FlaubertModelTester(object):
             initializer_range=self.initializer_range,
             summary_type=self.summary_type,
             use_proj=self.use_proj,
-            return_dict=True,
         )

         return (
@@ -29,7 +29,7 @@ class FlaxBertModelTest(unittest.TestCase):
         # Check for simple input
         pt_inputs = tokenizer.encode_plus("This is a simple input", return_tensors=TensorType.PYTORCH)
         fx_inputs = tokenizer.encode_plus("This is a simple input", return_tensors=TensorType.JAX)
-        pt_outputs = pt_model(**pt_inputs)
+        pt_outputs = pt_model(**pt_inputs).to_tuple()
         fx_outputs = fx_model(**fx_inputs)

         self.assertEqual(len(fx_outputs), len(pt_outputs), "Output lengths differ between Flax and PyTorch")
@@ -34,7 +34,7 @@ class FlaxRobertaModelTest(unittest.TestCase):

         self.assertEqual(len(fx_outputs), len(pt_outputs), "Output lengths differ between Flax and PyTorch")

-        for fx_output, pt_output in zip(fx_outputs, pt_outputs):
+        for fx_output, pt_output in zip(fx_outputs, pt_outputs.to_tuple()):
             self.assert_almost_equals(fx_output, pt_output.numpy(), 5e-4)

     def assert_almost_equals(self, a: ndarray, b: ndarray, tol: float):
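On the Flax side the outputs are plain tuples, so the PyTorch ModelOutput is converted with `.to_tuple()` before the element-wise comparison; `to_tuple()` returns the set fields in declaration order. Sketch (illustrative):

import torch
from transformers.modeling_outputs import BaseModelOutputWithPooling

pt_outputs = BaseModelOutputWithPooling(
    last_hidden_state=torch.zeros(1, 4, 8),
    pooler_output=torch.zeros(1, 8),
)
assert len(pt_outputs.to_tuple()) == 2  # only fields that are set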
@@ -259,7 +259,6 @@ class FSMTHeadTests(unittest.TestCase):
             eos_token_id=2,
             pad_token_id=1,
             bos_token_id=0,
-            return_dict=True,
         )

     def _get_config_and_data(self):
@@ -140,7 +140,6 @@ class FunnelModelTester:
             activation_dropout=self.activation_dropout,
             max_position_embeddings=self.max_position_embeddings,
             type_vocab_size=self.type_vocab_size,
-            return_dict=True,
         )

         return (
@@ -131,7 +131,6 @@ class GPT2ModelTester:
             bos_token_id=self.bos_token_id,
             eos_token_id=self.eos_token_id,
             pad_token_id=self.pad_token_id,
-            return_dict=True,
             gradient_checkpointing=gradient_checkpointing,
         )

@@ -125,7 +125,6 @@ class LayoutLMModelTester:
             max_position_embeddings=self.max_position_embeddings,
             type_vocab_size=self.type_vocab_size,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )

         return config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -113,7 +113,6 @@ class LongformerModelTester:
             type_vocab_size=self.type_vocab_size,
             initializer_range=self.initializer_range,
             attention_window=self.attention_window,
-            return_dict=True,
         )

         return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -282,7 +282,6 @@ class LxmertModelTester:
             attention_mask=input_mask,
             labels=ans,
             output_attentions=output_attentions,
-            return_dict=True,
         )
         result = model(input_ids, visual_feats, bounding_boxes, labels=ans)
         result = model(
@@ -302,7 +301,6 @@ class LxmertModelTester:
             attention_mask=input_mask,
             labels=ans,
             output_attentions=not output_attentions,
-            return_dict=True,
         )

         self.parent.assertEqual(result.question_answering_score.shape, (self.batch_size, self.num_qa_labels))
@@ -335,7 +333,6 @@ class LxmertModelTester:
             matched_label=matched_label,
             ans=ans,
             output_attentions=output_attentions,
-            return_dict=True,
         )
         result = model(
             input_ids,
@@ -390,7 +387,6 @@ class LxmertModelTester:
             matched_label=matched_label,
             ans=ans,
             output_attentions=not output_attentions,
-            return_dict=True,
         )

         self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
@@ -427,7 +423,6 @@ class LxmertModelTester:
             token_type_ids=token_type_ids,
             attention_mask=input_mask,
             ans=ans,
-            return_dict=True,
         )

         result_qa = model_qa(
@@ -437,7 +432,6 @@ class LxmertModelTester:
             labels=ans,
             token_type_ids=token_type_ids,
             attention_mask=input_mask,
-            return_dict=True,
         )

         model_pretrain.resize_num_qa_labels(num_small_labels)
@@ -450,7 +444,6 @@ class LxmertModelTester:
             token_type_ids=token_type_ids,
             attention_mask=input_mask,
             ans=less_labels_ans,
-            return_dict=True,
         )

         result_qa_less = model_qa(
@@ -460,7 +453,6 @@ class LxmertModelTester:
             labels=less_labels_ans,
             token_type_ids=token_type_ids,
             attention_mask=input_mask,
-            return_dict=True,
         )

         model_pretrain.resize_num_qa_labels(num_large_labels)
@@ -473,7 +465,6 @@ class LxmertModelTester:
             token_type_ids=token_type_ids,
             attention_mask=input_mask,
             ans=more_labels_ans,
-            return_dict=True,
         )

         result_qa_more = model_qa(
@@ -483,7 +474,6 @@ class LxmertModelTester:
             labels=more_labels_ans,
             token_type_ids=token_type_ids,
             attention_mask=input_mask,
-            return_dict=True,
         )

         model_qa_labels = model_qa.num_qa_labels
@@ -50,7 +50,6 @@ class ModelTester:
             decoder_ffn_dim=32,
             max_position_embeddings=48,
             add_final_layer_norm=True,
-            return_dict=True,
         )

     def prepare_config_and_inputs_for_common(self):
@@ -37,7 +37,6 @@ class ModelTester:
             decoder_ffn_dim=32,
             max_position_embeddings=48,
             add_final_layer_norm=True,
-            return_dict=True,
         )

     def prepare_config_and_inputs_for_common(self):
@@ -132,7 +131,6 @@ class MBartEnroIntegrationTest(AbstractSeq2SeqIntegrationTest):
             decoder_ffn_dim=32,
             max_position_embeddings=48,
             add_final_layer_norm=True,
-            return_dict=True,
         )
         lm_model = MBartForConditionalGeneration(config).to(torch_device)
         context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device)
@@ -124,7 +124,6 @@ class MobileBertModelTester:
             type_vocab_size=self.type_vocab_size,
             is_decoder=False,
             initializer_range=self.initializer_range,
-            return_dict=True,
         )

         return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -94,7 +94,6 @@ class OpenAIGPTModelTester:
             # type_vocab_size=self.type_vocab_size,
             # initializer_range=self.initializer_range
             pad_token_id=self.pad_token_id,
-            return_dict=True,
         )

         head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
@@ -33,7 +33,6 @@ class ModelTester:
             decoder_ffn_dim=32,
             max_position_embeddings=48,
             add_final_layer_norm=True,
-            return_dict=True,
         )

     def prepare_config_and_inputs_for_common(self):
@@ -142,7 +142,6 @@ class ProphetNetModelTester:
             disable_ngram_loss=self.disable_ngram_loss,
             max_position_embeddings=self.max_position_embeddings,
             is_encoder_decoder=self.is_encoder_decoder,
-            return_dict=True,
         )

         return (
@@ -344,7 +343,6 @@ class ProphetNetModelTester:
             decoder_input_ids=decoder_input_ids,
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
-            return_dict=True,
         )

         tied_model_result = tied_model(
@@ -352,7 +350,6 @@ class ProphetNetModelTester:
             decoder_input_ids=decoder_input_ids,
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
-            return_dict=True,
         )

         # check that models has less parameters
@@ -419,7 +416,6 @@ class ProphetNetModelTester:
             attention_mask=attention_mask,
             decoder_attention_mask=decoder_attention_mask,
             labels=lm_labels,
-            return_dict=True,
         )
         self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(128.2925, device=torch_device), atol=1e-3))

@@ -433,9 +429,7 @@ class ProphetNetModelTester:
         model.to(torch_device)
         model.eval()

-        outputs_no_mask = model(
-            input_ids=input_ids[:, :5], decoder_input_ids=decoder_input_ids[:, :5], return_dict=True
-        )
+        outputs_no_mask = model(input_ids=input_ids[:, :5], decoder_input_ids=decoder_input_ids[:, :5])
         attention_mask = torch.ones_like(input_ids)
         decoder_attention_mask = torch.ones_like(decoder_input_ids)

@@ -446,7 +440,6 @@ class ProphetNetModelTester:
             attention_mask=attention_mask,
             decoder_input_ids=decoder_input_ids,
             decoder_attention_mask=decoder_attention_mask,
-            return_dict=True,
         )

         # check encoder
@@ -524,7 +517,6 @@ class ProphetNetStandaloneDecoderModelTester:
         bos_token_id=1,
         eos_token_id=2,
         ngram=2,
-        return_dict=True,
         num_buckets=32,
         relative_max_distance=128,
         disable_ngram_loss=False,
@@ -562,7 +554,6 @@ class ProphetNetStandaloneDecoderModelTester:
         self.max_position_embeddings = max_position_embeddings
         self.add_cross_attention = add_cross_attention
         self.is_encoder_decoder = is_encoder_decoder
-        self.return_dict = return_dict

         self.scope = None
         self.decoder_key_length = decoder_seq_length
@@ -602,7 +593,6 @@ class ProphetNetStandaloneDecoderModelTester:
             max_position_embeddings=self.max_position_embeddings,
             add_cross_attention=self.add_cross_attention,
             is_encoder_decoder=self.is_encoder_decoder,
-            return_dict=self.return_dict,
         )

         return (
@@ -757,7 +747,6 @@ class ProphetNetStandaloneEncoderModelTester:
         pad_token_id=0,
|
||||||
bos_token_id=1,
|
bos_token_id=1,
|
||||||
eos_token_id=2,
|
eos_token_id=2,
|
||||||
return_dict=True,
|
|
||||||
num_buckets=32,
|
num_buckets=32,
|
||||||
relative_max_distance=128,
|
relative_max_distance=128,
|
||||||
disable_ngram_loss=False,
|
disable_ngram_loss=False,
|
||||||
@ -794,7 +783,6 @@ class ProphetNetStandaloneEncoderModelTester:
|
|||||||
self.max_position_embeddings = max_position_embeddings
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.add_cross_attention = add_cross_attention
|
self.add_cross_attention = add_cross_attention
|
||||||
self.is_encoder_decoder = is_encoder_decoder
|
self.is_encoder_decoder = is_encoder_decoder
|
||||||
self.return_dict = return_dict
|
|
||||||
|
|
||||||
self.scope = None
|
self.scope = None
|
||||||
self.decoder_key_length = decoder_seq_length
|
self.decoder_key_length = decoder_seq_length
|
||||||
@ -829,7 +817,6 @@ class ProphetNetStandaloneEncoderModelTester:
|
|||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
add_cross_attention=self.add_cross_attention,
|
add_cross_attention=self.add_cross_attention,
|
||||||
is_encoder_decoder=self.is_encoder_decoder,
|
is_encoder_decoder=self.is_encoder_decoder,
|
||||||
return_dict=self.return_dict,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@ -919,7 +906,6 @@ class ProphetNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
|
|||||||
# methods overwrite method in `test_modeling_common.py`
|
# methods overwrite method in `test_modeling_common.py`
|
||||||
def test_attention_outputs(self):
|
def test_attention_outputs(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
config.return_dict = True
|
|
||||||
|
|
||||||
seq_len = getattr(self.model_tester, "seq_length", None)
|
seq_len = getattr(self.model_tester, "seq_length", None)
|
||||||
decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
|
decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
|
||||||
@ -933,7 +919,6 @@ class ProphetNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
|
|||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
inputs_dict["output_attentions"] = True
|
inputs_dict["output_attentions"] = True
|
||||||
inputs_dict["output_hidden_states"] = False
|
inputs_dict["output_hidden_states"] = False
|
||||||
config.return_dict = True
|
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
model.eval()
|
model.eval()
|
||||||
@ -1121,7 +1106,6 @@ class ProphetNetModelIntegrationTest(unittest.TestCase):
|
|||||||
attention_mask=None,
|
attention_mask=None,
|
||||||
encoder_outputs=None,
|
encoder_outputs=None,
|
||||||
decoder_input_ids=decoder_prev_ids,
|
decoder_input_ids=decoder_prev_ids,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
output_predited_logits = output[0]
|
output_predited_logits = output[0]
|
||||||
expected_shape = torch.Size((1, 12, 30522))
|
expected_shape = torch.Size((1, 12, 30522))
|
||||||
@ -1143,9 +1127,7 @@ class ProphetNetModelIntegrationTest(unittest.TestCase):
|
|||||||
assert torch.allclose(encoder_outputs[:, :3, :3], expected_encoder_outputs_slice, atol=1e-4)
|
assert torch.allclose(encoder_outputs[:, :3, :3], expected_encoder_outputs_slice, atol=1e-4)
|
||||||
|
|
||||||
# decoder outputs
|
# decoder outputs
|
||||||
decoder_outputs = model.prophetnet.decoder(
|
decoder_outputs = model.prophetnet.decoder(decoder_prev_ids, encoder_hidden_states=encoder_outputs)
|
||||||
decoder_prev_ids, encoder_hidden_states=encoder_outputs, return_dict=True
|
|
||||||
)
|
|
||||||
predicting_streams = decoder_outputs[1].view(1, model.config.ngram, 12, -1)
|
predicting_streams = decoder_outputs[1].view(1, model.config.ngram, 12, -1)
|
||||||
predicting_streams_logits = model.lm_head(predicting_streams)
|
predicting_streams_logits = model.lm_head(predicting_streams)
|
||||||
next_first_stream_logits = predicting_streams_logits[:, 0]
|
next_first_stream_logits = predicting_streams_logits[:, 0]
|
||||||
|
@ -174,7 +174,6 @@ class ReformerModelTester:
|
|||||||
attn_layers=self.attn_layers,
|
attn_layers=self.attn_layers,
|
||||||
pad_token_id=self.pad_token_id,
|
pad_token_id=self.pad_token_id,
|
||||||
hash_seed=self.hash_seed,
|
hash_seed=self.hash_seed,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -103,7 +103,6 @@ class RobertaModelTester:
|
|||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
type_vocab_size=self.type_vocab_size,
|
type_vocab_size=self.type_vocab_size,
|
||||||
initializer_range=self.initializer_range,
|
initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -131,7 +131,6 @@ if is_torch_available():
|
|||||||
post_attention_groups=self.post_attention_groups,
|
post_attention_groups=self.post_attention_groups,
|
||||||
intermediate_groups=self.intermediate_groups,
|
intermediate_groups=self.intermediate_groups,
|
||||||
output_groups=self.output_groups,
|
output_groups=self.output_groups,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -115,7 +115,6 @@ class T5ModelTester:
|
|||||||
bos_token_id=self.pad_token_id,
|
bos_token_id=self.pad_token_id,
|
||||||
pad_token_id=self.pad_token_id,
|
pad_token_id=self.pad_token_id,
|
||||||
decoder_start_token_id=self.decoder_start_token_id,
|
decoder_start_token_id=self.decoder_start_token_id,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -121,7 +121,6 @@ class TFAlbertModelTester:
|
|||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
type_vocab_size=self.type_vocab_size,
|
type_vocab_size=self.type_vocab_size,
|
||||||
initializer_range=self.initializer_range,
|
initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -182,7 +182,6 @@ class TFBartHeadTests(unittest.TestCase):
|
|||||||
eos_token_id=2,
|
eos_token_id=2,
|
||||||
pad_token_id=1,
|
pad_token_id=1,
|
||||||
bos_token_id=0,
|
bos_token_id=0,
|
||||||
return_dict=True,
|
|
||||||
decoder_start_token_id=2,
|
decoder_start_token_id=2,
|
||||||
)
|
)
|
||||||
return config, input_ids, batch_size
|
return config, input_ids, batch_size
|
||||||
@ -206,7 +205,6 @@ class TFBartHeadTests(unittest.TestCase):
|
|||||||
encoder_ffn_dim=32,
|
encoder_ffn_dim=32,
|
||||||
decoder_ffn_dim=32,
|
decoder_ffn_dim=32,
|
||||||
max_position_embeddings=48,
|
max_position_embeddings=48,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
lm_model = TFBartForConditionalGeneration(config)
|
lm_model = TFBartForConditionalGeneration(config)
|
||||||
context = tf.fill((7, 2), 4)
|
context = tf.fill((7, 2), 4)
|
||||||
@ -356,7 +354,7 @@ class FasterTFBartModelIntegrationTests(unittest.TestCase):
|
|||||||
padding="longest",
|
padding="longest",
|
||||||
truncation=True,
|
truncation=True,
|
||||||
)
|
)
|
||||||
features = self.xsum_1_1_model.get_encoder()(**batch, return_dict=True).last_hidden_state
|
features = self.xsum_1_1_model.get_encoder()(**batch).last_hidden_state
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
expected = np.array([[-0.0828, -0.0251, -0.0674], [0.1277, 0.3311, -0.0255], [0.2613, -0.0840, -0.2763]])
|
expected = np.array([[-0.0828, -0.0251, -0.0674], [0.1277, 0.3311, -0.0255], [0.2613, -0.0840, -0.2763]])
|
||||||
|
@ -120,7 +120,6 @@ class TFBertModelTester:
|
|||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
type_vocab_size=self.type_vocab_size,
|
type_vocab_size=self.type_vocab_size,
|
||||||
initializer_range=self.initializer_range,
|
initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -39,7 +39,7 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
|
|||||||
dtype=tf.int32,
|
dtype=tf.int32,
|
||||||
) # J'aime le camembert !"
|
) # J'aime le camembert !"
|
||||||
|
|
||||||
output = model(input_ids, return_dict=True)["last_hidden_state"]
|
output = model(input_ids)["last_hidden_state"]
|
||||||
expected_shape = tf.TensorShape((1, 10, 768))
|
expected_shape = tf.TensorShape((1, 10, 768))
|
||||||
self.assertEqual(output.shape, expected_shape)
|
self.assertEqual(output.shape, expected_shape)
|
||||||
# compare the actual values for a slice.
|
# compare the actual values for a slice.
|
||||||
|
@ -284,7 +284,7 @@ class TFModelTesterMixin:
|
|||||||
if isinstance(after_outputs, tf.Tensor):
|
if isinstance(after_outputs, tf.Tensor):
|
||||||
out_1 = after_outputs.numpy()
|
out_1 = after_outputs.numpy()
|
||||||
elif isinstance(after_outputs, dict):
|
elif isinstance(after_outputs, dict):
|
||||||
out_1 = after_outputs[list(after_outputs.keys())[0]]
|
out_1 = after_outputs[list(after_outputs.keys())[0]].numpy()
|
||||||
else:
|
else:
|
||||||
out_1 = after_outputs[0].numpy()
|
out_1 = after_outputs[0].numpy()
|
||||||
out_2 = outputs[0].numpy()
|
out_2 = outputs[0].numpy()
|
||||||
|
@ -94,7 +94,6 @@ class TFCTRLModelTester(object):
|
|||||||
n_ctx=self.max_position_embeddings,
|
n_ctx=self.max_position_embeddings,
|
||||||
# type_vocab_size=self.type_vocab_size,
|
# type_vocab_size=self.type_vocab_size,
|
||||||
# initializer_range=self.initializer_range,
|
# initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||||
|
@ -91,7 +91,6 @@ class TFDistilBertModelTester:
|
|||||||
attention_dropout=self.attention_probs_dropout_prob,
|
attention_dropout=self.attention_probs_dropout_prob,
|
||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
initializer_range=self.initializer_range,
|
initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -97,7 +97,6 @@ class TFElectraModelTester:
|
|||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
type_vocab_size=self.type_vocab_size,
|
type_vocab_size=self.type_vocab_size,
|
||||||
initializer_range=self.initializer_range,
|
initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -114,7 +114,6 @@ class TFFlaubertModelTester:
|
|||||||
summary_type=self.summary_type,
|
summary_type=self.summary_type,
|
||||||
use_proj=self.use_proj,
|
use_proj=self.use_proj,
|
||||||
bos_token_id=self.bos_token_id,
|
bos_token_id=self.bos_token_id,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -137,7 +137,6 @@ class TFFunnelModelTester:
|
|||||||
activation_dropout=self.activation_dropout,
|
activation_dropout=self.activation_dropout,
|
||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
type_vocab_size=self.type_vocab_size,
|
type_vocab_size=self.type_vocab_size,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -104,7 +104,6 @@ class TFGPT2ModelTester:
|
|||||||
# initializer_range=self.initializer_range
|
# initializer_range=self.initializer_range
|
||||||
bos_token_id=self.bos_token_id,
|
bos_token_id=self.bos_token_id,
|
||||||
eos_token_id=self.eos_token_id,
|
eos_token_id=self.eos_token_id,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||||
|
@ -594,7 +594,9 @@ class TFLongformerModelIntegrationTest(unittest.TestCase):
|
|||||||
# 'Hello world! ' repeated 1000 times
|
# 'Hello world! ' repeated 1000 times
|
||||||
input_ids = tf.convert_to_tensor([[0] + [20920, 232, 328, 1437] * 1000 + [2]], dtype=tf.dtypes.int32)
|
input_ids = tf.convert_to_tensor([[0] + [20920, 232, 328, 1437] * 1000 + [2]], dtype=tf.dtypes.int32)
|
||||||
|
|
||||||
loss, prediction_scores = model(input_ids, labels=input_ids)
|
output = model(input_ids, labels=input_ids)
|
||||||
|
loss = output.loss
|
||||||
|
prediction_scores = output.logits
|
||||||
|
|
||||||
expected_loss = tf.constant(0.0073798)
|
expected_loss = tf.constant(0.0073798)
|
||||||
expected_prediction_scores_sum = tf.constant(-610476600.0)
|
expected_prediction_scores_sum = tf.constant(-610476600.0)
|
||||||
|
@ -297,7 +297,6 @@ class TFLxmertModelTester(object):
|
|||||||
matched_label=matched_label,
|
matched_label=matched_label,
|
||||||
ans=ans,
|
ans=ans,
|
||||||
output_attentions=output_attentions,
|
output_attentions=output_attentions,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
result = model(
|
result = model(
|
||||||
input_ids,
|
input_ids,
|
||||||
@ -352,7 +351,6 @@ class TFLxmertModelTester(object):
|
|||||||
matched_label=matched_label,
|
matched_label=matched_label,
|
||||||
ans=ans,
|
ans=ans,
|
||||||
output_attentions=not output_attentions,
|
output_attentions=not output_attentions,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
||||||
@ -695,7 +693,8 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
model = tf.keras.models.load_model(tmpdirname)
|
model = tf.keras.models.load_model(tmpdirname)
|
||||||
outputs = model(class_inputs_dict)
|
outputs = model(class_inputs_dict)
|
||||||
|
|
||||||
language_hidden_states, vision_hidden_states = outputs[-2], outputs[-1]
|
language_hidden_states = outputs["language_hidden_states"]
|
||||||
|
vision_hidden_states = outputs["vision_hidden_states"]
|
||||||
|
|
||||||
self.assertEqual(len(language_hidden_states), self.model_tester.num_hidden_layers["language"] + 1)
|
self.assertEqual(len(language_hidden_states), self.model_tester.num_hidden_layers["language"] + 1)
|
||||||
self.assertEqual(len(vision_hidden_states), self.model_tester.num_hidden_layers["vision"] + 1)
|
self.assertEqual(len(vision_hidden_states), self.model_tester.num_hidden_layers["vision"] + 1)
|
||||||
@ -731,11 +730,9 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
model = tf.keras.models.load_model(tmpdirname)
|
model = tf.keras.models.load_model(tmpdirname)
|
||||||
outputs = model(class_inputs_dict)
|
outputs = model(class_inputs_dict)
|
||||||
|
|
||||||
language_attentions, vision_attentions, cross_encoder_attentions = (
|
language_attentions = outputs["language_attentions"]
|
||||||
outputs[-3],
|
vision_attentions = outputs["vision_attentions"]
|
||||||
outputs[-2],
|
cross_encoder_attentions = outputs["cross_encoder_attentions"]
|
||||||
outputs[-1],
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(len(language_attentions), self.model_tester.num_hidden_layers["language"])
|
self.assertEqual(len(language_attentions), self.model_tester.num_hidden_layers["language"])
|
||||||
self.assertEqual(len(vision_attentions), self.model_tester.num_hidden_layers["vision"])
|
self.assertEqual(len(vision_attentions), self.model_tester.num_hidden_layers["vision"])
|
||||||
|
@ -139,7 +139,6 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
type_vocab_size=self.type_vocab_size,
|
type_vocab_size=self.type_vocab_size,
|
||||||
initializer_range=self.initializer_range,
|
initializer_range=self.initializer_range,
|
||||||
embedding_size=self.embedding_size,
|
embedding_size=self.embedding_size,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -99,7 +99,6 @@ class TFOpenAIGPTModelTester:
|
|||||||
n_ctx=self.max_position_embeddings,
|
n_ctx=self.max_position_embeddings,
|
||||||
# type_vocab_size=self.type_vocab_size,
|
# type_vocab_size=self.type_vocab_size,
|
||||||
# initializer_range=self.initializer_range,
|
# initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||||
|
@ -97,7 +97,6 @@ class TFRobertaModelTester:
|
|||||||
max_position_embeddings=self.max_position_embeddings,
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
type_vocab_size=self.type_vocab_size,
|
type_vocab_size=self.type_vocab_size,
|
||||||
initializer_range=self.initializer_range,
|
initializer_range=self.initializer_range,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -78,7 +78,6 @@ class TFT5ModelTester:
|
|||||||
bos_token_id=self.pad_token_id,
|
bos_token_id=self.pad_token_id,
|
||||||
pad_token_id=self.pad_token_id,
|
pad_token_id=self.pad_token_id,
|
||||||
decoder_start_token_id=self.pad_token_id,
|
decoder_start_token_id=self.pad_token_id,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (config, input_ids, input_mask, token_labels)
|
return (config, input_ids, input_mask, token_labels)
|
||||||
|
@ -77,7 +77,6 @@ class TFTransfoXLModelTester:
|
|||||||
div_val=self.div_val,
|
div_val=self.div_val,
|
||||||
n_layer=self.num_hidden_layers,
|
n_layer=self.num_hidden_layers,
|
||||||
eos_token_id=self.eos_token_id,
|
eos_token_id=self.eos_token_id,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (config, input_ids_1, input_ids_2, lm_labels)
|
return (config, input_ids_1, input_ids_2, lm_labels)
|
||||||
|
@ -114,7 +114,6 @@ class TFXLMModelTester:
|
|||||||
summary_type=self.summary_type,
|
summary_type=self.summary_type,
|
||||||
use_proj=self.use_proj,
|
use_proj=self.use_proj,
|
||||||
bos_token_id=self.bos_token_id,
|
bos_token_id=self.bos_token_id,
|
||||||
return_dict=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
Some files were not shown because too many files have changed in this diff.
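Every hunk above applies the same API change: with `return_dict=True` now the default, a forward pass returns a `ModelOutput` whose fields are read by name instead of being unpacked by position. A minimal sketch of the new default behavior (illustrative only; `bert-base-uncased` is a stand-in checkpoint, and any masked-LM model would do):

    from transformers import AutoModelForMaskedLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

    inputs = tokenizer("Hello world!", return_tensors="pt")

    # return_dict=True is now the default: the output is a ModelOutput,
    # so fields are accessed by name rather than by tuple index.
    outputs = model(**inputs, labels=inputs["input_ids"])
    loss, logits = outputs.loss, outputs.logits

    # The legacy tuple form can still be requested explicitly.
    loss_t, logits_t = model(**inputs, labels=inputs["input_ids"], return_dict=False)[:2]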