Mirror of https://github.com/huggingface/transformers.git
Expose get_config() on ModelTesters (#12812)
* Expose get_config() on ModelTesters
* Typo
This commit is contained in:
parent cabcc75171
commit c3d9ac7607
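
The same refactor is applied to every tester touched by this commit: the inline `SomeModelConfig(...)` construction inside `prepare_config_and_inputs()` moves into a new `get_config()` method, so a tiny test configuration can be obtained on its own, without also building dummy inputs. A minimal sketch of the resulting shape, assuming a hypothetical `MyModelTester` (the class name, the use of `torch.randint` in place of the test helpers, and the particular config arguments are illustrative, not taken from the diff):

import torch
from transformers import BertConfig


class MyModelTester:  # hypothetical tester, mirroring the pattern applied in this commit
    def __init__(self, parent, batch_size=13, seq_length=7, vocab_size=99):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.vocab_size = vocab_size

    def prepare_config_and_inputs(self):
        # Dummy input ids, analogous to ids_tensor(...) in the real testers.
        input_ids = torch.randint(0, self.vocab_size, (self.batch_size, self.seq_length))
        # Before this commit the config was instantiated inline here;
        # now the construction is delegated to get_config().
        config = self.get_config()
        return config, input_ids

    def get_config(self):
        # Returns a tiny configuration by default and can now be called directly
        # by tests that only need a config, not the dummy inputs.
        return BertConfig(
            vocab_size=self.vocab_size,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
        )

Callers such as prepare_config_and_inputs_for_decoder() or prepare_config_and_inputs_for_common() keep working unchanged, since they still receive the config from prepare_config_and_inputs().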
@@ -22,6 +22,7 @@ from tests.test_modeling_common import floats_tensor
from transformers import is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from transformers import {{cookiecutter.camelcase_modelname}}Config
from .test_configuration_common import ConfigTester
from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask

@@ -30,7 +31,6 @@ if is_torch_available():
import torch

from transformers import (
{{cookiecutter.camelcase_modelname}}Config,
{{cookiecutter.camelcase_modelname}}ForCausalLM,
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
@@ -112,7 +112,12 @@ class {{cookiecutter.camelcase_modelname}}ModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = {{cookiecutter.camelcase_modelname}}Config(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return {{cookiecutter.camelcase_modelname}}Config(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@@ -127,8 +132,6 @@ class {{cookiecutter.camelcase_modelname}}ModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def prepare_config_and_inputs_for_decoder(self):
(
config,

@@ -16,7 +16,7 @@

import unittest

from transformers import is_torch_available
from transformers import AlbertConfig, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device

@@ -29,7 +29,6 @@ if is_torch_available():

from transformers import (
MODEL_FOR_PRETRAINING_MAPPING,
AlbertConfig,
AlbertForMaskedLM,
AlbertForMultipleChoice,
AlbertForPreTraining,
@@ -90,7 +89,12 @@ class AlbertModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = AlbertConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return AlbertConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@@ -105,8 +109,6 @@ class AlbertModelTester:
num_hidden_groups=self.num_hidden_groups,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def create_and_check_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):

@@ -21,7 +21,7 @@ import unittest

import timeout_decorator  # noqa

from transformers import is_torch_available
from transformers import BartConfig, is_torch_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

@@ -35,7 +35,6 @@ if is_torch_available():

from transformers import (
AutoModelForSequenceClassification,
BartConfig,
BartForCausalLM,
BartForConditionalGeneration,
BartForQuestionAnswering,
@@ -78,7 +77,6 @@ def prepare_bart_inputs_dict(
}


@require_torch
class BartModelTester:
def __init__(
self,
@@ -127,7 +125,12 @@ class BartModelTester:

decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

config = BartConfig(
config = self.get_config()
inputs_dict = prepare_bart_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def get_config(self):
return BartConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
@@ -143,8 +146,6 @@ class BartModelTester:
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
)
inputs_dict = prepare_bart_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def prepare_config_and_inputs_for_common(self):
config, inputs_dict = self.prepare_config_and_inputs()

@@ -16,7 +16,7 @@

import unittest

from transformers import is_torch_available
from transformers import BertConfig, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device

@@ -30,7 +30,6 @@ if is_torch_available():

from transformers import (
MODEL_FOR_PRETRAINING_MAPPING,
BertConfig,
BertForMaskedLM,
BertForMultipleChoice,
BertForNextSentencePrediction,
@@ -112,7 +111,15 @@ class BertModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = BertConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
"""
Returns a tiny configuration by default.
"""
return BertConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@@ -127,8 +134,6 @@ class BertModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def prepare_config_and_inputs_for_decoder(self):
(
config,

@@ -16,7 +16,7 @@

import unittest

from transformers import is_torch_available
from transformers import BertGenerationConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@@ -27,7 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, r
if is_torch_available():
import torch

from transformers import BertGenerationConfig, BertGenerationDecoder, BertGenerationEncoder
from transformers import BertGenerationDecoder, BertGenerationEncoder


class BertGenerationEncoderTester:
@@ -79,7 +79,12 @@ class BertGenerationEncoderTester:
if self.use_labels:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

config = BertGenerationConfig(
config = self.get_config()

return config, input_ids, input_mask, token_labels

def get_config(self):
return BertGenerationConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@@ -93,8 +98,6 @@ class BertGenerationEncoderTester:
initializer_range=self.initializer_range,
)

return config, input_ids, input_mask, token_labels

def prepare_config_and_inputs_for_decoder(self):
(
config,

@@ -18,7 +18,7 @@
import unittest

from tests.test_modeling_common import floats_tensor
from transformers import is_torch_available
from transformers import BigBirdConfig, is_torch_available
from transformers.models.auto import get_values
from transformers.models.big_bird.tokenization_big_bird import BigBirdTokenizer
from transformers.testing_utils import require_torch, slow, torch_device
@@ -32,7 +32,6 @@ if is_torch_available():

from transformers import (
MODEL_FOR_PRETRAINING_MAPPING,
BigBirdConfig,
BigBirdForCausalLM,
BigBirdForMaskedLM,
BigBirdForMultipleChoice,
@@ -126,7 +125,12 @@ class BigBirdModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = BigBirdConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return BigBirdConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@@ -147,8 +151,6 @@ class BigBirdModelTester:
position_embedding_type=self.position_embedding_type,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def prepare_config_and_inputs_for_decoder(self):
(
config,

@@ -19,7 +19,7 @@ import copy
import tempfile
import unittest

from transformers import is_torch_available
from transformers import BigBirdPegasusConfig, is_torch_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@@ -31,7 +31,6 @@ if is_torch_available():
import torch

from transformers import (
BigBirdPegasusConfig,
BigBirdPegasusForCausalLM,
BigBirdPegasusForConditionalGeneration,
BigBirdPegasusForQuestionAnswering,
@@ -69,7 +68,6 @@ def prepare_bigbird_pegasus_inputs_dict(
return input_dict


@require_torch
class BigBirdPegasusModelTester:
def __init__(
self,
@@ -129,7 +127,12 @@ class BigBirdPegasusModelTester:

decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

config = BigBirdPegasusConfig(
config = self.get_config()
inputs_dict = prepare_bigbird_pegasus_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def get_config(self):
return BigBirdPegasusConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
@@ -150,8 +153,6 @@ class BigBirdPegasusModelTester:
num_random_blocks=self.num_random_blocks,
scale_embedding=self.scale_embedding,
)
inputs_dict = prepare_bigbird_pegasus_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def prepare_config_and_inputs_for_common(self):
config, inputs_dict = self.prepare_config_and_inputs()

@@ -17,7 +17,7 @@
import tempfile
import unittest

from transformers import is_torch_available
from transformers import BlenderbotConfig, is_torch_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

@@ -29,7 +29,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
if is_torch_available():
import torch

from transformers import BlenderbotConfig, BlenderbotForConditionalGeneration, BlenderbotModel, BlenderbotTokenizer
from transformers import BlenderbotForConditionalGeneration, BlenderbotModel, BlenderbotTokenizer
from transformers.models.blenderbot.modeling_blenderbot import (
BlenderbotDecoder,
BlenderbotEncoder,
@@ -68,7 +68,6 @@ def prepare_blenderbot_inputs_dict(
}


@require_torch
class BlenderbotModelTester:
def __init__(
self,
@@ -109,7 +108,6 @@ class BlenderbotModelTester:
self.bos_token_id = bos_token_id

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
3,
)
@@ -117,7 +115,12 @@ class BlenderbotModelTester:

decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

config = BlenderbotConfig(
config = self.get_config()
inputs_dict = prepare_blenderbot_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def get_config(self):
return BlenderbotConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
@@ -133,8 +136,6 @@ class BlenderbotModelTester:
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
)
inputs_dict = prepare_blenderbot_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def prepare_config_and_inputs_for_common(self):
config, inputs_dict = self.prepare_config_and_inputs()

@@ -17,7 +17,7 @@
import tempfile
import unittest

from transformers import is_torch_available
from transformers import BlenderbotSmallConfig, is_torch_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_torch, slow, torch_device

@@ -29,12 +29,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
if is_torch_available():
import torch

from transformers import (
BlenderbotSmallConfig,
BlenderbotSmallForConditionalGeneration,
BlenderbotSmallModel,
BlenderbotSmallTokenizer,
)
from transformers import BlenderbotSmallForConditionalGeneration, BlenderbotSmallModel, BlenderbotSmallTokenizer
from transformers.models.blenderbot_small.modeling_blenderbot_small import (
BlenderbotSmallDecoder,
BlenderbotSmallEncoder,
@@ -73,7 +68,6 @@ def prepare_blenderbot_small_inputs_dict(
}


@require_torch
class BlenderbotSmallModelTester:
def __init__(
self,
@@ -114,7 +108,6 @@ class BlenderbotSmallModelTester:
self.bos_token_id = bos_token_id

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
3,
)
@@ -122,7 +115,12 @@ class BlenderbotSmallModelTester:

decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

config = BlenderbotSmallConfig(
config = self.get_config()
inputs_dict = prepare_blenderbot_small_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def get_config(self):
return BlenderbotSmallConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
@@ -138,8 +136,6 @@ class BlenderbotSmallModelTester:
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
)
inputs_dict = prepare_blenderbot_small_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict

def prepare_config_and_inputs_for_common(self):
config, inputs_dict = self.prepare_config_and_inputs()

@@ -18,7 +18,7 @@
import unittest
from typing import List, Tuple

from transformers import is_torch_available
from transformers import CanineConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@@ -29,7 +29,6 @@ if is_torch_available():
import torch

from transformers import (
CanineConfig,
CanineForMultipleChoice,
CanineForQuestionAnswering,
CanineForSequenceClassification,
@@ -106,7 +105,12 @@ class CanineModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = CanineConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return CanineConfig(
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
@@ -120,8 +124,6 @@ class CanineModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def create_and_check_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):

@@ -21,6 +21,7 @@ import tempfile
import unittest

import requests
from transformers import CLIPConfig, CLIPTextConfig, CLIPVisionConfig
from transformers.file_utils import is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision, slow, torch_device

@@ -32,7 +33,7 @@ if is_torch_available():
import torch
from torch import nn

from transformers import CLIPConfig, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPVisionConfig, CLIPVisionModel
from transformers import CLIPModel, CLIPTextModel, CLIPVisionModel
from transformers.models.clip.modeling_clip import CLIP_PRETRAINED_MODEL_ARCHIVE_LIST


@@ -77,7 +78,12 @@ class CLIPVisionModelTester:

def prepare_config_and_inputs(self):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
config = CLIPVisionConfig(
config = self.get_config()

return config, pixel_values

def get_config(self):
return CLIPVisionConfig(
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
@@ -90,8 +96,6 @@ class CLIPVisionModelTester:
initializer_range=self.initializer_range,
)

return config, pixel_values

def create_and_check_model(self, config, pixel_values):
model = CLIPVisionModel(config=config)
model.to(torch_device)
@@ -323,7 +327,12 @@ class CLIPTextModelTester:
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])

config = CLIPTextConfig(
config = self.get_config()

return config, input_ids, input_mask

def get_config(self):
return CLIPTextConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@@ -335,8 +344,6 @@ class CLIPTextModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, input_mask

def create_and_check_model(self, config, input_ids, input_mask):
model = CLIPTextModel(config=config)
model.to(torch_device)
@@ -409,10 +416,15 @@ class CLIPModelTester:
text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
vision_config, pixel_values = self.vision_model_tester.prepare_config_and_inputs()

config = CLIPConfig.from_text_vision_configs(text_config, vision_config, projection_dim=64)
config = self.get_config()

return config, input_ids, attention_mask, pixel_values

def get_config(self):
return CLIPConfig.from_text_vision_configs(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64
)

def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):
model = CLIPModel(config).to(torch_device).eval()
result = model(input_ids, pixel_values, attention_mask)

@@ -18,7 +18,7 @@
import unittest

from tests.test_modeling_common import floats_tensor
from transformers import is_torch_available
from transformers import ConvBertConfig, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device

@@ -31,7 +31,6 @@ if is_torch_available():

from transformers import (
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
ConvBertConfig,
ConvBertForMaskedLM,
ConvBertForMultipleChoice,
ConvBertForQuestionAnswering,
@@ -110,7 +109,12 @@ class ConvBertModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = ConvBertConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return ConvBertConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@@ -125,8 +129,6 @@ class ConvBertModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def prepare_config_and_inputs_for_decoder(self):
(
config,

@@ -15,7 +15,7 @@

import unittest

from transformers import is_torch_available
from transformers import CTRLConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@@ -28,7 +28,6 @@ if is_torch_available():

from transformers import (
CTRL_PRETRAINED_MODEL_ARCHIVE_LIST,
CTRLConfig,
CTRLForSequenceClassification,
CTRLLMHeadModel,
CTRLModel,
@@ -88,21 +87,7 @@ class CTRLModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = CTRLConfig(
vocab_size=self.vocab_size,
n_embd=self.hidden_size,
n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads,
# intermediate_size=self.intermediate_size,
# hidden_act=self.hidden_act,
# hidden_dropout_prob=self.hidden_dropout_prob,
# attention_probs_dropout_prob=self.attention_probs_dropout_prob,
n_positions=self.max_position_embeddings,
n_ctx=self.max_position_embeddings,
# type_vocab_size=self.type_vocab_size,
# initializer_range=self.initializer_range,
pad_token_id=self.pad_token_id,
)
config = self.get_config()

head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

@@ -118,6 +103,23 @@ class CTRLModelTester:
choice_labels,
)

def get_config(self):
return CTRLConfig(
vocab_size=self.vocab_size,
n_embd=self.hidden_size,
n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads,
# intermediate_size=self.intermediate_size,
# hidden_act=self.hidden_act,
# hidden_dropout_prob=self.hidden_dropout_prob,
# attention_probs_dropout_prob=self.attention_probs_dropout_prob,
n_positions=self.max_position_embeddings,
n_ctx=self.max_position_embeddings,
# type_vocab_size=self.type_vocab_size,
# initializer_range=self.initializer_range,
pad_token_id=self.pad_token_id,
)

def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = CTRLModel(config=config)
model.to(torch_device)

@@ -12,10 +12,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from transformers import is_torch_available
from transformers import DebertaConfig, is_torch_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@@ -26,7 +25,6 @@ if is_torch_available():
import torch

from transformers import (
DebertaConfig,
DebertaForMaskedLM,
DebertaForQuestionAnswering,
DebertaForSequenceClassification,
@@ -36,6 +34,179 @@ if is_torch_available():
from transformers.models.deberta.modeling_deberta import DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST


class DebertaModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
relative_attention=False,
position_biased_input=True,
pos_att_type="None",
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.relative_attention = relative_attention
self.position_biased_input = position_biased_input
self.pos_att_type = pos_att_type
self.scope = scope

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

input_mask = None
if self.use_input_mask:
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return DebertaConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
relative_attention=self.relative_attention,
position_biased_input=self.position_biased_input,
pos_att_type=self.pos_att_type,
)

def check_loss_output(self, result):
self.parent.assertListEqual(list(result.loss.size()), [])

def create_and_check_deberta_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaModel(config=config)
model.to(torch_device)
model.eval()
sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids)[0]

self.parent.assertListEqual(list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size])

def create_and_check_deberta_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)

self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

def create_and_check_deberta_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
self.check_loss_output(result)

def create_and_check_deberta_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaForTokenClassification(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

def create_and_check_deberta_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict


@require_torch
class DebertaModelTest(ModelTesterMixin, unittest.TestCase):

@@ -56,179 +227,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
test_head_masking = False
is_encoder_decoder = False

class DebertaModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
relative_attention=False,
position_biased_input=True,
pos_att_type="None",
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.relative_attention = relative_attention
self.position_biased_input = position_biased_input
self.pos_att_type = pos_att_type
self.scope = scope

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

input_mask = None
if self.use_input_mask:
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = DebertaConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
relative_attention=self.relative_attention,
position_biased_input=self.position_biased_input,
pos_att_type=self.pos_att_type,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def check_loss_output(self, result):
self.parent.assertListEqual(list(result.loss.size()), [])

def create_and_check_deberta_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaModel(config=config)
model.to(torch_device)
model.eval()
sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids)[0]

self.parent.assertListEqual(
list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size]
)

def create_and_check_deberta_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)

self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

def create_and_check_deberta_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
self.check_loss_output(result)

def create_and_check_deberta_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaForTokenClassification(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

def create_and_check_deberta_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict

def setUp(self):
self.model_tester = DebertaModelTest.DebertaModelTester(self)
self.model_tester = DebertaModelTester(self)
self.config_tester = ConfigTester(self, config_class=DebertaConfig, hidden_size=37)

def test_config(self):

@@ -12,10 +12,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from transformers import is_torch_available
from transformers import DebertaV2Config, is_torch_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@@ -26,7 +25,6 @@ if is_torch_available():
import torch

from transformers import (
DebertaV2Config,
DebertaV2ForMaskedLM,
DebertaV2ForQuestionAnswering,
DebertaV2ForSequenceClassification,
@@ -36,6 +34,179 @@ if is_torch_available():
from transformers.models.deberta_v2.modeling_deberta_v2 import DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST


class DebertaV2ModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
relative_attention=False,
position_biased_input=True,
pos_att_type="None",
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.relative_attention = relative_attention
self.position_biased_input = position_biased_input
self.pos_att_type = pos_att_type
self.scope = scope

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

input_mask = None
if self.use_input_mask:
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return DebertaV2Config(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
relative_attention=self.relative_attention,
position_biased_input=self.position_biased_input,
pos_att_type=self.pos_att_type,
)

def check_loss_output(self, result):
self.parent.assertListEqual(list(result.loss.size()), [])

def create_and_check_deberta_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaV2Model(config=config)
model.to(torch_device)
model.eval()
sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids)[0]

self.parent.assertListEqual(list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size])

def create_and_check_deberta_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaV2ForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)

self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

def create_and_check_deberta_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaV2ForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
self.check_loss_output(result)

def create_and_check_deberta_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaV2ForTokenClassification(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

def create_and_check_deberta_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaV2ForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict


@require_torch
class DebertaV2ModelTest(ModelTesterMixin, unittest.TestCase):

@@ -56,179 +227,8 @@ class DebertaV2ModelTest(ModelTesterMixin, unittest.TestCase):
test_head_masking = False
is_encoder_decoder = False

class DebertaV2ModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
relative_attention=False,
position_biased_input=True,
pos_att_type="None",
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.relative_attention = relative_attention
self.position_biased_input = position_biased_input
self.pos_att_type = pos_att_type
self.scope = scope

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

input_mask = None
if self.use_input_mask:
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = DebertaV2Config(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
relative_attention=self.relative_attention,
position_biased_input=self.position_biased_input,
pos_att_type=self.pos_att_type,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def check_loss_output(self, result):
self.parent.assertListEqual(list(result.loss.size()), [])

def create_and_check_deberta_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaV2Model(config=config)
model.to(torch_device)
model.eval()
sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids)[0]

self.parent.assertListEqual(
list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size]
)

def create_and_check_deberta_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaV2ForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)

self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

def create_and_check_deberta_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaV2ForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
self.check_loss_output(result)

def create_and_check_deberta_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = DebertaV2ForTokenClassification(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

def create_and_check_deberta_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DebertaV2ForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict

def setUp(self):
self.model_tester = DebertaV2ModelTest.DebertaV2ModelTester(self)
self.model_tester = DebertaV2ModelTester(self)
self.config_tester = ConfigTester(self, config_class=DebertaV2Config, hidden_size=37)

def test_config(self):

@@ -18,6 +18,7 @@
import inspect
import unittest

from transformers import DeiTConfig
from transformers.file_utils import cached_property, is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision, slow, torch_device

@@ -31,7 +32,6 @@ if is_torch_available():

from transformers import (
MODEL_MAPPING,
DeiTConfig,
DeiTForImageClassification,
DeiTForImageClassificationWithTeacher,
DeiTModel,
@@ -92,7 +92,12 @@ class DeiTModelTester:
if self.use_labels:
labels = ids_tensor([self.batch_size], self.type_sequence_label_size)

config = DeiTConfig(
config = self.get_config()

return config, pixel_values, labels

def get_config(self):
return DeiTConfig(
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
@@ -107,8 +112,6 @@ class DeiTModelTester:
initializer_range=self.initializer_range,
)

return config, pixel_values, labels

def create_and_check_model(self, config, pixel_values, labels):
model = DeiTModel(config=config)
model.to(torch_device)

@ -19,7 +19,7 @@ import inspect
import math
import unittest

from transformers import is_timm_available, is_vision_available
from transformers import DetrConfig, is_timm_available, is_vision_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_timm, require_vision, slow, torch_device

@ -31,7 +31,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init, floats_te
if is_timm_available():
import torch

from transformers import DetrConfig, DetrForObjectDetection, DetrForSegmentation, DetrModel
from transformers import DetrForObjectDetection, DetrForSegmentation, DetrModel


if is_vision_available():
@ -40,7 +40,6 @@ if is_vision_available():
from transformers import DetrFeatureExtractor


@require_timm
class DetrModelTester:
def __init__(
self,
@ -102,7 +101,11 @@ class DetrModelTester:
target["masks"] = torch.rand(self.n_targets, self.min_size, self.max_size, device=torch_device)
labels.append(target)

config = DetrConfig(
config = self.get_config()
return config, pixel_values, pixel_mask, labels

def get_config(self):
return DetrConfig(
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
decoder_layers=self.num_hidden_layers,
@ -115,7 +118,6 @@ class DetrModelTester:
num_queries=self.num_queries,
num_labels=self.num_labels,
)
return config, pixel_values, pixel_mask, labels

def prepare_config_and_inputs_for_common(self):
config, pixel_values, pixel_mask, labels = self.prepare_config_and_inputs()

@ -16,7 +16,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import DistilBertConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
@ -28,7 +28,6 @@ if is_torch_available():
|
||||
|
||||
from transformers import (
|
||||
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
DistilBertConfig,
|
||||
DistilBertForMaskedLM,
|
||||
DistilBertForMultipleChoice,
|
||||
DistilBertForQuestionAnswering,
|
||||
@ -37,160 +36,162 @@ if is_torch_available():
|
||||
DistilBertModel,
|
||||
)
|
||||
|
||||
class DistilBertModelTester(object):
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_input_mask=True,
|
||||
use_token_type_ids=False,
|
||||
use_labels=True,
|
||||
vocab_size=99,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=5,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_act="gelu",
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=512,
|
||||
type_vocab_size=16,
|
||||
type_sequence_label_size=2,
|
||||
initializer_range=0.02,
|
||||
num_labels=3,
|
||||
num_choices=4,
|
||||
scope=None,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.seq_length = seq_length
|
||||
self.is_training = is_training
|
||||
self.use_input_mask = use_input_mask
|
||||
self.use_token_type_ids = use_token_type_ids
|
||||
self.use_labels = use_labels
|
||||
self.vocab_size = vocab_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
self.intermediate_size = intermediate_size
|
||||
self.hidden_act = hidden_act
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.type_vocab_size = type_vocab_size
|
||||
self.type_sequence_label_size = type_sequence_label_size
|
||||
self.initializer_range = initializer_range
|
||||
self.num_labels = num_labels
|
||||
self.num_choices = num_choices
|
||||
self.scope = scope
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
class DistilBertModelTester(object):
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_input_mask=True,
|
||||
use_token_type_ids=False,
|
||||
use_labels=True,
|
||||
vocab_size=99,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=5,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_act="gelu",
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=512,
|
||||
type_vocab_size=16,
|
||||
type_sequence_label_size=2,
|
||||
initializer_range=0.02,
|
||||
num_labels=3,
|
||||
num_choices=4,
|
||||
scope=None,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.seq_length = seq_length
|
||||
self.is_training = is_training
|
||||
self.use_input_mask = use_input_mask
|
||||
self.use_token_type_ids = use_token_type_ids
|
||||
self.use_labels = use_labels
|
||||
self.vocab_size = vocab_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
self.intermediate_size = intermediate_size
|
||||
self.hidden_act = hidden_act
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.type_vocab_size = type_vocab_size
|
||||
self.type_sequence_label_size = type_sequence_label_size
|
||||
self.initializer_range = initializer_range
|
||||
self.num_labels = num_labels
|
||||
self.num_choices = num_choices
|
||||
self.scope = scope
|
||||
|
||||
input_mask = None
|
||||
if self.use_input_mask:
|
||||
input_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
sequence_labels = None
|
||||
token_labels = None
|
||||
choice_labels = None
|
||||
if self.use_labels:
|
||||
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
input_mask = None
|
||||
if self.use_input_mask:
|
||||
input_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = DistilBertConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
dim=self.hidden_size,
|
||||
n_layers=self.num_hidden_layers,
|
||||
n_heads=self.num_attention_heads,
|
||||
hidden_dim=self.intermediate_size,
|
||||
hidden_act=self.hidden_act,
|
||||
dropout=self.hidden_dropout_prob,
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
initializer_range=self.initializer_range,
|
||||
)
|
||||
sequence_labels = None
|
||||
token_labels = None
|
||||
choice_labels = None
|
||||
if self.use_labels:
|
||||
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
config = self.get_config()
|
||||
|
||||
def create_and_check_distilbert_model(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = DistilBertModel(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(input_ids, input_mask)
|
||||
result = model(input_ids)
|
||||
self.parent.assertEqual(
|
||||
result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)
|
||||
)
|
||||
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def create_and_check_distilbert_for_masked_lm(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = DistilBertForMaskedLM(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
||||
def get_config(self):
|
||||
return DistilBertConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
dim=self.hidden_size,
|
||||
n_layers=self.num_hidden_layers,
|
||||
n_heads=self.num_attention_heads,
|
||||
hidden_dim=self.intermediate_size,
|
||||
hidden_act=self.hidden_act,
|
||||
dropout=self.hidden_dropout_prob,
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
initializer_range=self.initializer_range,
|
||||
)
|
||||
|
||||
def create_and_check_distilbert_for_question_answering(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = DistilBertForQuestionAnswering(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(
|
||||
input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels
|
||||
)
|
||||
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
|
||||
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
|
||||
def create_and_check_distilbert_model(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = DistilBertModel(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(input_ids, input_mask)
|
||||
result = model(input_ids)
|
||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||
|
||||
def create_and_check_distilbert_for_sequence_classification(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = DistilBertForSequenceClassification(config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(input_ids, attention_mask=input_mask, labels=sequence_labels)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
||||
def create_and_check_distilbert_for_masked_lm(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = DistilBertForMaskedLM(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
||||
|
||||
def create_and_check_distilbert_for_token_classification(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = DistilBertForTokenClassification(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
def create_and_check_distilbert_for_question_answering(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = DistilBertForQuestionAnswering(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(
|
||||
input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels
|
||||
)
|
||||
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
|
||||
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
|
||||
|
||||
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
|
||||
def create_and_check_distilbert_for_sequence_classification(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = DistilBertForSequenceClassification(config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(input_ids, attention_mask=input_mask, labels=sequence_labels)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
||||
|
||||
def create_and_check_distilbert_for_multiple_choice(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_choices = self.num_choices
|
||||
model = DistilBertForMultipleChoice(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
||||
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
||||
result = model(
|
||||
multiple_choice_inputs_ids,
|
||||
attention_mask=multiple_choice_input_mask,
|
||||
labels=choice_labels,
|
||||
)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
|
||||
def create_and_check_distilbert_for_token_classification(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = DistilBertForTokenClassification(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
|
||||
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
return config, inputs_dict
|
||||
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
|
||||
|
||||
def create_and_check_distilbert_for_multiple_choice(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_choices = self.num_choices
|
||||
model = DistilBertForMultipleChoice(config=config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
||||
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
||||
result = model(
|
||||
multiple_choice_inputs_ids,
|
||||
attention_mask=multiple_choice_input_mask,
|
||||
labels=choice_labels,
|
||||
)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
|
||||
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
return config, inputs_dict
|
||||
|
||||
|
||||
@require_torch
|
||||
|
@ -16,7 +16,7 @@

import unittest

from transformers import is_torch_available
from transformers import DPRConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -26,7 +26,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention
if is_torch_available():
import torch

from transformers import DPRConfig, DPRContextEncoder, DPRQuestionEncoder, DPRReader, DPRReaderTokenizer
from transformers import DPRContextEncoder, DPRQuestionEncoder, DPRReader, DPRReaderTokenizer
from transformers.models.dpr.modeling_dpr import (
DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
@ -104,7 +104,12 @@ class DPRModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = DPRConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return DPRConfig(
projection_dim=self.projection_dim,
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
@ -119,8 +124,6 @@ class DPRModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def create_and_check_context_encoder(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):

@ -16,7 +16,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import ElectraConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
@ -29,7 +29,6 @@ if is_torch_available():
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_PRETRAINING_MAPPING,
|
||||
ElectraConfig,
|
||||
ElectraForMaskedLM,
|
||||
ElectraForMultipleChoice,
|
||||
ElectraForPreTraining,
|
||||
@ -89,7 +88,21 @@ class ElectraModelTester:
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1)
|
||||
|
||||
config = ElectraConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
fake_token_labels,
|
||||
)
|
||||
|
||||
def get_config(self):
|
||||
return ElectraConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_hidden_layers=self.num_hidden_layers,
|
||||
@ -104,17 +117,6 @@ class ElectraModelTester:
|
||||
initializer_range=self.initializer_range,
|
||||
)
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
fake_token_labels,
|
||||
)
|
||||
|
||||
def create_and_check_electra_model(
|
||||
self,
|
||||
config,
|
||||
|
@ -13,10 +13,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import FlaubertConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
@ -27,7 +26,6 @@ if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import (
|
||||
FlaubertConfig,
|
||||
FlaubertForMultipleChoice,
|
||||
FlaubertForQuestionAnswering,
|
||||
FlaubertForQuestionAnsweringSimple,
|
||||
@ -96,7 +94,22 @@ class FlaubertModelTester(object):
|
||||
is_impossible_labels = ids_tensor([self.batch_size], 2).float()
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = FlaubertConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_lengths,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
is_impossible_labels,
|
||||
choice_labels,
|
||||
input_mask,
|
||||
)
|
||||
|
||||
def get_config(self):
|
||||
return FlaubertConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
n_special=self.n_special,
|
||||
emb_dim=self.hidden_size,
|
||||
@ -115,18 +128,6 @@ class FlaubertModelTester(object):
|
||||
use_proj=self.use_proj,
|
||||
)
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_lengths,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
is_impossible_labels,
|
||||
choice_labels,
|
||||
input_mask,
|
||||
)
|
||||
|
||||
def create_and_check_flaubert_model(
|
||||
self,
|
||||
config,
|
||||
|
@ -19,7 +19,7 @@ import unittest
|
||||
import timeout_decorator # noqa
|
||||
|
||||
from parameterized import parameterized
|
||||
from transformers import is_torch_available
|
||||
from transformers import FSMTConfig, is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
@ -32,7 +32,7 @@ if is_torch_available():
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTModel, FSMTTokenizer
|
||||
from transformers import FSMTForConditionalGeneration, FSMTModel, FSMTTokenizer
|
||||
from transformers.models.fsmt.modeling_fsmt import (
|
||||
SinusoidalPositionalEmbedding,
|
||||
_prepare_fsmt_decoder_inputs,
|
||||
@ -42,8 +42,7 @@ if is_torch_available():
|
||||
from transformers.pipelines import TranslationPipeline
|
||||
|
||||
|
||||
@require_torch
|
||||
class ModelTester:
|
||||
class FSMTModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
@ -78,7 +77,12 @@ class ModelTester:
|
||||
)
|
||||
input_ids[:, -1] = 2 # Eos Token
|
||||
|
||||
config = FSMTConfig(
|
||||
config = self.get_config()
|
||||
inputs_dict = prepare_fsmt_inputs_dict(config, input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def get_config(self):
|
||||
return FSMTConfig(
|
||||
vocab_size=self.src_vocab_size, # hack needed for common tests
|
||||
src_vocab_size=self.src_vocab_size,
|
||||
tgt_vocab_size=self.tgt_vocab_size,
|
||||
@ -97,8 +101,6 @@ class ModelTester:
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
)
|
||||
inputs_dict = prepare_fsmt_inputs_dict(config, input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config, inputs_dict = self.prepare_config_and_inputs()
|
||||
@ -141,7 +143,7 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
|
||||
test_missing_keys = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = ModelTester(self)
|
||||
self.model_tester = FSMTModelTester(self)
|
||||
self.langs = ["en", "ru"]
|
||||
config = {
|
||||
"langs": self.langs,
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import FunnelTokenizer, is_torch_available
|
||||
from transformers import FunnelConfig, FunnelTokenizer, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
@ -30,7 +30,6 @@ if is_torch_available():
|
||||
from transformers import (
|
||||
MODEL_FOR_PRETRAINING_MAPPING,
|
||||
FunnelBaseModel,
|
||||
FunnelConfig,
|
||||
FunnelForMaskedLM,
|
||||
FunnelForMultipleChoice,
|
||||
FunnelForPreTraining,
|
||||
@ -127,7 +126,21 @@ class FunnelModelTester:
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1)
|
||||
|
||||
config = FunnelConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
fake_token_labels,
|
||||
)
|
||||
|
||||
def get_config(self):
|
||||
return FunnelConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
block_sizes=self.block_sizes,
|
||||
num_decoder_layers=self.num_decoder_layers,
|
||||
@ -143,17 +156,6 @@ class FunnelModelTester:
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
)
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
fake_token_labels,
|
||||
)
|
||||
|
||||
def create_and_check_model(
|
||||
self,
|
||||
config,
|
||||
|
@ -17,7 +17,7 @@
|
||||
import datetime
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import GPT2Config, is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
@ -30,7 +30,6 @@ if is_torch_available():
|
||||
|
||||
from transformers import (
|
||||
GPT2_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
GPT2Config,
|
||||
GPT2DoubleHeadsModel,
|
||||
GPT2ForSequenceClassification,
|
||||
GPT2LMHeadModel,
|
||||
@ -119,25 +118,7 @@ class GPT2ModelTester:
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = GPT2Config(
|
||||
vocab_size=self.vocab_size,
|
||||
n_embd=self.hidden_size,
|
||||
n_layer=self.num_hidden_layers,
|
||||
n_head=self.num_attention_heads,
|
||||
# intermediate_size=self.intermediate_size,
|
||||
# hidden_act=self.hidden_act,
|
||||
# hidden_dropout_prob=self.hidden_dropout_prob,
|
||||
# attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
n_positions=self.max_position_embeddings,
|
||||
n_ctx=self.max_position_embeddings,
|
||||
# type_vocab_size=self.type_vocab_size,
|
||||
# initializer_range=self.initializer_range,
|
||||
use_cache=not gradient_checkpointing,
|
||||
bos_token_id=self.bos_token_id,
|
||||
eos_token_id=self.eos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
gradient_checkpointing=gradient_checkpointing,
|
||||
)
|
||||
config = self.get_config(gradient_checkpointing=gradient_checkpointing)
|
||||
|
||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||
|
||||
@ -153,6 +134,27 @@ class GPT2ModelTester:
|
||||
choice_labels,
|
||||
)
|
||||
|
||||
def get_config(self, gradient_checkpointing=False):
|
||||
return GPT2Config(
|
||||
vocab_size=self.vocab_size,
|
||||
n_embd=self.hidden_size,
|
||||
n_layer=self.num_hidden_layers,
|
||||
n_head=self.num_attention_heads,
|
||||
intermediate_size=self.intermediate_size,
|
||||
hidden_act=self.hidden_act,
|
||||
hidden_dropout_prob=self.hidden_dropout_prob,
|
||||
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
n_positions=self.max_position_embeddings,
|
||||
n_ctx=self.max_position_embeddings,
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
use_cache=not gradient_checkpointing,
|
||||
bos_token_id=self.bos_token_id,
|
||||
eos_token_id=self.eos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
gradient_checkpointing=gradient_checkpointing,
|
||||
)
|
||||
|
||||
def prepare_config_and_inputs_for_decoder(self):
|
||||
(
|
||||
config,
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import GPTNeoConfig, is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
@ -32,7 +32,6 @@ if is_torch_available():
|
||||
from transformers import (
|
||||
GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
GPT2Tokenizer,
|
||||
GPTNeoConfig,
|
||||
GPTNeoForCausalLM,
|
||||
GPTNeoForSequenceClassification,
|
||||
GPTNeoModel,
|
||||
@ -123,20 +122,7 @@ class GPTNeoModelTester:
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = GPTNeoConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_layers=self.num_hidden_layers,
|
||||
num_heads=self.num_attention_heads,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
use_cache=not gradient_checkpointing,
|
||||
bos_token_id=self.bos_token_id,
|
||||
eos_token_id=self.eos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
gradient_checkpointing=gradient_checkpointing,
|
||||
window_size=self.window_size,
|
||||
attention_types=self.attention_types,
|
||||
)
|
||||
config = self.get_config(gradient_checkpointing=False)
|
||||
|
||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||
|
||||
@ -152,6 +138,22 @@ class GPTNeoModelTester:
|
||||
choice_labels,
|
||||
)
|
||||
|
||||
def get_config(self, gradient_checkpointing=False):
|
||||
return GPTNeoConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_layers=self.num_hidden_layers,
|
||||
num_heads=self.num_attention_heads,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
use_cache=not gradient_checkpointing,
|
||||
bos_token_id=self.bos_token_id,
|
||||
eos_token_id=self.eos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
gradient_checkpointing=gradient_checkpointing,
|
||||
window_size=self.window_size,
|
||||
attention_types=self.attention_types,
|
||||
)
|
||||
|
||||
def prepare_config_and_inputs_for_decoder(self):
|
||||
(
|
||||
config,
|
||||
|
@ -21,7 +21,7 @@ import unittest
import pytest

from tests.test_modeling_common import floats_tensor, ids_tensor, random_attention_mask
from transformers import is_torch_available
from transformers import HubertConfig, is_torch_available
from transformers.testing_utils import require_datasets, require_soundfile, require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -31,7 +31,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init
if is_torch_available():
import torch

from transformers import HubertConfig, HubertForCTC, HubertModel, Wav2Vec2Processor
from transformers import HubertForCTC, HubertModel, Wav2Vec2Processor
from transformers.models.hubert.modeling_hubert import _compute_mask_indices


@ -98,7 +98,12 @@ class HubertModelTester:
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
attention_mask = random_attention_mask([self.batch_size, self.seq_length])

config = HubertConfig(
config = self.get_config()

return config, input_values, attention_mask

def get_config(self):
return HubertConfig(
hidden_size=self.hidden_size,
feat_extract_norm=self.feat_extract_norm,
feat_extract_dropout=self.feat_extract_dropout,
@ -119,8 +124,6 @@ class HubertModelTester:
vocab_size=self.vocab_size,
)

return config, input_values, attention_mask

def create_and_check_model(self, config, input_values, attention_mask):
model = HubertModel(config=config)
model.to(torch_device)

@ -17,7 +17,7 @@
import copy
import unittest

from transformers import is_torch_available
from transformers import IBertConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -30,7 +30,6 @@ if is_torch_available():

from transformers import (
IBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
IBertConfig,
IBertForMaskedLM,
IBertForMultipleChoice,
IBertForQuestionAnswering,
@ -97,7 +96,12 @@ class IBertModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = IBertConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return IBertConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@ -112,8 +116,6 @@ class IBertModelTester:
quant_mode=True,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def create_and_check_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):

@ -16,7 +16,7 @@

import unittest

from transformers import is_torch_available
from transformers import LayoutLMConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -27,7 +27,6 @@ if is_torch_available():
import torch

from transformers import (
LayoutLMConfig,
LayoutLMForMaskedLM,
LayoutLMForSequenceClassification,
LayoutLMForTokenClassification,
@ -120,7 +119,12 @@ class LayoutLMModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = LayoutLMConfig(
config = self.get_config()

return config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return LayoutLMConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@ -134,8 +138,6 @@ class LayoutLMModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def create_and_check_model(
self, config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):

@ -19,7 +19,7 @@ import copy
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import LEDConfig, is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
@ -34,7 +34,6 @@ if is_torch_available():
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||
LEDConfig,
|
||||
LEDForConditionalGeneration,
|
||||
LEDForQuestionAnswering,
|
||||
LEDForSequenceClassification,
|
||||
@ -75,7 +74,6 @@ def prepare_led_inputs_dict(
|
||||
}
|
||||
|
||||
|
||||
@require_torch
|
||||
class LEDModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
@ -141,7 +139,12 @@ class LEDModelTester:
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = LEDConfig(
|
||||
config = self.get_config()
|
||||
inputs_dict = prepare_led_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def get_config(self):
|
||||
return LEDConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
@ -158,8 +161,6 @@ class LEDModelTester:
|
||||
pad_token_id=self.pad_token_id,
|
||||
attention_window=self.attention_window,
|
||||
)
|
||||
inputs_dict = prepare_led_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config, inputs_dict = self.prepare_config_and_inputs()
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import LongformerConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
@ -27,7 +27,6 @@ if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import (
|
||||
LongformerConfig,
|
||||
LongformerForMaskedLM,
|
||||
LongformerForMultipleChoice,
|
||||
LongformerForQuestionAnswering,
|
||||
@ -100,7 +99,12 @@ class LongformerModelTester:
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = LongformerConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def get_config(self):
|
||||
return LongformerConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_hidden_layers=self.num_hidden_layers,
|
||||
@ -115,8 +119,6 @@ class LongformerModelTester:
|
||||
attention_window=self.attention_window,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def create_and_check_attention_mask_determinism(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
|
@ -13,10 +13,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" Testing suite for the PyTorch LUKE model. """
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import LukeConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
@ -27,7 +26,6 @@ if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import (
|
||||
LukeConfig,
|
||||
LukeForEntityClassification,
|
||||
LukeForEntityPairClassification,
|
||||
LukeForEntitySpanClassification,
|
||||
@ -154,7 +152,25 @@ class LukeModelTester:
|
||||
[self.batch_size, self.entity_length], self.num_entity_span_classification_labels
|
||||
)
|
||||
|
||||
config = LukeConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
attention_mask,
|
||||
token_type_ids,
|
||||
entity_ids,
|
||||
entity_attention_mask,
|
||||
entity_token_type_ids,
|
||||
entity_position_ids,
|
||||
sequence_labels,
|
||||
entity_classification_labels,
|
||||
entity_pair_classification_labels,
|
||||
entity_span_classification_labels,
|
||||
)
|
||||
|
||||
def get_config(self):
|
||||
return LukeConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
entity_vocab_size=self.entity_vocab_size,
|
||||
entity_emb_size=self.entity_emb_size,
|
||||
@ -172,21 +188,6 @@ class LukeModelTester:
|
||||
use_entity_aware_attention=self.use_entity_aware_attention,
|
||||
)
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
attention_mask,
|
||||
token_type_ids,
|
||||
entity_ids,
|
||||
entity_attention_mask,
|
||||
entity_token_type_ids,
|
||||
entity_position_ids,
|
||||
sequence_labels,
|
||||
entity_classification_labels,
|
||||
entity_pair_classification_labels,
|
||||
entity_span_classification_labels,
|
||||
)
|
||||
|
||||
def create_and_check_model(
|
||||
self,
|
||||
config,
|
||||
|
@ -19,7 +19,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import LxmertConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
@ -33,7 +33,6 @@ if is_torch_available():
|
||||
from transformers import (
|
||||
MODEL_FOR_PRETRAINING_MAPPING,
|
||||
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||
LxmertConfig,
|
||||
LxmertForPreTraining,
|
||||
LxmertForQuestionAnswering,
|
||||
LxmertModel,
|
||||
@ -170,7 +169,24 @@ class LxmertModelTester:
|
||||
if self.task_matched:
|
||||
matched_label = ids_tensor([self.batch_size], self.num_labels)
|
||||
|
||||
config = LxmertConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
visual_feats,
|
||||
bounding_boxes,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
obj_labels,
|
||||
masked_lm_labels,
|
||||
matched_label,
|
||||
ans,
|
||||
output_attentions,
|
||||
)
|
||||
|
||||
def get_config(self):
|
||||
return LxmertConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_attention_heads=self.num_attention_heads,
|
||||
@ -204,20 +220,6 @@ class LxmertModelTester:
|
||||
output_hidden_states=self.output_hidden_states,
|
||||
)
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
visual_feats,
|
||||
bounding_boxes,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
obj_labels,
|
||||
masked_lm_labels,
|
||||
matched_label,
|
||||
ans,
|
||||
output_attentions,
|
||||
)
|
||||
|
||||
def create_and_check_lxmert_model(
|
||||
self,
|
||||
config,
|
||||
|
@ -19,7 +19,7 @@ import copy
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import M2M100Config, is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
@ -31,7 +31,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import M2M100Config, M2M100ForConditionalGeneration, M2M100Model, M2M100Tokenizer
|
||||
from transformers import M2M100ForConditionalGeneration, M2M100Model, M2M100Tokenizer
|
||||
from transformers.models.m2m_100.modeling_m2m_100 import M2M100Decoder, M2M100Encoder
|
||||
|
||||
|
||||
@ -66,7 +66,6 @@ def prepare_m2m_100_inputs_dict(
|
||||
}
|
||||
|
||||
|
||||
@require_torch
|
||||
class M2M100ModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
@ -125,7 +124,12 @@ class M2M100ModelTester:
|
||||
input_ids = input_ids.clamp(self.pad_token_id + 1)
|
||||
decoder_input_ids = decoder_input_ids.clamp(self.pad_token_id + 1)
|
||||
|
||||
config = M2M100Config(
|
||||
config = self.get_config()
|
||||
inputs_dict = prepare_m2m_100_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def get_config(self):
|
||||
return M2M100Config(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
@ -143,8 +147,6 @@ class M2M100ModelTester:
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
)
|
||||
inputs_dict = prepare_m2m_100_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config, inputs_dict = self.prepare_config_and_inputs()
|
||||
|
@ -17,7 +17,7 @@
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import MarianConfig, is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.hf_api import HfApi
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
@ -34,7 +34,6 @@ if is_torch_available():
|
||||
AutoConfig,
|
||||
AutoModelWithLMHead,
|
||||
AutoTokenizer,
|
||||
MarianConfig,
|
||||
MarianModel,
|
||||
MarianMTModel,
|
||||
TranslationPipeline,
|
||||
@ -83,7 +82,6 @@ def prepare_marian_inputs_dict(
|
||||
}
|
||||
|
||||
|
||||
@require_torch
|
||||
class MarianModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
@ -126,7 +124,6 @@ class MarianModelTester:
|
||||
self.decoder_start_token_id = decoder_start_token_id
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
|
||||
3,
|
||||
)
|
||||
@ -134,7 +131,12 @@ class MarianModelTester:
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = MarianConfig(
|
||||
config = self.get_config()
|
||||
inputs_dict = prepare_marian_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def get_config(self):
|
||||
return MarianConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
@ -151,8 +153,6 @@ class MarianModelTester:
|
||||
pad_token_id=self.pad_token_id,
|
||||
decoder_start_token_id=self.decoder_start_token_id,
|
||||
)
|
||||
inputs_dict = prepare_marian_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config, inputs_dict = self.prepare_config_and_inputs()
|
||||
|
@ -19,7 +19,7 @@ import copy
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import MBartConfig, is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
@ -34,7 +34,6 @@ if is_torch_available():
|
||||
from transformers import (
|
||||
AutoTokenizer,
|
||||
BatchEncoding,
|
||||
MBartConfig,
|
||||
MBartForCausalLM,
|
||||
MBartForConditionalGeneration,
|
||||
MBartForQuestionAnswering,
|
||||
@ -75,7 +74,6 @@ def prepare_mbart_inputs_dict(
|
||||
}
|
||||
|
||||
|
||||
@require_torch
|
||||
class MBartModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
@ -124,7 +122,12 @@ class MBartModelTester:
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = MBartConfig(
|
||||
config = self.get_config()
|
||||
inputs_dict = prepare_mbart_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def get_config(self):
|
||||
return MBartConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
@ -140,8 +143,6 @@ class MBartModelTester:
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
)
|
||||
inputs_dict = prepare_mbart_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config, inputs_dict = self.prepare_config_and_inputs()
|
||||
|
@ -19,7 +19,7 @@ import math
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import MegatronBertConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
@ -32,7 +32,6 @@ if is_torch_available():
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_PRETRAINING_MAPPING,
|
||||
MegatronBertConfig,
|
||||
MegatronBertForCausalLM,
|
||||
MegatronBertForMaskedLM,
|
||||
MegatronBertForMultipleChoice,
|
||||
@ -115,7 +114,12 @@ class MegatronBertModelTester:
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = MegatronBertConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def get_config(self):
|
||||
return MegatronBertConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_hidden_layers=self.num_hidden_layers,
|
||||
@ -131,8 +135,6 @@ class MegatronBertModelTester:
|
||||
initializer_range=self.initializer_range,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def create_and_check_megatron_bert_model(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import MobileBertConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
@ -29,7 +29,6 @@ if is_torch_available():
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_PRETRAINING_MAPPING,
|
||||
MobileBertConfig,
|
||||
MobileBertForMaskedLM,
|
||||
MobileBertForMultipleChoice,
|
||||
MobileBertForNextSentencePrediction,
|
||||
@ -111,7 +110,12 @@ class MobileBertModelTester:
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = MobileBertConfig(
|
||||
config = self.get_config()
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def get_config(self):
|
||||
return MobileBertConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_hidden_layers=self.num_hidden_layers,
|
||||
@ -127,8 +131,6 @@ class MobileBertModelTester:
|
||||
initializer_range=self.initializer_range,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def create_and_check_mobilebert_model(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
|
@ -16,7 +16,7 @@

import unittest

from transformers import is_torch_available
from transformers import MPNetConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -27,7 +27,6 @@ if is_torch_available():
import torch

from transformers import (
MPNetConfig,
MPNetForMaskedLM,
MPNetForMultipleChoice,
MPNetForQuestionAnswering,
@ -104,7 +103,11 @@ class MPNetModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = MPNetConfig(
config = self.get_config()
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return MPNetConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@ -116,7 +119,6 @@ class MPNetModelTester:
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
)
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

def create_and_check_mpnet_model(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

@ -17,7 +17,7 @@
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers import PegasusConfig, is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
@ -30,7 +30,7 @@ from .test_modeling_mbart import AbstractSeq2SeqIntegrationTest
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import AutoModelForSeq2SeqLM, PegasusConfig, PegasusForConditionalGeneration, PegasusModel
|
||||
from transformers import AutoModelForSeq2SeqLM, PegasusForConditionalGeneration, PegasusModel
|
||||
from transformers.models.pegasus.modeling_pegasus import PegasusDecoder, PegasusEncoder, PegasusForCausalLM
|
||||
|
||||
|
||||
@ -65,7 +65,6 @@ def prepare_pegasus_inputs_dict(
|
||||
}
|
||||
|
||||
|
||||
@require_torch
|
||||
class PegasusModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
@ -114,7 +113,12 @@ class PegasusModelTester:
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = PegasusConfig(
|
||||
config = self.get_config()
|
||||
inputs_dict = prepare_pegasus_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def get_config(self):
|
||||
return PegasusConfig(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
@ -130,8 +134,6 @@ class PegasusModelTester:
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
)
|
||||
inputs_dict = prepare_pegasus_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config, inputs_dict = self.prepare_config_and_inputs()
|
||||
|
@ -13,12 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import tempfile
import unittest

from transformers import is_torch_available
from transformers import ProphetNetConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -30,7 +29,6 @@ if is_torch_available():
import torch

from transformers import (
ProphetNetConfig,
ProphetNetDecoder,
ProphetNetEncoder,
ProphetNetForCausalLM,
@ -124,7 +122,19 @@ class ProphetNetModelTester:
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

config = ProphetNetConfig(
config = self.get_config()

return (
config,
input_ids,
decoder_input_ids,
attention_mask,
decoder_attention_mask,
lm_labels,
)

def get_config(self):
return ProphetNetConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_encoder_layers=self.num_encoder_layers,
@ -145,15 +155,6 @@ class ProphetNetModelTester:
is_encoder_decoder=self.is_encoder_decoder,
)

return (
config,
input_ids,
decoder_input_ids,
attention_mask,
decoder_attention_mask,
lm_labels,
)

def prepare_config_and_inputs_for_decoder(self):
(
config,
@ -15,7 +15,7 @@

import unittest

from transformers import is_torch_available
from transformers import ReformerConfig, is_torch_available
from transformers.testing_utils import (
require_sentencepiece,
require_tokenizers,
@ -36,7 +36,6 @@ if is_torch_available():

from transformers import (
REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
ReformerConfig,
ReformerForMaskedLM,
ReformerForQuestionAnswering,
ReformerForSequenceClassification,
@ -51,44 +50,44 @@ class ReformerModelTester:
def __init__(
self,
parent,
batch_size=None,
seq_length=None,
is_training=None,
is_decoder=None,
use_input_mask=None,
use_labels=None,
vocab_size=None,
attention_head_size=None,
hidden_size=None,
num_attention_heads=None,
local_attn_chunk_length=None,
local_num_chunks_before=None,
local_num_chunks_after=None,
batch_size=13,
seq_length=32,
is_training=True,
is_decoder=True,
use_input_mask=True,
use_labels=True,
vocab_size=32,
attention_head_size=16,
hidden_size=32,
num_attention_heads=2,
local_attn_chunk_length=4,
local_num_chunks_before=1,
local_num_chunks_after=0,
num_buckets=None,
num_hashes=1,
lsh_attn_chunk_length=None,
lsh_num_chunks_before=None,
lsh_num_chunks_after=None,
chunk_size_lm_head=None,
chunk_size_feed_forward=None,
feed_forward_size=None,
hidden_act=None,
hidden_dropout_prob=None,
local_attention_probs_dropout_prob=None,
chunk_size_lm_head=0,
chunk_size_feed_forward=0,
feed_forward_size=32,
hidden_act="gelu",
hidden_dropout_prob=0.1,
local_attention_probs_dropout_prob=0.1,
lsh_attention_probs_dropout_prob=None,
max_position_embeddings=None,
initializer_range=None,
axial_norm_std=None,
layer_norm_eps=None,
axial_pos_embds=None,
axial_pos_shape=None,
axial_pos_embds_dim=None,
attn_layers=None,
pad_token_id=None,
eos_token_id=None,
max_position_embeddings=512,
initializer_range=0.02,
axial_norm_std=1.0,
layer_norm_eps=1e-12,
axial_pos_embds=True,
axial_pos_shape=[4, 8],
axial_pos_embds_dim=[16, 16],
attn_layers=["local", "local", "local", "local"],
pad_token_id=0,
eos_token_id=2,
scope=None,
hash_seed=None,
num_labels=None,
hash_seed=0,
num_labels=2,
):
self.parent = parent
self.batch_size = batch_size
@ -101,7 +100,7 @@ class ReformerModelTester:
self.attention_head_size = attention_head_size
self.hidden_size = hidden_size
self.num_attention_heads = num_attention_heads
self.num_hidden_layers = len(attn_layers)
self.num_hidden_layers = len(attn_layers) if attn_layers is not None else 0
self.local_attn_chunk_length = local_attn_chunk_length
self.local_num_chunks_after = local_num_chunks_after
self.local_num_chunks_before = local_num_chunks_before
@ -149,7 +148,17 @@ class ReformerModelTester:
if self.use_labels:
choice_labels = ids_tensor([self.batch_size], 2)

config = ReformerConfig(
config = self.get_config()

return (
config,
input_ids,
input_mask,
choice_labels,
)

def get_config(self):
return ReformerConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@ -177,13 +186,6 @@ class ReformerModelTester:
hash_seed=self.hash_seed,
)

return (
config,
input_ids,
input_mask,
choice_labels,
)

def create_and_check_reformer_model(self, config, input_ids, input_mask, choice_labels):
model = ReformerModel(config=config)
model.to(torch_device)
@ -593,45 +595,8 @@ class ReformerLocalAttnModelTest(ReformerTesterMixin, GenerationTesterMixin, Mod
test_torchscript = False
test_sequence_classification_problem_types = True

def prepare_kwargs(self):
return {
"batch_size": 13,
"seq_length": 32,
"is_training": True,
"is_decoder": True,
"use_input_mask": True,
"use_labels": True,
"vocab_size": 32,
"attention_head_size": 16,
"hidden_size": 32,
"num_attention_heads": 2,
"local_attn_chunk_length": 4,
"local_num_chunks_before": 1,
"local_num_chunks_after": 0,
"chunk_size_lm_head": 0,
"chunk_size_feed_forward": 0,
"feed_forward_size": 32,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"local_attention_probs_dropout_prob": 0.1,
"max_position_embeddings": 512,
"initializer_range": 0.02,
"axial_norm_std": 1.0,
"layer_norm_eps": 1e-12,
"axial_pos_embds": True,
"axial_pos_shape": [4, 8],
"axial_pos_embds_dim": [16, 16],
"attn_layers": ["local", "local", "local", "local"],
"pad_token_id": 0,
"eos_token_id": 2,
"scope": None,
"hash_seed": 0,
"num_labels": 2,
}

def setUp(self):
tester_kwargs = self.prepare_kwargs()
self.model_tester = ReformerModelTester(self, **tester_kwargs)
self.model_tester = ReformerModelTester(self)
self.config_tester = ConfigTester(self, config_class=ReformerConfig, hidden_size=37)

@slow
@ -716,49 +681,46 @@ class ReformerLSHAttnModelTest(ReformerTesterMixin, ModelTesterMixin, Generation
test_headmasking = False
test_torchscript = False

def prepare_kwargs(self):
return {
"batch_size": 13,
"seq_length": 13,
"use_input_mask": True,
"use_labels": True,
"is_training": False,
"is_decoder": True,
"vocab_size": 32,
"attention_head_size": 16,
"hidden_size": 64,
"num_attention_heads": 2,
"num_buckets": 2,
"num_hashes": 4,
"lsh_attn_chunk_length": 4,
"lsh_num_chunks_before": 1,
"lsh_num_chunks_after": 0,
"chunk_size_lm_head": 5,
"chunk_size_feed_forward": 6,
"feed_forward_size": 32,
"hidden_act": "relu",
"hidden_dropout_prob": 0.1,
"lsh_attention_probs_dropout_prob": 0.1,
"max_position_embeddings": 512,
"initializer_range": 0.02,
"axial_norm_std": 1.0,
"layer_norm_eps": 1e-12,
"axial_pos_embds": True,
"axial_pos_shape": [4, 8],
"axial_pos_embds_dim": [16, 48],
# sanotheu
# "attn_layers": ["lsh", "lsh", "lsh", "lsh"],
"attn_layers": ["lsh"],
"pad_token_id": 0,
"eos_token_id": 2,
"scope": None,
"hash_seed": 0,
"num_labels": 2,
}

def setUp(self):
tester_kwargs = self.prepare_kwargs()
self.model_tester = ReformerModelTester(self, **tester_kwargs)
self.model_tester = ReformerModelTester(
self,
batch_size=13,
seq_length=13,
use_input_mask=True,
use_labels=True,
is_training=False,
is_decoder=True,
vocab_size=32,
attention_head_size=16,
hidden_size=64,
num_attention_heads=2,
num_buckets=2,
num_hashes=4,
lsh_attn_chunk_length=4,
lsh_num_chunks_before=1,
lsh_num_chunks_after=0,
chunk_size_lm_head=5,
chunk_size_feed_forward=6,
feed_forward_size=32,
hidden_act="relu",
hidden_dropout_prob=0.1,
lsh_attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
initializer_range=0.02,
axial_norm_std=1.0,
layer_norm_eps=1e-12,
axial_pos_embds=True,
axial_pos_shape=[4, 8],
axial_pos_embds_dim=[16, 48],
# sanotheu
# attn_layers=[lsh,lsh,lsh,lsh],
attn_layers=["lsh"],
pad_token_id=0,
eos_token_id=2,
scope=None,
hash_seed=0,
num_labels=2,
)
self.config_tester = ConfigTester(self, config_class=ReformerConfig, hidden_size=37)

def _check_attentions_for_generate(
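A side effect visible in the Reformer hunks above: the tester's defaults now live on ReformerModelTester.__init__ itself, so the local-attention test can call ReformerModelTester(self) with no kwargs dict, and the LSH variant only overrides the values that differ. A rough, self-contained sketch of that setUp() shape, with illustrative ToyTester/ToyLocalTest/ToyLSHTest names that are not part of this diff:

import unittest


class ToyTester:
    def __init__(self, parent, batch_size=13, seq_length=32, attn_layers=("local",) * 4):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.attn_layers = list(attn_layers)
        self.num_hidden_layers = len(self.attn_layers)


class ToyLocalTest(unittest.TestCase):
    def setUp(self):
        # the defaults cover this configuration, so no kwargs dict is needed any more
        self.model_tester = ToyTester(self)

    def test_defaults(self):
        self.assertEqual(self.model_tester.num_hidden_layers, 4)


class ToyLSHTest(unittest.TestCase):
    def setUp(self):
        # only the values that differ from the defaults are overridden
        self.model_tester = ToyTester(self, seq_length=13, attn_layers=("lsh",))

    def test_overrides(self):
        self.assertEqual(self.model_tester.num_hidden_layers, 1)


if __name__ == "__main__":
    unittest.main()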
@ -17,7 +17,7 @@
import unittest
from copy import deepcopy

from transformers import is_torch_available
from transformers import RobertaConfig, is_torch_available
from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -29,7 +29,6 @@ if is_torch_available():
import torch

from transformers import (
RobertaConfig,
RobertaForCausalLM,
RobertaForMaskedLM,
RobertaForMultipleChoice,
@ -94,7 +93,12 @@ class RobertaModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = RobertaConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return RobertaConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@ -108,8 +112,6 @@ class RobertaModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def prepare_config_and_inputs_for_decoder(self):
(
config,
@ -18,7 +18,7 @@
import unittest

from tests.test_modeling_common import floats_tensor
from transformers import is_torch_available
from transformers import RoFormerConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -29,7 +29,6 @@ if is_torch_available():
import torch

from transformers import (
RoFormerConfig,
RoFormerForCausalLM,
RoFormerForMaskedLM,
RoFormerForMultipleChoice,
@ -113,7 +112,12 @@ class RoFormerModelTester:
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = RoFormerConfig(
config = self.get_config()

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def get_config(self):
return RoFormerConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@ -128,8 +132,6 @@ class RoFormerModelTester:
initializer_range=self.initializer_range,
)

return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

def prepare_config_and_inputs_for_decoder(self):
(
config,
@ -14,13 +14,13 @@
# limitations under the License.
""" Testing suite for the PyTorch Speech2Text model. """

import copy
import inspect
import os
import tempfile
import unittest

from transformers import Speech2TextConfig
from transformers.file_utils import cached_property
from transformers.testing_utils import (
is_torch_available,
@ -40,12 +40,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init, floats_te
if is_torch_available():
import torch

from transformers import (
Speech2TextConfig,
Speech2TextForConditionalGeneration,
Speech2TextModel,
Speech2TextProcessor,
)
from transformers import Speech2TextForConditionalGeneration, Speech2TextModel, Speech2TextProcessor
from transformers.models.speech_to_text.modeling_speech_to_text import Speech2TextDecoder, Speech2TextEncoder

@ -142,7 +137,17 @@ class Speech2TextModelTester:
attention_mask = torch.ones([self.batch_size, self.seq_length], dtype=torch.long, device=torch_device)
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(2)

config = Speech2TextConfig(
config = self.get_config()
inputs_dict = prepare_speech_to_text_inputs_dict(
config,
input_features=input_features,
decoder_input_ids=decoder_input_ids,
attention_mask=attention_mask,
)
return config, inputs_dict

def get_config(self):
return Speech2TextConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
@ -165,13 +170,6 @@ class Speech2TextModelTester:
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
)
inputs_dict = prepare_speech_to_text_inputs_dict(
config,
input_features=input_features,
decoder_input_ids=decoder_input_ids,
attention_mask=attention_mask,
)
return config, inputs_dict

def prepare_config_and_inputs_for_common(self):
config, inputs_dict = self.prepare_config_and_inputs()
@ -16,7 +16,7 @@

import unittest

from transformers import is_torch_available
from transformers import SqueezeBertConfig, is_torch_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -28,7 +28,6 @@ if is_torch_available():

from transformers import (
SQUEEZEBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
SqueezeBertConfig,
SqueezeBertForMaskedLM,
SqueezeBertForMultipleChoice,
SqueezeBertForQuestionAnswering,
@ -37,179 +36,181 @@ if is_torch_available():
SqueezeBertModel,
)

class SqueezeBertModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=False,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=64,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
q_groups=2,
k_groups=2,
v_groups=2,
post_attention_groups=2,
intermediate_groups=4,
output_groups=1,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.q_groups = q_groups
self.k_groups = k_groups
self.v_groups = v_groups
self.post_attention_groups = post_attention_groups
self.intermediate_groups = intermediate_groups
self.output_groups = output_groups

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
class SqueezeBertModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=False,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=64,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
q_groups=2,
k_groups=2,
v_groups=2,
post_attention_groups=2,
intermediate_groups=4,
output_groups=1,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.q_groups = q_groups
self.k_groups = k_groups
self.v_groups = v_groups
self.post_attention_groups = post_attention_groups
self.intermediate_groups = intermediate_groups
self.output_groups = output_groups

input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])

config = SqueezeBertConfig(
embedding_size=self.hidden_size,
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
attention_probs_dropout_prob=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
q_groups=self.q_groups,
k_groups=self.k_groups,
v_groups=self.v_groups,
post_attention_groups=self.post_attention_groups,
intermediate_groups=self.intermediate_groups,
output_groups=self.output_groups,
)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)

return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
config = self.get_config()

def create_and_check_squeezebert_model(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = SqueezeBertModel(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, input_mask)
result = model(input_ids)
self.parent.assertEqual(
result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)
)
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

def create_and_check_squeezebert_for_masked_lm(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = SqueezeBertForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def get_config(self):
return SqueezeBertConfig(
embedding_size=self.hidden_size,
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
attention_probs_dropout_prob=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
q_groups=self.q_groups,
k_groups=self.k_groups,
v_groups=self.v_groups,
post_attention_groups=self.post_attention_groups,
intermediate_groups=self.intermediate_groups,
output_groups=self.output_groups,
)

def create_and_check_squeezebert_for_question_answering(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = SqueezeBertForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
def create_and_check_squeezebert_model(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = SqueezeBertModel(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, input_mask)
result = model(input_ids)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))

def create_and_check_squeezebert_for_sequence_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = SqueezeBertForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, labels=sequence_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_squeezebert_for_masked_lm(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = SqueezeBertForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

def create_and_check_squeezebert_for_token_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = SqueezeBertForTokenClassification(config=config)
model.to(torch_device)
model.eval()
def create_and_check_squeezebert_for_question_answering(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = SqueezeBertForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

result = model(input_ids, attention_mask=input_mask, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
def create_and_check_squeezebert_for_sequence_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = SqueezeBertForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, labels=sequence_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))

def create_and_check_squeezebert_for_multiple_choice(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices
model = SqueezeBertForMultipleChoice(config=config)
model.to(torch_device)
model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
result = model(
multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
labels=choice_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
def create_and_check_squeezebert_for_token_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = SqueezeBertForTokenClassification(config=config)
model.to(torch_device)
model.eval()

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

def create_and_check_squeezebert_for_multiple_choice(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices
model = SqueezeBertForMultipleChoice(config=config)
model.to(torch_device)
model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
result = model(
multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
labels=choice_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict

@require_torch
@ -18,7 +18,7 @@ import copy
import tempfile
import unittest

from transformers import is_torch_available
from transformers import T5Config, is_torch_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

@ -30,7 +30,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
if is_torch_available():
import torch

from transformers import ByT5Tokenizer, T5Config, T5EncoderModel, T5ForConditionalGeneration, T5Model, T5Tokenizer
from transformers import ByT5Tokenizer, T5EncoderModel, T5ForConditionalGeneration, T5Model, T5Tokenizer
from transformers.models.t5.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_LIST

@ -100,7 +100,19 @@ class T5ModelTester:
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

config = T5Config(
config = self.get_config()

return (
config,
input_ids,
decoder_input_ids,
attention_mask,
decoder_attention_mask,
lm_labels,
)

def get_config(self):
return T5Config(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
d_ff=self.d_ff,
@ -117,15 +129,6 @@ class T5ModelTester:
decoder_start_token_id=self.decoder_start_token_id,
)

return (
config,
input_ids,
decoder_input_ids,
attention_mask,
decoder_attention_mask,
lm_labels,
)

def check_prepare_lm_labels_via_shift_left(
self,
config,
@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import unittest

@ -29,6 +28,7 @@ from transformers import (
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
TapasConfig,
is_torch_available,
)
from transformers.file_utils import cached_property
@ -43,7 +43,6 @@ if is_torch_available():
import torch

from transformers import (
TapasConfig,
TapasForMaskedLM,
TapasForQuestionAnswering,
TapasForSequenceClassification,
@ -183,7 +182,24 @@ class TapasModelTester:
float_answer = floats_tensor([self.batch_size]).to(torch_device)
aggregation_labels = ids_tensor([self.batch_size], self.num_aggregation_labels).to(torch_device)

config = TapasConfig(
config = self.get_config()

return (
config,
input_ids,
input_mask,
token_type_ids,
sequence_labels,
token_labels,
labels,
numeric_values,
numeric_values_scale,
float_answer,
aggregation_labels,
)

def get_config(self):
return TapasConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
@ -220,20 +236,6 @@ class TapasModelTester:
disable_per_token_loss=self.disable_per_token_loss,
)

return (
config,
input_ids,
input_mask,
token_type_ids,
sequence_labels,
token_labels,
labels,
numeric_values,
numeric_values_scale,
float_answer,
aggregation_labels,
)

def create_and_check_model(
self,
config,
@ -17,7 +17,7 @@ import copy
import random
import unittest

from transformers import is_torch_available
from transformers import TransfoXLConfig, is_torch_available
from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device

from .test_configuration_common import ConfigTester
@ -29,7 +29,7 @@ if is_torch_available():
import torch
from torch import nn

from transformers import TransfoXLConfig, TransfoXLForSequenceClassification, TransfoXLLMHeadModel, TransfoXLModel
from transformers import TransfoXLForSequenceClassification, TransfoXLLMHeadModel, TransfoXLModel
from transformers.models.transfo_xl.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST

@ -69,7 +69,12 @@ class TransfoXLModelTester:
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

config = TransfoXLConfig(
config = self.get_config()

return (config, input_ids_1, input_ids_2, lm_labels)

def get_config(self):
return TransfoXLConfig(
vocab_size=self.vocab_size,
mem_len=self.mem_len,
clamp_len=self.clamp_len,
@ -85,8 +90,6 @@ class TransfoXLModelTester:
pad_token_id=self.pad_token_id,
)

return (config, input_ids_1, input_ids_2, lm_labels)

def set_seed(self):
random.seed(self.seed)
torch.manual_seed(self.seed)
@ -14,12 +14,11 @@
# limitations under the License.
""" Testing suite for the PyTorch VisualBERT model. """

import copy
import unittest

from tests.test_modeling_common import floats_tensor
from transformers import is_torch_available
from transformers import VisualBertConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -30,7 +29,6 @@ if is_torch_available():
import torch

from transformers import (
VisualBertConfig,
VisualBertForMultipleChoice,
VisualBertForPreTraining,
VisualBertForQuestionAnswering,
@ -98,7 +96,7 @@ class VisualBertModelTester:
self.num_choices = num_choices
self.scope = scope

def prepare_config(self):
def get_config(self):
return VisualBertConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
@ -138,7 +136,7 @@ class VisualBertModelTester:
if self.use_visual_token_type_ids:
visual_token_type_ids = ids_tensor([self.batch_size, self.visual_seq_length], self.type_vocab_size)

config = self.prepare_config()
config = self.get_config()
return config, {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
@ -198,7 +196,7 @@ class VisualBertModelTester:
if self.use_labels:
labels = ids_tensor([self.batch_size], self.num_choices)

config = self.prepare_config()
config = self.get_config()
return config, {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
@ -18,6 +18,7 @@
import inspect
import unittest

from transformers import ViTConfig
from transformers.file_utils import cached_property, is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision, slow, torch_device

@ -29,7 +30,7 @@ if is_torch_available():
import torch
from torch import nn

from transformers import ViTConfig, ViTForImageClassification, ViTModel
from transformers import ViTForImageClassification, ViTModel
from transformers.models.vit.modeling_vit import VIT_PRETRAINED_MODEL_ARCHIVE_LIST, to_2tuple

@ -86,7 +87,12 @@ class ViTModelTester:
if self.use_labels:
labels = ids_tensor([self.batch_size], self.type_sequence_label_size)

config = ViTConfig(
config = self.get_config()

return config, pixel_values, labels

def get_config(self):
return ViTConfig(
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
@ -101,8 +107,6 @@ class ViTModelTester:
initializer_range=self.initializer_range,
)

return config, pixel_values, labels

def create_and_check_model(self, config, pixel_values, labels):
model = ViTModel(config=config)
model.to(torch_device)
@ -21,7 +21,7 @@ import unittest
import pytest

from tests.test_modeling_common import floats_tensor, ids_tensor, random_attention_mask
from transformers import is_torch_available
from transformers import Wav2Vec2Config, is_torch_available
from transformers.testing_utils import require_datasets, require_soundfile, require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -32,7 +32,6 @@ if is_torch_available():
import torch

from transformers import (
Wav2Vec2Config,
Wav2Vec2FeatureExtractor,
Wav2Vec2ForCTC,
Wav2Vec2ForMaskedLM,
@ -106,7 +105,12 @@ class Wav2Vec2ModelTester:
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
attention_mask = random_attention_mask([self.batch_size, self.seq_length])

config = Wav2Vec2Config(
config = self.get_config()

return config, input_values, attention_mask

def get_config(self):
return Wav2Vec2Config(
hidden_size=self.hidden_size,
feat_extract_norm=self.feat_extract_norm,
feat_extract_dropout=self.feat_extract_dropout,
@ -127,8 +131,6 @@ class Wav2Vec2ModelTester:
vocab_size=self.vocab_size,
)

return config, input_values, attention_mask

def create_and_check_model(self, config, input_values, attention_mask):
model = Wav2Vec2Model(config=config)
model.to(torch_device)
@ -13,10 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from transformers import is_torch_available
from transformers import XLMConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -28,7 +27,6 @@ if is_torch_available():
import torch

from transformers import (
XLMConfig,
XLMForMultipleChoice,
XLMForQuestionAnswering,
XLMForQuestionAnsweringSimple,
@ -97,7 +95,22 @@ class XLMModelTester:
is_impossible_labels = ids_tensor([self.batch_size], 2).float()
choice_labels = ids_tensor([self.batch_size], self.num_choices)

config = XLMConfig(
config = self.get_config()

return (
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
choice_labels,
input_mask,
)

def get_config(self):
return XLMConfig(
vocab_size=self.vocab_size,
n_special=self.n_special,
emb_dim=self.hidden_size,
@ -118,18 +131,6 @@ class XLMModelTester:
bos_token_id=self.bos_token_id,
)

return (
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
choice_labels,
input_mask,
)

def create_and_check_xlm_model(
self,
config,
@ -13,11 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import unittest

from transformers import is_torch_available
from transformers import XLNetConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from .test_configuration_common import ConfigTester
@ -29,7 +28,6 @@ if is_torch_available():
import torch

from transformers import (
XLNetConfig,
XLNetForMultipleChoice,
XLNetForQuestionAnswering,
XLNetForQuestionAnsweringSimple,
@ -131,7 +129,25 @@ class XLNetModelTester:
is_impossible_labels = ids_tensor([self.batch_size], 2).float()
token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

config = XLNetConfig(
config = self.get_config()

return (
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
)

def get_config(self):
return XLNetConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
n_head=self.num_attention_heads,
@ -150,21 +166,6 @@ class XLNetModelTester:
eos_token_id=self.eos_token_id,
)

return (
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
)

def set_seed(self):
random.seed(self.seed)
torch.manual_seed(self.seed)