mirror of https://github.com/huggingface/transformers.git
Wav2Vec2 models must either throw or deal with add_adapter (#15409)
* Wav2Vec2 models must either throw or deal with add_adapter
* Add pre-add_adapter backwards compatibility
* Fix issue in tests/test_modeling_wav2vec2.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
commit 7a1412e12b (parent a459f7f97d)
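Taken together, the hunks below apply one contract across seven speech models (Hubert, SEW, SEW-D, UniSpeech, UniSpeechSat, Wav2Vec2, WavLM): CTC heads deal with `config.add_adapter` by sizing `lm_head` from the adapter's `output_hidden_size`, while sequence- and audio-frame-classification heads throw. A minimal sketch of the two branches, distilled from the diff; the `Sketch*` classes here are hypothetical stand-ins, not the real model classes:

import torch.nn as nn


class SketchForCTC(nn.Module):
    # Hypothetical CTC head mirroring the pattern applied to every *ForCTC model.
    def __init__(self, config):
        super().__init__()
        # The `hasattr` check keeps configs serialized before `add_adapter`
        # existed loadable (see the backwards-compatibility note further down).
        output_hidden_size = (
            config.output_hidden_size
            if hasattr(config, "add_adapter") and config.add_adapter
            else config.hidden_size
        )
        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)


class SketchForSequenceClassification(nn.Module):
    # Hypothetical classification head: it cannot consume adapter outputs,
    # so it refuses the configuration outright instead of mis-sizing itself.
    def __init__(self, config):
        super().__init__()
        if hasattr(config, "add_adapter") and config.add_adapter:
            raise ValueError(
                "Sequence classification does not support the use of adapters (config.add_adapter=True)"
            )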
src/transformers/models/hubert/modeling_hubert.py

@@ -1097,7 +1097,10 @@ class HubertForCTC(HubertPreTrainedModel):
                 "instantiate the model as follows: `HubertForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
                 "or define `vocab_size` of your model's configuration."
             )
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        output_hidden_size = (
+            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
+        )
+        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -1215,6 +1218,10 @@ class HubertForSequenceClassification(HubertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Sequence classification does not support the use of Hubert adapters (config.add_adapter=True)"
+            )
         self.hubert = HubertModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
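The `hasattr` guard above is the "pre-add_adapter backwards compatibility" part of the commit: configs serialized before the attribute was introduced simply lack it, so unguarded attribute access would break loading old checkpoints. A small self-contained illustration, using a bare `SimpleNamespace` in place of a real `HubertConfig`:

from types import SimpleNamespace

# A config saved before `add_adapter` existed: the attribute is entirely absent.
old_config = SimpleNamespace(hidden_size=768, vocab_size=32)
# A config saved afterwards, with the adapter enabled.
new_config = SimpleNamespace(hidden_size=768, vocab_size=32, add_adapter=True, output_hidden_size=256)

for config in (old_config, new_config):
    output_hidden_size = (
        config.output_hidden_size
        if hasattr(config, "add_adapter") and config.add_adapter
        else config.hidden_size
    )
    print(output_hidden_size)  # 768 for old_config, 256 for new_config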
src/transformers/models/sew/modeling_sew.py

@@ -981,7 +981,10 @@ class SEWForCTC(SEWPreTrainedModel):
                 "instantiate the model as follows: `SEWForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
                 "or define `vocab_size` of your model's configuration."
             )
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        output_hidden_size = (
+            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
+        )
+        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -1099,6 +1102,10 @@ class SEWForSequenceClassification(SEWPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Sequence classification does not support the use of SEW adapters (config.add_adapter=True)"
+            )
         self.sew = SEWModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
src/transformers/models/sew_d/modeling_sew_d.py

@@ -1513,7 +1513,10 @@ class SEWDForCTC(SEWDPreTrainedModel):
                 "instantiate the model as follows: `SEWDForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
                 "or define `vocab_size` of your model's configuration."
             )
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        output_hidden_size = (
+            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
+        )
+        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -1631,6 +1634,10 @@ class SEWDForSequenceClassification(SEWDPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Sequence classification does not support the use of SEWD adapters (config.add_adapter=True)"
+            )
         self.sew_d = SEWDModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
src/transformers/models/unispeech/modeling_unispeech.py

@@ -1374,7 +1374,10 @@ class UniSpeechForCTC(UniSpeechPreTrainedModel):
                 "instantiate the model as follows: `UniSpeechForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
                 "or define `vocab_size` of your model's configuration."
             )
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        output_hidden_size = (
+            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
+        )
+        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -1492,6 +1495,10 @@ class UniSpeechForSequenceClassification(UniSpeechPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Sequence classification does not support the use of UniSpeech adapters (config.add_adapter=True)"
+            )
         self.unispeech = UniSpeechModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
src/transformers/models/unispeech_sat/modeling_unispeech_sat.py

@@ -1402,7 +1402,10 @@ class UniSpeechSatForCTC(UniSpeechSatPreTrainedModel):
                 "instantiate the model as follows: `UniSpeechSatForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
                 "or define `vocab_size` of your model's configuration."
             )
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        output_hidden_size = (
+            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
+        )
+        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -1520,6 +1523,10 @@ class UniSpeechSatForSequenceClassification(UniSpeechSatPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Sequence classification does not support the use of UniSpeechSat adapters (config.add_adapter=True)"
+            )
         self.unispeech_sat = UniSpeechSatModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
@@ -1640,6 +1647,10 @@ class UniSpeechSatForAudioFrameClassification(UniSpeechSatPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Audio frame classification does not support the use of UniSpeechSat adapters (config.add_adapter=True)"
+            )
         self.unispeech_sat = UniSpeechSatModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
src/transformers/models/wav2vec2/modeling_wav2vec2.py

@@ -1702,7 +1702,10 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
                 "instantiate the model as follows: `Wav2Vec2ForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
                 "or define `vocab_size` of your model's configuration."
             )
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        output_hidden_size = (
+            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
+        )
+        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -1819,6 +1822,10 @@ class Wav2Vec2ForSequenceClassification(Wav2Vec2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Sequence classification does not support the use of Wav2Vec2 adapters (config.add_adapter=True)"
+            )
         self.wav2vec2 = Wav2Vec2Model(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
@@ -1938,6 +1945,10 @@ class Wav2Vec2ForAudioFrameClassification(Wav2Vec2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Audio frame classification does not support the use of Wav2Vec2 adapters (config.add_adapter=True)"
+            )
         self.wav2vec2 = Wav2Vec2Model(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
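With the Wav2Vec2 hunks applied, both behaviors are observable from the public API. A usage sketch, assuming a `transformers` version that includes this commit (the `output_hidden_size=512` value is arbitrary):

from transformers import Wav2Vec2Config, Wav2Vec2ForCTC, Wav2Vec2ForSequenceClassification

config = Wav2Vec2Config(add_adapter=True, output_hidden_size=512)

# CTC deals with the adapter: the head consumes the adapter's projected width.
model = Wav2Vec2ForCTC(config)
assert model.lm_head.in_features == 512

# Sequence classification throws instead of silently building a mis-sized head.
try:
    Wav2Vec2ForSequenceClassification(config)
except ValueError as err:
    print(err)  # "Sequence classification does not support the use of Wav2Vec2 adapters ..."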
src/transformers/models/wavlm/modeling_wavlm.py

@@ -1352,7 +1352,10 @@ class WavLMForCTC(WavLMPreTrainedModel):
                 "instantiate the model as follows: `WavLMForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
                 "or define `vocab_size` of your model's configuration."
             )
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        output_hidden_size = (
+            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
+        )
+        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -1470,6 +1473,10 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Sequence classification does not support the use of WavLM adapters (config.add_adapter=True)"
+            )
         self.wavlm = WavLMModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
@@ -1590,6 +1597,10 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
+        if hasattr(config, "add_adapter") and config.add_adapter:
+            raise ValueError(
+                "Audio frame classification does not support the use of WavLM adapters (config.add_adapter=True)"
+            )
         self.wavlm = WavLMModel(config)
         num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
         if config.use_weighted_layer_sum:
tests/test_modeling_wav2vec2.py

@@ -202,6 +202,17 @@ class Wav2Vec2ModelTester:
             result.last_hidden_state.shape, (self.batch_size, self.adapter_output_seq_length, self.hidden_size)
         )
 
+    def create_and_check_model_with_adapter_for_ctc(self, config, input_values, attention_mask):
+        config.add_adapter = True
+        config.output_hidden_size = 2 * config.hidden_size
+        model = Wav2Vec2ForCTC(config=config)
+        model.to(torch_device)
+        model.eval()
+        result = model(input_values, attention_mask=attention_mask)
+        self.parent.assertEqual(
+            result.logits.shape, (self.batch_size, self.adapter_output_seq_length, self.vocab_size)
+        )
+
     def create_and_check_model_with_adapter_proj_dim(self, config, input_values, attention_mask):
         config.add_adapter = True
         config.output_hidden_size = 8
@@ -414,6 +425,10 @@ class Wav2Vec2ModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model_with_adapter(*config_and_inputs)
 
+    def test_model_with_adapter_for_ctc(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_model_with_adapter_for_ctc(*config_and_inputs)
+
     def test_model_with_adapter_proj_dim(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model_with_adapter_proj_dim(*config_and_inputs)
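The new tester method sets `output_hidden_size` to twice `hidden_size` on purpose: an `lm_head` still built from `config.hidden_size` would then fail the forward pass with a shape error rather than pass by coincidence. The asserted logits shape follows from the adapter downsampling the encoder output; a rough sketch with illustrative numbers, assuming the adapter's default strided convolution (kernel 3, stride 2, padding 1):

# Illustrative values only; the real ones come from Wav2Vec2ModelTester.
batch_size, encoder_seq_length, vocab_size = 2, 24, 32
num_adapter_layers, adapter_stride = 3, 2

# Each adapter layer is a strided 1-D convolution, so the sequence length
# shrinks roughly by a factor of `adapter_stride` per layer.
adapter_output_seq_length = encoder_seq_length
for _ in range(num_adapter_layers):
    adapter_output_seq_length = (adapter_output_seq_length - 1) // adapter_stride + 1

print((batch_size, adapter_output_seq_length, vocab_size))  # expected logits shape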