Wav2Vec2 models must either throw or deal with add_apater (#15409)

* Wav2Vec2 models must either throw or deal with add_apater

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* Add pre-add_adapter backwards compatibility

* Add pre-add_adapter backwards compatibility

* Fix issue in tests/test_modeling_wav2vec2.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
François REMY 2022-02-07 17:03:12 +01:00 committed by GitHub
parent a459f7f97d
commit 7a1412e12b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 83 additions and 7 deletions

View File

@ -1097,7 +1097,10 @@ class HubertForCTC(HubertPreTrainedModel):
"instantiate the model as follows: `HubertForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
"or define `vocab_size` of your model's configuration."
)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
output_hidden_size = (
config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
)
self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
# Initialize weights and apply final processing
self.post_init()
@ -1215,6 +1218,10 @@ class HubertForSequenceClassification(HubertPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Sequence classification does not support the use of Hubert adapters (config.add_adapter=True)"
)
self.hubert = HubertModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:

View File

@ -981,7 +981,10 @@ class SEWForCTC(SEWPreTrainedModel):
"instantiate the model as follows: `SEWForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
"or define `vocab_size` of your model's configuration."
)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
output_hidden_size = (
config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
)
self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
# Initialize weights and apply final processing
self.post_init()
@ -1099,6 +1102,10 @@ class SEWForSequenceClassification(SEWPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Sequence classification does not support the use of SEW adapters (config.add_adapter=True)"
)
self.sew = SEWModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:

View File

@ -1513,7 +1513,10 @@ class SEWDForCTC(SEWDPreTrainedModel):
"instantiate the model as follows: `SEWDForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
"or define `vocab_size` of your model's configuration."
)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
output_hidden_size = (
config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
)
self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
# Initialize weights and apply final processing
self.post_init()
@ -1631,6 +1634,10 @@ class SEWDForSequenceClassification(SEWDPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Sequence classification does not support the use of SEWD adapters (config.add_adapter=True)"
)
self.sew_d = SEWDModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:

View File

@ -1374,7 +1374,10 @@ class UniSpeechForCTC(UniSpeechPreTrainedModel):
"instantiate the model as follows: `UniSpeechForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
"or define `vocab_size` of your model's configuration."
)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
output_hidden_size = (
config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
)
self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
# Initialize weights and apply final processing
self.post_init()
@ -1492,6 +1495,10 @@ class UniSpeechForSequenceClassification(UniSpeechPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Sequence classification does not support the use of UniSpeech adapters (config.add_adapter=True)"
)
self.unispeech = UniSpeechModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:

View File

@ -1402,7 +1402,10 @@ class UniSpeechSatForCTC(UniSpeechSatPreTrainedModel):
"instantiate the model as follows: `UniSpeechSatForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
"or define `vocab_size` of your model's configuration."
)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
output_hidden_size = (
config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
)
self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
# Initialize weights and apply final processing
self.post_init()
@ -1520,6 +1523,10 @@ class UniSpeechSatForSequenceClassification(UniSpeechSatPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Sequence classification does not support the use of UniSpeechSat adapters (config.add_adapter=True)"
)
self.unispeech_sat = UniSpeechSatModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:
@ -1640,6 +1647,10 @@ class UniSpeechSatForAudioFrameClassification(UniSpeechSatPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Audio frame classification does not support the use of UniSpeechSat adapters (config.add_adapter=True)"
)
self.unispeech_sat = UniSpeechSatModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:

View File

@ -1702,7 +1702,10 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
"instantiate the model as follows: `Wav2Vec2ForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
"or define `vocab_size` of your model's configuration."
)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
output_hidden_size = (
config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
)
self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
# Initialize weights and apply final processing
self.post_init()
@ -1819,6 +1822,10 @@ class Wav2Vec2ForSequenceClassification(Wav2Vec2PreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Sequence classification does not support the use of Wav2Vec2 adapters (config.add_adapter=True)"
)
self.wav2vec2 = Wav2Vec2Model(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:
@ -1938,6 +1945,10 @@ class Wav2Vec2ForAudioFrameClassification(Wav2Vec2PreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Audio frame classification does not support the use of Wav2Vec2 adapters (config.add_adapter=True)"
)
self.wav2vec2 = Wav2Vec2Model(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:

View File

@ -1352,7 +1352,10 @@ class WavLMForCTC(WavLMPreTrainedModel):
"instantiate the model as follows: `WavLMForCTC.from_pretrained(..., vocab_size=vocab_size)`. "
"or define `vocab_size` of your model's configuration."
)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
output_hidden_size = (
config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
)
self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)
# Initialize weights and apply final processing
self.post_init()
@ -1470,6 +1473,10 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Sequence classification does not support the use of WavLM adapters (config.add_adapter=True)"
)
self.wavlm = WavLMModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:
@ -1590,6 +1597,10 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel):
def __init__(self, config):
super().__init__(config)
if hasattr(config, "add_adapter") and config.add_adapter:
raise ValueError(
"Audio frame classification does not support the use of WavLM adapters (config.add_adapter=True)"
)
self.wavlm = WavLMModel(config)
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:

View File

@ -202,6 +202,17 @@ class Wav2Vec2ModelTester:
result.last_hidden_state.shape, (self.batch_size, self.adapter_output_seq_length, self.hidden_size)
)
def create_and_check_model_with_adapter_for_ctc(self, config, input_values, attention_mask):
config.add_adapter = True
config.output_hidden_size = 2 * config.hidden_size
model = Wav2Vec2ForCTC(config=config)
model.to(torch_device)
model.eval()
result = model(input_values, attention_mask=attention_mask)
self.parent.assertEqual(
result.logits.shape, (self.batch_size, self.adapter_output_seq_length, self.vocab_size)
)
def create_and_check_model_with_adapter_proj_dim(self, config, input_values, attention_mask):
config.add_adapter = True
config.output_hidden_size = 8
@ -414,6 +425,10 @@ class Wav2Vec2ModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model_with_adapter(*config_and_inputs)
def test_model_with_adapter_for_ctc(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model_with_adapter_for_ctc(*config_and_inputs)
def test_model_with_adapter_proj_dim(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model_with_adapter_proj_dim(*config_and_inputs)