Add w2v2bert to pipeline (#28585)

* generalize asr pipeline to fbank models

* change w2v2 pipeline output

* Update test_pipelines_automatic_speech_recognition.py
This commit is contained in:
Yoach Lacombe 2024-01-19 11:25:01 +00:00 committed by GitHub
parent b2748a6efd
commit 268fc1fdfa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 22 additions and 2 deletions

View File

@ -517,8 +517,11 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
out["stride"] = stride
else:
input_values = model_inputs.pop("input_values")
outputs = self.model(input_values=input_values, attention_mask=attention_mask)
inputs = {
self.model.main_input_name: model_inputs.pop(self.model.main_input_name),
"attention_mask": attention_mask,
}
outputs = self.model(**inputs)
logits = outputs.logits
if self.type == "ctc_with_lm":

View File

@ -298,6 +298,23 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
output = speech_recognizer(filename)
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@require_torch
@slow
def test_torch_large_with_input_features(self):
speech_recognizer = pipeline(
task="automatic-speech-recognition",
model="hf-audio/wav2vec2-bert-CV16-en",
framework="pt",
)
waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
output = speech_recognizer(waveform)
self.assertEqual(output, {"text": ""})
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
filename = ds[40]["file"]
output = speech_recognizer(filename)
self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
@slow
@require_torch
@slow