Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-30 17:52:35 +06:00)
Add w2v2bert to pipeline (#28585)
* generalize asr pipeline to fbank models
* change w2v2 pipeline output
* Update test_pipelines_automatic_speech_recognition.py
This commit is contained in:
parent b2748a6efd
commit 268fc1fdfa
@@ -517,8 +517,11 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
                 out["stride"] = stride
         else:
-            input_values = model_inputs.pop("input_values")
-            outputs = self.model(input_values=input_values, attention_mask=attention_mask)
+            inputs = {
+                self.model.main_input_name: model_inputs.pop(self.model.main_input_name),
+                "attention_mask": attention_mask,
+            }
+            outputs = self.model(**inputs)
             logits = outputs.logits
 
             if self.type == "ctc_with_lm":
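The change above stops hard-coding the "input_values" key and instead keys the forward kwargs on the model's own main_input_name, so filterbank-feature (fbank) models such as Wav2Vec2-BERT go through the same CTC path as raw-waveform models. A minimal sketch of why this generalizes, assuming a transformers version that ships Wav2Vec2-BERT (4.37+); the forward_kwargs helper is illustrative only, not part of the pipeline:

# Sketch: both model families expose `main_input_name` as a class attribute,
# so the pipeline can pop whichever tensor the model actually expects.
from transformers import Wav2Vec2ForCTC, Wav2Vec2BertForCTC

print(Wav2Vec2ForCTC.main_input_name)      # "input_values"   -> raw-waveform models
print(Wav2Vec2BertForCTC.main_input_name)  # "input_features" -> fbank/log-mel models


def forward_kwargs(model, model_inputs, attention_mask):
    # Mirrors the diff above: build the kwargs dict from the model's main input.
    return {
        model.main_input_name: model_inputs.pop(model.main_input_name),
        "attention_mask": attention_mask,
    }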
@@ -298,6 +298,23 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
 
+    @require_torch
+    @slow
+    def test_torch_large_with_input_features(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="hf-audio/wav2vec2-bert-CV16-en",
+            framework="pt",
+        )
+        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
+        output = speech_recognizer(waveform)
+        self.assertEqual(output, {"text": ""})
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = speech_recognizer(filename)
+        self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
+
     @slow
     @require_torch
     @slow
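For reference, a usage sketch that mirrors the first half of the new test outside the test harness (it downloads hf-audio/wav2vec2-bert-CV16-en from the Hub on first use):

import numpy as np
from transformers import pipeline

asr = pipeline(
    task="automatic-speech-recognition",
    model="hf-audio/wav2vec2-bert-CV16-en",
    framework="pt",
)

# Synthetic 16 kHz waveform; the test above asserts it decodes to an empty string.
waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
print(asr(waveform))  # {'text': ''}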