diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py
index c3332512ac4..358de58abbb 100755
--- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py
@@ -1283,7 +1283,7 @@ class Speech2TextForConditionalGeneration(Speech2TextPreTrainedModel):
             >>> import soundfile as sf
 
             >>> model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
-            >>> processor = Speech2Textprocessor.from_pretrained("facebook/s2t-small-librispeech-asr")
+            >>> processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
 
             >>> def map_to_array(batch):
             >>>     speech, _ = sf.read(batch["file"])
@@ -1293,7 +1293,7 @@ class Speech2TextForConditionalGeneration(Speech2TextPreTrainedModel):
             >>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
             >>> ds = ds.map(map_to_array)
 
-            >>> input_features = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt").input_features # Batch size 1
+            >>> input_features = processor(ds["speech"][0], sampling_rate=16000, return_tensors="pt").input_features # Batch size 1
             >>> generated_ids = model.generate(input_ids=input_features)
 
             >>> transcription = processor.batch_decode(generated_ids)