[Audio Processor] Only pass sr to feat extractor (#20022)

* [Audio Processor] Only pass sr to feat extractor

* move out of if/else

* copy to other processors
This commit is contained in:
Sanchit Gandhi 2022-11-08 08:59:03 +00:00 committed by GitHub
parent fb1c8db78a
commit 3e39fd09a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 12 additions and 6 deletions

View File

@ -58,6 +58,7 @@ class MCTCTProcessor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
@ -67,7 +68,7 @@ class MCTCTProcessor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)

View File

@ -61,6 +61,7 @@ class Speech2TextProcessor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
@ -70,7 +71,7 @@ class Speech2TextProcessor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)

View File

@ -60,6 +60,7 @@ class Speech2Text2Processor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
@ -69,7 +70,7 @@ class Speech2Text2Processor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)

View File

@ -80,6 +80,7 @@ class Wav2Vec2Processor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
@ -89,7 +90,7 @@ class Wav2Vec2Processor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)

View File

@ -228,6 +228,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
@ -237,7 +238,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)

View File

@ -85,6 +85,7 @@ class WhisperProcessor(ProcessorMixin):
return self.current_processor(*args, **kwargs)
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
@ -94,7 +95,7 @@ class WhisperProcessor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)