Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)
[Audio Processor] Only pass sr to feat extractor (#20022)

* [Audio Processor] Only pass sr to feat extractor
* move out of if/else
* copy to other processors

parent: fb1c8db78a
commit: 3e39fd09a9
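
The same two-line change is applied to every speech processor touched by this commit: `sampling_rate` is popped from the shared kwargs once, outside the `raw_speech`/`audio` if/else, and forwarded explicitly to the feature extractor only, so it is never passed on to the tokenizer through `**kwargs`. Below is a minimal sketch of the resulting `__call__` pattern; the class and its structure are illustrative, not the exact Transformers code.

```python
# Illustrative sketch of the __call__ pattern this commit standardizes across the
# audio processors; names and control flow are simplified, not the library code itself.
class ToySpeechProcessor:
    def __init__(self, feature_extractor, tokenizer):
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer

    def __call__(self, *args, **kwargs):
        if "raw_speech" in kwargs:
            audio = kwargs.pop("raw_speech")  # deprecated alias for `audio`
        else:
            audio = kwargs.pop("audio", None)
        # Popped once, outside the if/else, so the remaining kwargs no longer contain it ...
        sampling_rate = kwargs.pop("sampling_rate", None)
        text = kwargs.pop("text", None)
        if len(args) > 0:
            audio = args[0]
            args = args[1:]

        if audio is None and text is None:
            raise ValueError("You need to specify either an `audio` or `text` input to process.")

        if audio is not None:
            # ... and passed explicitly to the feature extractor only, never to the tokenizer.
            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
        if text is not None:
            encodings = self.tokenizer(text, **kwargs)

        if text is None:
            return inputs
        if audio is None:
            return encodings
        inputs["labels"] = encodings["input_ids"]
        return inputs
```
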
```diff
@@ -58,6 +58,7 @@ class MCTCTProcessor(ProcessorMixin):
             audio = kwargs.pop("raw_speech")
         else:
             audio = kwargs.pop("audio", None)
+        sampling_rate = kwargs.pop("sampling_rate", None)
         text = kwargs.pop("text", None)
         if len(args) > 0:
             audio = args[0]
@@ -67,7 +68,7 @@ class MCTCTProcessor(ProcessorMixin):
             raise ValueError("You need to specify either an `audio` or `text` input to process.")
 
         if audio is not None:
-            inputs = self.feature_extractor(audio, *args, **kwargs)
+            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
         if text is not None:
             encodings = self.tokenizer(text, **kwargs)
 
```
```diff
@@ -61,6 +61,7 @@ class Speech2TextProcessor(ProcessorMixin):
             audio = kwargs.pop("raw_speech")
         else:
             audio = kwargs.pop("audio", None)
+        sampling_rate = kwargs.pop("sampling_rate", None)
         text = kwargs.pop("text", None)
         if len(args) > 0:
             audio = args[0]
@@ -70,7 +71,7 @@ class Speech2TextProcessor(ProcessorMixin):
             raise ValueError("You need to specify either an `audio` or `text` input to process.")
 
         if audio is not None:
-            inputs = self.feature_extractor(audio, *args, **kwargs)
+            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
         if text is not None:
             encodings = self.tokenizer(text, **kwargs)
 
```
```diff
@@ -60,6 +60,7 @@ class Speech2Text2Processor(ProcessorMixin):
             audio = kwargs.pop("raw_speech")
         else:
             audio = kwargs.pop("audio", None)
+        sampling_rate = kwargs.pop("sampling_rate", None)
         text = kwargs.pop("text", None)
         if len(args) > 0:
             audio = args[0]
@@ -69,7 +70,7 @@ class Speech2Text2Processor(ProcessorMixin):
             raise ValueError("You need to specify either an `audio` or `text` input to process.")
 
         if audio is not None:
-            inputs = self.feature_extractor(audio, *args, **kwargs)
+            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
         if text is not None:
             encodings = self.tokenizer(text, **kwargs)
 
```
```diff
@@ -80,6 +80,7 @@ class Wav2Vec2Processor(ProcessorMixin):
             audio = kwargs.pop("raw_speech")
         else:
             audio = kwargs.pop("audio", None)
+        sampling_rate = kwargs.pop("sampling_rate", None)
         text = kwargs.pop("text", None)
         if len(args) > 0:
             audio = args[0]
@@ -89,7 +90,7 @@ class Wav2Vec2Processor(ProcessorMixin):
             raise ValueError("You need to specify either an `audio` or `text` input to process.")
 
         if audio is not None:
-            inputs = self.feature_extractor(audio, *args, **kwargs)
+            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
         if text is not None:
             encodings = self.tokenizer(text, **kwargs)
 
```
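
For `Wav2Vec2Processor`, the practical effect is that an explicit `sampling_rate` argument is consumed by the processor and handed only to the feature extractor. A brief usage sketch follows; the checkpoint name and the dummy waveform are assumptions for illustration, not part of this commit.

```python
import numpy as np
from transformers import Wav2Vec2Processor

# Assumed checkpoint for illustration; any Wav2Vec2 processor behaves the same way here.
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")

speech = np.zeros(16000, dtype=np.float32)  # dummy 1-second clip at 16 kHz

# sampling_rate is routed to the feature extractor only; remaining kwargs such as
# return_tensors stay in **kwargs and are shared with the tokenizer when text is given.
inputs = processor(audio=speech, sampling_rate=16000, return_tensors="np")
print(inputs.input_values.shape)  # (1, 16000)
```
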
```diff
@@ -228,6 +228,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
             audio = kwargs.pop("raw_speech")
         else:
             audio = kwargs.pop("audio", None)
+        sampling_rate = kwargs.pop("sampling_rate", None)
         text = kwargs.pop("text", None)
         if len(args) > 0:
             audio = args[0]
@@ -237,7 +238,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
             raise ValueError("You need to specify either an `audio` or `text` input to process.")
 
         if audio is not None:
-            inputs = self.feature_extractor(audio, *args, **kwargs)
+            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
         if text is not None:
             encodings = self.tokenizer(text, **kwargs)
 
```
```diff
@@ -85,6 +85,7 @@ class WhisperProcessor(ProcessorMixin):
             return self.current_processor(*args, **kwargs)
 
         audio = kwargs.pop("audio", None)
+        sampling_rate = kwargs.pop("sampling_rate", None)
         text = kwargs.pop("text", None)
         if len(args) > 0:
             audio = args[0]
@@ -94,7 +95,7 @@ class WhisperProcessor(ProcessorMixin):
             raise ValueError("You need to specify either an `audio` or `text` input to process.")
 
         if audio is not None:
-            inputs = self.feature_extractor(audio, *args, **kwargs)
+            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
         if text is not None:
             encodings = self.tokenizer(text, **kwargs)
 
```
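
With `WhisperProcessor`, the explicitly forwarded `sampling_rate` reaches the Whisper feature extractor (which computes the log-mel features), while `text` goes to the tokenizer, which never sees `sampling_rate`. A hedged usage sketch; the checkpoint name, dummy audio, and printed shape are illustrative assumptions.

```python
import numpy as np
from transformers import WhisperProcessor

# Assumed checkpoint for illustration.
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")

audio = np.zeros(16000, dtype=np.float32)  # dummy 1-second clip at 16 kHz

# sampling_rate goes to the feature extractor only (log-mel computation).
features = processor(audio=audio, sampling_rate=16000, return_tensors="np")
print(features.input_features.shape)  # e.g. (1, 80, 3000) for the tiny checkpoint

# A text-only call takes the tokenizer path and is unaffected by sampling_rate.
labels = processor(text="Hello world.")
print(labels.input_ids[:5])
```
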