hertz is already per second (#21188)

This commit is contained in:
Matthijs Hollemans 2023-01-19 16:21:08 +01:00 committed by GitHub
parent 4bc18e7a83
commit 9b42c68f7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 6 additions and 6 deletions

View File

@ -34,7 +34,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin):
feature_size (`int`):
The feature dimension of the extracted features.
sampling_rate (`int`):
The sampling rate at which the audio files should be digitalized expressed in Hertz per second (Hz).
The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
padding_value (`float`):
The value that is used to fill the padding values / vectors.
"""

View File

@ -44,7 +44,7 @@ class ASTFeatureExtractor(SequenceFeatureExtractor):
feature_size (`int`, *optional*, defaults to 1):
The feature dimension of the extracted features.
sampling_rate (`int`, *optional*, defaults to 16000):
The sampling rate at which the audio files should be digitalized expressed in Hertz per second (Hz).
The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
num_mel_bins (`int`, *optional*, defaults to 128):
Number of Mel-frequency bins.
max_length (`int`, *optional*, defaults to 1024):

View File

@ -53,7 +53,7 @@ class MCTCTFeatureExtractor(SequenceFeatureExtractor):
feature_size (`int`, defaults to 80):
The feature dimension of the extracted features. This is the number of mel_frequency
sampling_rate (`int`, defaults to 16000):
The sampling rate at which the audio files should be digitalized expressed in Hertz per second (Hz).
The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
padding_value (`float`, defaults to 0.0):
The value that is used to fill the padding values.
hop_length (`int`, defaults to 10):

View File

@ -44,7 +44,7 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
feature_size (`int`, defaults to 80):
The feature dimension of the extracted features.
sampling_rate (`int`, defaults to 16000):
The sampling rate at which the audio files should be digitalized expressed in Hertz per second (Hz).
The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
num_mel_bins (`int`, defaults to 80):
Number of Mel-frequency bins.
padding_value (`float`, defaults to 0.0):

View File

@ -39,7 +39,7 @@ class Wav2Vec2FeatureExtractor(SequenceFeatureExtractor):
feature_size (`int`, defaults to 1):
The feature dimension of the extracted features.
sampling_rate (`int`, defaults to 16000):
The sampling rate at which the audio files should be digitalized expressed in Hertz per second (Hz).
The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
padding_value (`float`, defaults to 0.0):
The value that is used to fill the padding values.
do_normalize (`bool`, *optional*, defaults to `True`):

View File

@ -43,7 +43,7 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor):
feature_size (`int`, defaults to 80):
The feature dimension of the extracted features.
sampling_rate (`int`, defaults to 16000):
The sampling rate at which the audio files should be digitalized expressed in Hertz per second (Hz).
The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
hop_length (`int`, defaults to 160):
Length of the overlaping windows for the STFT used to obtain the Mel Frequency coefficients.
chunk_length (`int`, defaults to 30):