src/transformers/models/wav2vec2/modeling_wav2vec2.py src/transformers/models/wav2vec2/tokenization_wav2vec2.py src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py src/transformers/models/hubert/modeling_hubert.py src/transformers/models/wavlm/modeling_wavlm.py src/transformers/models/unispeech/modeling_unispeech.py src/transformers/models/unispeech_sat/modeling_unispeech_sat.py src/transformers/models/sew/modeling_sew.py src/transformers/models/sew_d/modeling_sew_d.py src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py src/transformers/models/speech_to_text/modeling_speech_to_text.py src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py src/transformers/models/data2vec/modeling_data2vec_audio.py src/transformers/models/vit/modeling_vit.py src/transformers/models/beit/modeling_beit.py src/transformers/models/deit/modeling_deit.py src/transformers/models/swin/modeling_swin.py src/transformers/models/convnext/modeling_convnext.py src/transformers/models/poolformer/modeling_poolformer.py src/transformers/models/vit_mae/modeling_vit_mae.py src/transformers/models/segformer/modeling_segformer.py src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py docs/source/quicktour.mdx docs/source/task_summary.mdx