# transformers/docs/source/en/_toctree.yml

# "Get started" group: landing page, installation and quickstart.
# The group's own title key comes after its sections list.
- sections:
  - local: index
    title: Transformers
  - local: installation
    title: Installation
  - local: quicktour
    title: Quickstart
  title: Get started
# "Base classes" group, made of two sub-groups: Models and Preprocessors.
# Each (sub-)group's title key comes after its sections list.
# NOTE(review): isExpanded presumably sets the initial sidebar state of the
# group — confirm against the docs builder.
- isExpanded: false
  sections:
  # Models sub-group.
  - sections:
    - local: models
      title: Loading models
    - local: custom_models
      title: Customizing models
    - local: how_to_hack_models
      title: Customizing model components
    - local: model_sharing
      title: Sharing
    - local: add_new_model
      title: Adding a new model to Transformers
    - local: modular_transformers
      title: Modular Transformers
    - local: auto_docstring
      title: Document your models
    - local: attention_interface
      title: Customizing attention function
    title: Models
  # Preprocessors sub-group.
  - sections:
    - local: fast_tokenizers
      title: Tokenizers
    - local: image_processors
      title: Image processors
    - local: video_processors
      title: Video processors
    - local: backbones
      title: Backbones
    - local: feature_extractors
      title: Feature extractors
    - local: processors
      title: Processors
    - local: tokenizer_summary
      title: Summary of the tokenizers
    - local: pad_truncation
      title: Padding and truncation
    title: Preprocessors
  title: Base classes
# "Inference" group: four sub-groups (Pipeline API, LLMs, Chat with models,
# Optimization) followed by two standalone pages (agents, tools).
# Each (sub-)group's title key comes after its sections list.
- isExpanded: false
  sections:
  # Pipeline API sub-group.
  - sections:
    - local: pipeline_tutorial
      title: Pipeline
    - local: pipeline_gradio
      title: Machine learning apps
    - local: pipeline_webserver
      title: Web server inference
    - local: add_new_pipeline
      title: Adding a new pipeline
    title: Pipeline API
  # LLMs sub-group.
  - sections:
    - local: llm_tutorial
      title: Text generation
    - local: generation_strategies
      title: Generation strategies
    - local: generation_features
      title: Generation features
    - local: tasks/prompting
      title: Prompt engineering
    - local: llm_optims
      title: Optimizing inference
    - local: cache_explanation
      title: Caching
    - local: kv_cache
      title: KV cache strategies
    - local: serving
      title: Serving
    - local: llm_tutorial_optimization
      title: Getting the most out of LLMs
    - local: perplexity
      title: Perplexity of fixed-length models
    title: LLMs
  # Chat with models sub-group.
  - sections:
    - local: conversations
      title: Chat basics
    - local: chat_templating
      title: Templates
    - local: chat_templating_multimodal
      title: Multimodal templates
    - local: chat_templating_writing
      title: Template writing
    - local: chat_extras
      title: Tools and RAG
    title: Chat with models
  # Optimization sub-group.
  - sections:
    - local: perf_torch_compile
      title: torch.compile
    - local: perf_infer_gpu_one
      title: GPU
    - local: perf_infer_gpu_multi
      title: Distributed GPU inference
    - local: perf_infer_cpu
      title: CPU
    - local: tf_xla
      title: XLA
    title: Optimization
  # Standalone pages that sit directly under "Inference".
  - local: agents
    title: Agents
  - local: tools
    title: Tools
  title: Inference
# "Training" group: three sub-groups (Trainer API, Distributed training,
# Hardware) followed by two standalone pages (peft, model_memory_anatomy).
# Each (sub-)group's title key comes after its sections list.
- isExpanded: false
  sections:
  # Trainer API sub-group.
  - sections:
    - local: trainer
      title: Trainer
    - local: training
      title: Fine-tuning
    - local: optimizers
      title: Optimizers
    - local: hpo_train
      title: Hyperparameter search
    title: Trainer API
  # Distributed training sub-group.
  - sections:
    - local: accelerator_selection
      title: Accelerator selection
    - local: accelerate
      title: Accelerate
    - local: fsdp
      title: FullyShardedDataParallel
    - local: deepspeed
      title: DeepSpeed
    - local: debugging
      title: Multi-GPU debugging
    - local: perf_train_cpu_many
      title: Distributed CPUs
    - local: perf_train_gpu_many
      title: Parallelism methods
    title: Distributed training
  # Hardware sub-group.
  - sections:
    - local: perf_train_gpu_one
      title: GPU
    - local: perf_train_cpu
      title: CPU
    - local: perf_train_tpu_tf
      title: TPU
    - local: perf_train_special
      title: Apple Silicon
    - local: perf_train_gaudi
      title: Intel Gaudi
    - local: perf_hardware
      title: Build your own machine
    title: Hardware
  # Standalone pages that sit directly under "Training".
  - local: peft
    title: PEFT
  - local: model_memory_anatomy
    title: Model training anatomy
  title: Training
# "Quantization" group: a flat list of pages — three general pages
# (overview, selecting, concept_guide), one page per quantization method,
# then the contribute page. The group's title key comes last.
- isExpanded: false
  sections:
  - local: quantization/overview
    title: Overview
  - local: quantization/selecting
    title: Selecting a quantization method
  - local: quantization/concept_guide
    title: Quantization concepts
  - local: quantization/aqlm
    title: AQLM
  - local: quantization/auto_round
    title: AutoRound
  - local: quantization/awq
    title: AWQ
  - local: quantization/bitnet
    title: BitNet
  - local: quantization/bitsandbytes
    title: bitsandbytes
  - local: quantization/compressed_tensors
    title: compressed-tensors
  - local: quantization/eetq
    title: EETQ
  - local: quantization/fbgemm_fp8
    title: FBGEMM
  - local: quantization/finegrained_fp8
    title: Fine-grained FP8
  # NOTE(review): the only entry in this group whose path is not under
  # quantization/ — confirm the page really lives at the docs root.
  - local: gguf
    title: GGUF
  - local: quantization/gptq
    title: GPTQ
  - local: quantization/higgs
    title: HIGGS
  - local: quantization/hqq
    title: HQQ
  - local: quantization/optimum
    title: Optimum
  - local: quantization/quanto
    title: Quanto
  - local: quantization/quark
    title: Quark
  - local: quantization/torchao
    title: torchao
  - local: quantization/spqr
    title: SpQR
  - local: quantization/vptq
    title: VPTQ
  - local: quantization/contribute
    title: Contribute
  title: Quantization
# "Export to production" group: one page per export target.
# The group's title key comes after its sections list.
- isExpanded: false
  sections:
  - local: serialization
    title: ONNX
  - local: tflite
    title: LiteRT
  - local: executorch
    title: ExecuTorch
  - local: torchscript
    title: TorchScript
  title: Export to production
# "Resources" group: the "Task recipes" sub-group (itself split into four
# task families) followed by standalone resource pages.
# Each (sub-)group's title key comes after its sections list.
- isExpanded: false
  sections:
  # Task recipes sub-group.
  - sections:
    # Natural language processing tasks.
    - sections:
      - local: tasks/sequence_classification
        title: Text classification
      - local: tasks/token_classification
        title: Token classification
      - local: tasks/question_answering
        title: Question answering
      - local: tasks/language_modeling
        title: Causal language modeling
      - local: tasks/masked_language_modeling
        title: Masked language modeling
      - local: tasks/translation
        title: Translation
      - local: tasks/summarization
        title: Summarization
      - local: tasks/multiple_choice
        title: Multiple choice
      title: Natural language processing
    # Audio tasks.
    - sections:
      - local: tasks/audio_classification
        title: Audio classification
      - local: tasks/asr
        title: Automatic speech recognition
      title: Audio
    # Computer vision tasks.
    - sections:
      - local: tasks/image_classification
        title: Image classification
      - local: tasks/semantic_segmentation
        title: Image segmentation
      - local: tasks/video_classification
        title: Video classification
      - local: tasks/object_detection
        title: Object detection
      - local: tasks/zero_shot_object_detection
        title: Zero-shot object detection
      - local: tasks/zero_shot_image_classification
        title: Zero-shot image classification
      - local: tasks/monocular_depth_estimation
        title: Depth estimation
      - local: tasks/image_to_image
        title: Image-to-Image
      - local: tasks/image_feature_extraction
        title: Image Feature Extraction
      - local: tasks/mask_generation
        title: Mask Generation
      - local: tasks/keypoint_detection
        title: Keypoint detection
      - local: tasks/knowledge_distillation_for_image_classification
        title: Knowledge Distillation for Computer Vision
      title: Computer vision
    # Multimodal tasks.
    - sections:
      - local: tasks/image_captioning
        title: Image captioning
      - local: tasks/document_question_answering
        title: Document Question Answering
      - local: tasks/visual_question_answering
        title: Visual Question Answering
      # NOTE(review): hyphenated path, unlike the underscore-style siblings,
      # and listed under Multimodal rather than Audio — confirm both are
      # intended.
      - local: tasks/text-to-speech
        title: Text to speech
      - local: tasks/idefics
        title: Image tasks with IDEFICS
      - local: tasks/image_text_to_text
        title: Image-text-to-text
      - local: tasks/video_text_to_text
        title: Video-text-to-text
      - local: tasks/visual_document_retrieval
        title: Visual Document Retrieval
      title: Multimodal
    title: Task recipes
  # Standalone pages that sit directly under "Resources".
  - local: run_scripts
    title: Training scripts
  - local: glossary
    title: Glossary
  - local: philosophy
    title: Philosophy
  - local: notebooks
    title: Notebooks with examples
  - local: community
    title: Community resources
  - local: troubleshooting
    title: Troubleshoot
  title: Resources
# "Contribute" group: contributor guide, model tests and PR checks.
# The group's title key comes after its sections list.
- isExpanded: false
  sections:
  - local: contributing
    title: Contribute to Transformers
  - local: testing
    title: Transformers model tests
  - local: pr_checks
    title: Pull request checks
  title: Contribute
- isExpanded: false
  sections:
  # "Main Classes" sub-group: pages under main_classes/, plus model_doc/auto
  # for the Auto classes. The sub-group's title key comes after its list.
  - sections:
    - local: model_doc/auto
      title: Auto Classes
    - local: main_classes/backbones
      title: Backbones
    - local: main_classes/callback
      title: Callbacks
    - local: main_classes/configuration
      title: Configuration
    - local: main_classes/data_collator
      title: Data Collator
    - local: main_classes/keras_callbacks
      title: Keras callbacks
    - local: main_classes/logging
      title: Logging
    - local: main_classes/model
      title: Models
    - local: main_classes/text_generation
      title: Text Generation
    - local: main_classes/onnx
      title: ONNX
    - local: main_classes/optimizer_schedules
      title: Optimization
    - local: main_classes/output
      title: Model outputs
    - local: main_classes/peft
      title: PEFT
    - local: main_classes/pipelines
      title: Pipelines
    - local: main_classes/processors
      title: Processors
    - local: main_classes/quantization
      title: Quantization
    - local: main_classes/tokenizer
      title: Tokenizer
    - local: main_classes/trainer
      title: Trainer
    - local: main_classes/deepspeed
      title: DeepSpeed
    - local: main_classes/executorch
      title: ExecuTorch
    - local: main_classes/feature_extractor
      title: Feature Extractor
    - local: main_classes/image_processor
      title: Image Processor
    - local: main_classes/video_processor
      title: Video Processor
    title: Main Classes
  # "Models" sub-group: one page per model under model_doc/, grouped by
  # modality. Each modality's title key comes after its (long) sections list,
  # so a comment labels where every group starts. Entries within a group are
  # kept in case-insensitive alphabetical order of their title.
  - sections:
    # Text models.
    - sections:
      - local: model_doc/albert
        title: ALBERT
      - local: model_doc/arcee
        title: Arcee
      - local: model_doc/bamba
        title: Bamba
      - local: model_doc/bart
        title: BART
      - local: model_doc/barthez
        title: BARThez
      - local: model_doc/bartpho
        title: BARTpho
      - local: model_doc/bert
        title: BERT
      - local: model_doc/bert-generation
        title: BertGeneration
      - local: model_doc/bert-japanese
        title: BertJapanese
      - local: model_doc/bertweet
        title: BERTweet
      - local: model_doc/big_bird
        title: BigBird
      - local: model_doc/bigbird_pegasus
        title: BigBirdPegasus
      - local: model_doc/biogpt
        title: BioGpt
      - local: model_doc/bitnet
        title: BitNet
      - local: model_doc/blenderbot
        title: Blenderbot
      - local: model_doc/blenderbot-small
        title: Blenderbot Small
      - local: model_doc/bloom
        title: BLOOM
      - local: model_doc/bort
        title: BORT
      - local: model_doc/byt5
        title: ByT5
      - local: model_doc/camembert
        title: CamemBERT
      - local: model_doc/canine
        title: CANINE
      - local: model_doc/codegen
        title: CodeGen
      - local: model_doc/code_llama
        title: CodeLlama
      - local: model_doc/cohere
        title: Cohere
      - local: model_doc/cohere2
        title: Cohere2
      - local: model_doc/convbert
        title: ConvBERT
      - local: model_doc/cpm
        title: CPM
      - local: model_doc/cpmant
        title: CPMANT
      - local: model_doc/ctrl
        title: CTRL
      - local: model_doc/dbrx
        title: DBRX
      - local: model_doc/deberta
        title: DeBERTa
      - local: model_doc/deberta-v2
        title: DeBERTa-v2
      - local: model_doc/deepseek_v3
        title: DeepSeek-V3
      - local: model_doc/dialogpt
        title: DialoGPT
      - local: model_doc/diffllama
        title: DiffLlama
      - local: model_doc/distilbert
        title: DistilBERT
      - local: model_doc/dots1
        title: dots1
      - local: model_doc/dpr
        title: DPR
      - local: model_doc/electra
        title: ELECTRA
      - local: model_doc/encoder-decoder
        title: Encoder Decoder Models
      - local: model_doc/ernie
        title: ERNIE
      - local: model_doc/ernie_m
        title: ErnieM
      - local: model_doc/esm
        title: ESM
      - local: model_doc/falcon
        title: Falcon
      - local: model_doc/falcon3
        title: Falcon3
      - local: model_doc/falcon_h1
        title: FalconH1
      - local: model_doc/falcon_mamba
        title: FalconMamba
      - local: model_doc/flan-t5
        title: FLAN-T5
      - local: model_doc/flan-ul2
        title: FLAN-UL2
      - local: model_doc/flaubert
        title: FlauBERT
      - local: model_doc/fnet
        title: FNet
      - local: model_doc/fsmt
        title: FSMT
      - local: model_doc/funnel
        title: Funnel Transformer
      - local: model_doc/fuyu
        title: Fuyu
      - local: model_doc/gemma
        title: Gemma
      - local: model_doc/gemma2
        title: Gemma2
      - local: model_doc/glm
        title: GLM
      - local: model_doc/glm4
        title: glm4
      - local: model_doc/openai-gpt
        title: GPT
      - local: model_doc/gpt_neo
        title: GPT Neo
      - local: model_doc/gpt_neox
        title: GPT NeoX
      - local: model_doc/gpt_neox_japanese
        title: GPT NeoX Japanese
      - local: model_doc/gptj
        title: GPT-J
      - local: model_doc/gpt2
        title: GPT2
      - local: model_doc/gpt_bigcode
        title: GPTBigCode
      - local: model_doc/gptsan-japanese
        title: GPTSAN Japanese
      - local: model_doc/gpt-sw3
        title: GPTSw3
      - local: model_doc/granite
        title: Granite
      - local: model_doc/granitemoe
        title: GraniteMoe
      - local: model_doc/granitemoehybrid
        title: GraniteMoeHybrid
      - local: model_doc/granitemoeshared
        title: GraniteMoeShared
      - local: model_doc/helium
        title: Helium
      - local: model_doc/herbert
        title: HerBERT
      - local: model_doc/ibert
        title: I-BERT
      - local: model_doc/jamba
        title: Jamba
      - local: model_doc/jetmoe
        title: JetMoe
      - local: model_doc/jukebox
        title: Jukebox
      - local: model_doc/led
        title: LED
      - local: model_doc/llama
        title: LLaMA
      - local: model_doc/llama2
        title: Llama2
      - local: model_doc/llama3
        title: Llama3
      - local: model_doc/longformer
        title: Longformer
      - local: model_doc/longt5
        title: LongT5
      - local: model_doc/luke
        title: LUKE
      - local: model_doc/m2m_100
        title: M2M100
      - local: model_doc/madlad-400
        title: MADLAD-400
      - local: model_doc/mamba
        title: Mamba
      - local: model_doc/mamba2
        title: Mamba2
      - local: model_doc/marian
        title: MarianMT
      - local: model_doc/markuplm
        title: MarkupLM
      - local: model_doc/mbart
        title: MBart and MBart-50
      - local: model_doc/mega
        title: MEGA
      - local: model_doc/megatron-bert
        title: MegatronBERT
      - local: model_doc/megatron_gpt2
        title: MegatronGPT2
      - local: model_doc/minimax
        title: MiniMax
      - local: model_doc/mistral
        title: Mistral
      - local: model_doc/mixtral
        title: Mixtral
      - local: model_doc/mluke
        title: mLUKE
      - local: model_doc/mobilebert
        title: MobileBERT
      - local: model_doc/modernbert
        title: ModernBert
      - local: model_doc/mpnet
        title: MPNet
      - local: model_doc/mpt
        title: MPT
      - local: model_doc/mra
        title: MRA
      - local: model_doc/mt5
        title: MT5
      - local: model_doc/mvp
        title: MVP
      - local: model_doc/myt5
        title: myt5
      - local: model_doc/nemotron
        title: Nemotron
      - local: model_doc/nezha
        title: NEZHA
      - local: model_doc/nllb
        title: NLLB
      - local: model_doc/nllb-moe
        title: NLLB-MoE
      - local: model_doc/nystromformer
        title: Nyströmformer
      - local: model_doc/olmo
        title: OLMo
      - local: model_doc/olmo2
        title: OLMo2
      - local: model_doc/olmoe
        title: OLMoE
      - local: model_doc/open-llama
        title: Open-Llama
      - local: model_doc/opt
        title: OPT
      - local: model_doc/pegasus
        title: Pegasus
      - local: model_doc/pegasus_x
        title: PEGASUS-X
      - local: model_doc/persimmon
        title: Persimmon
      - local: model_doc/phi
        title: Phi
      - local: model_doc/phi3
        title: Phi-3
      - local: model_doc/phimoe
        title: PhiMoE
      - local: model_doc/phobert
        title: PhoBERT
      - local: model_doc/plbart
        title: PLBart
      - local: model_doc/prophetnet
        title: ProphetNet
      - local: model_doc/qdqbert
        title: QDQBert
      - local: model_doc/qwen2
        title: Qwen2
      - local: model_doc/qwen2_moe
        title: Qwen2MoE
      - local: model_doc/qwen3
        title: Qwen3
      - local: model_doc/qwen3_moe
        title: Qwen3MoE
      - local: model_doc/rag
        title: RAG
      - local: model_doc/realm
        title: REALM
      - local: model_doc/recurrent_gemma
        title: RecurrentGemma
      - local: model_doc/reformer
        title: Reformer
      - local: model_doc/rembert
        title: RemBERT
      - local: model_doc/retribert
        title: RetriBERT
      - local: model_doc/roberta
        title: RoBERTa
      - local: model_doc/roberta-prelayernorm
        title: RoBERTa-PreLayerNorm
      - local: model_doc/roc_bert
        title: RoCBert
      - local: model_doc/roformer
        title: RoFormer
      - local: model_doc/rwkv
        title: RWKV
      - local: model_doc/smollm3
        title: SmolLM3
      - local: model_doc/splinter
        title: Splinter
      - local: model_doc/squeezebert
        title: SqueezeBERT
      - local: model_doc/stablelm
        title: StableLm
      - local: model_doc/starcoder2
        title: Starcoder2
      - local: model_doc/switch_transformers
        title: SwitchTransformers
      - local: model_doc/t5
        title: T5
      - local: model_doc/t5gemma
        title: T5Gemma
      - local: model_doc/t5v1.1
        title: T5v1.1
      - local: model_doc/tapex
        title: TAPEX
      - local: model_doc/transfo-xl
        title: Transformer XL
      - local: model_doc/ul2
        title: UL2
      - local: model_doc/umt5
        title: UMT5
      - local: model_doc/xmod
        title: X-MOD
      - local: model_doc/xglm
        title: XGLM
      - local: model_doc/xlm
        title: XLM
      - local: model_doc/xlm-prophetnet
        title: XLM-ProphetNet
      - local: model_doc/xlm-roberta
        title: XLM-RoBERTa
      - local: model_doc/xlm-roberta-xl
        title: XLM-RoBERTa-XL
      - local: model_doc/xlm-v
        title: XLM-V
      - local: model_doc/xlnet
        title: XLNet
      - local: model_doc/yoso
        title: YOSO
      - local: model_doc/zamba
        title: Zamba
      - local: model_doc/zamba2
        title: Zamba2
      title: Text models
    # Vision models.
    - sections:
      - local: model_doc/beit
        title: BEiT
      - local: model_doc/bit
        title: BiT
      - local: model_doc/conditional_detr
        title: Conditional DETR
      - local: model_doc/convnext
        title: ConvNeXT
      - local: model_doc/convnextv2
        title: ConvNeXTV2
      - local: model_doc/cvt
        title: CvT
      - local: model_doc/d_fine
        title: D-FINE
      - local: model_doc/dab-detr
        title: DAB-DETR
      - local: model_doc/deformable_detr
        title: Deformable DETR
      - local: model_doc/deit
        title: DeiT
      - local: model_doc/depth_anything
        title: Depth Anything
      - local: model_doc/depth_anything_v2
        title: Depth Anything V2
      - local: model_doc/depth_pro
        title: DepthPro
      - local: model_doc/deta
        title: DETA
      - local: model_doc/detr
        title: DETR
      - local: model_doc/dinat
        title: DiNAT
      - local: model_doc/dinov2
        title: DINOV2
      - local: model_doc/dinov2_with_registers
        title: DINOv2 with Registers
      - local: model_doc/dit
        title: DiT
      - local: model_doc/dpt
        title: DPT
      - local: model_doc/efficientformer
        title: EfficientFormer
      - local: model_doc/efficientnet
        title: EfficientNet
      - local: model_doc/focalnet
        title: FocalNet
      - local: model_doc/glpn
        title: GLPN
      - local: model_doc/hgnet_v2
        title: HGNet-V2
      - local: model_doc/hiera
        title: Hiera
      - local: model_doc/ijepa
        title: I-JEPA
      - local: model_doc/imagegpt
        title: ImageGPT
      - local: model_doc/levit
        title: LeViT
      - local: model_doc/lightglue
        title: LightGlue
      - local: model_doc/mask2former
        title: Mask2Former
      - local: model_doc/maskformer
        title: MaskFormer
      - local: model_doc/mlcd
        title: MLCD
      - local: model_doc/mobilenet_v1
        title: MobileNetV1
      - local: model_doc/mobilenet_v2
        title: MobileNetV2
      - local: model_doc/mobilevit
        title: MobileViT
      - local: model_doc/mobilevitv2
        title: MobileViTV2
      - local: model_doc/nat
        title: NAT
      - local: model_doc/poolformer
        title: PoolFormer
      - local: model_doc/prompt_depth_anything
        title: Prompt Depth Anything
      - local: model_doc/pvt
        title: Pyramid Vision Transformer (PVT)
      - local: model_doc/pvt_v2
        title: Pyramid Vision Transformer v2 (PVTv2)
      - local: model_doc/regnet
        title: RegNet
      - local: model_doc/resnet
        title: ResNet
      - local: model_doc/rt_detr
        title: RT-DETR
      - local: model_doc/rt_detr_v2
        title: RT-DETRv2
      - local: model_doc/segformer
        title: SegFormer
      - local: model_doc/seggpt
        title: SegGpt
      - local: model_doc/superglue
        title: SuperGlue
      - local: model_doc/superpoint
        title: SuperPoint
      - local: model_doc/swiftformer
        title: SwiftFormer
      - local: model_doc/swin
        title: Swin Transformer
      - local: model_doc/swinv2
        title: Swin Transformer V2
      - local: model_doc/swin2sr
        title: Swin2SR
      - local: model_doc/table-transformer
        title: Table Transformer
      - local: model_doc/textnet
        title: TextNet
      - local: model_doc/timm_wrapper
        title: Timm Wrapper
      - local: model_doc/upernet
        title: UperNet
      - local: model_doc/van
        title: VAN
      - local: model_doc/vit
        title: Vision Transformer (ViT)
      - local: model_doc/vit_hybrid
        title: ViT Hybrid
      - local: model_doc/vitdet
        title: ViTDet
      - local: model_doc/vit_mae
        title: ViTMAE
      - local: model_doc/vitmatte
        title: ViTMatte
      - local: model_doc/vit_msn
        title: ViTMSN
      - local: model_doc/vitpose
        title: ViTPose
      - local: model_doc/yolos
        title: YOLOS
      - local: model_doc/zoedepth
        title: ZoeDepth
      title: Vision models
    # Audio models.
    - sections:
      - local: model_doc/audio-spectrogram-transformer
        title: Audio Spectrogram Transformer
      - local: model_doc/bark
        title: Bark
      - local: model_doc/clap
        title: CLAP
      - local: model_doc/csm
        title: CSM
      - local: model_doc/dac
        title: dac
      - local: model_doc/dia
        title: Dia
      - local: model_doc/encodec
        title: EnCodec
      - local: model_doc/fastspeech2_conformer
        title: FastSpeech2Conformer
      - local: model_doc/granite_speech
        title: GraniteSpeech
      - local: model_doc/hubert
        title: Hubert
      - local: model_doc/kyutai_speech_to_text
        title: Kyutai Speech-To-Text
      - local: model_doc/mctct
        title: MCTCT
      - local: model_doc/mimi
        title: Mimi
      - local: model_doc/mms
        title: MMS
      - local: model_doc/moonshine
        title: Moonshine
      - local: model_doc/moshi
        title: Moshi
      - local: model_doc/musicgen
        title: MusicGen
      - local: model_doc/musicgen_melody
        title: MusicGen Melody
      - local: model_doc/pop2piano
        title: Pop2Piano
      - local: model_doc/seamless_m4t
        title: Seamless-M4T
      - local: model_doc/seamless_m4t_v2
        title: SeamlessM4T-v2
      - local: model_doc/sew
        title: SEW
      - local: model_doc/sew-d
        title: SEW-D
      - local: model_doc/speech_to_text
        title: Speech2Text
      - local: model_doc/speech_to_text_2
        title: Speech2Text2
      - local: model_doc/speecht5
        title: SpeechT5
      - local: model_doc/unispeech
        title: UniSpeech
      - local: model_doc/unispeech-sat
        title: UniSpeech-SAT
      - local: model_doc/univnet
        title: UnivNet
      - local: model_doc/vits
        title: VITS
      - local: model_doc/wav2vec2
        title: Wav2Vec2
      - local: model_doc/wav2vec2-bert
        title: Wav2Vec2-BERT
      - local: model_doc/wav2vec2-conformer
        title: Wav2Vec2-Conformer
      - local: model_doc/wav2vec2_phoneme
        title: Wav2Vec2Phoneme
      - local: model_doc/wavlm
        title: WavLM
      - local: model_doc/whisper
        title: Whisper
      - local: model_doc/xls_r
        title: XLS-R
      - local: model_doc/xlsr_wav2vec2
        title: XLSR-Wav2Vec2
      title: Audio models
    # Video models.
    - sections:
      - local: model_doc/timesformer
        title: TimeSformer
      - local: model_doc/vjepa2
        title: V-JEPA 2
      - local: model_doc/videomae
        title: VideoMAE
      - local: model_doc/vivit
        title: ViViT
      title: Video models
    # Multimodal models.
    - sections:
      - local: model_doc/align
        title: ALIGN
      - local: model_doc/altclip
        title: AltCLIP
      - local: model_doc/aria
        title: Aria
      - local: model_doc/aya_vision
        title: AyaVision
      - local: model_doc/blip
        title: BLIP
      - local: model_doc/blip-2
        title: BLIP-2
      - local: model_doc/bridgetower
        title: BridgeTower
      - local: model_doc/bros
        title: BROS
      - local: model_doc/chameleon
        title: Chameleon
      - local: model_doc/chinese_clip
        title: Chinese-CLIP
      - local: model_doc/clip
        title: CLIP
      - local: model_doc/clipseg
        title: CLIPSeg
      - local: model_doc/clvp
        title: CLVP
      - local: model_doc/colpali
        title: ColPali
      - local: model_doc/colqwen2
        title: ColQwen2
      - local: model_doc/data2vec
        title: Data2Vec
      - local: model_doc/deplot
        title: DePlot
      - local: model_doc/donut
        title: Donut
      - local: model_doc/emu3
        title: Emu3
      - local: model_doc/flava
        title: FLAVA
      - local: model_doc/gemma3
        title: Gemma3
      - local: model_doc/gemma3n
        title: Gemma3n
      - local: model_doc/git
        title: GIT
      - local: model_doc/glm4v
        title: glm4v
      - local: model_doc/got_ocr2
        title: GOT-OCR2
      - local: model_doc/granitevision
        title: GraniteVision
      - local: model_doc/grounding-dino
        title: Grounding DINO
      - local: model_doc/groupvit
        title: GroupViT
      - local: model_doc/idefics
        title: IDEFICS
      - local: model_doc/idefics2
        title: Idefics2
      - local: model_doc/idefics3
        title: Idefics3
      - local: model_doc/instructblip
        title: InstructBLIP
      - local: model_doc/instructblipvideo
        title: InstructBlipVideo
      - local: model_doc/internvl
        title: InternVL
      - local: model_doc/janus
        title: Janus
      - local: model_doc/kosmos-2
        title: KOSMOS-2
      - local: model_doc/layoutlm
        title: LayoutLM
      - local: model_doc/layoutlmv2
        title: LayoutLMV2
      - local: model_doc/layoutlmv3
        title: LayoutLMV3
      - local: model_doc/layoutxlm
        title: LayoutXLM
      - local: model_doc/lilt
        title: LiLT
      - local: model_doc/llama4
        title: Llama4
      - local: model_doc/llava
        title: Llava
      - local: model_doc/llava_next
        title: LLaVA-NeXT
      - local: model_doc/llava_next_video
        title: LLaVa-NeXT-Video
      - local: model_doc/llava_onevision
        title: LLaVA-Onevision
      - local: model_doc/lxmert
        title: LXMERT
      - local: model_doc/matcha
        title: MatCha
      - local: model_doc/mgp-str
        title: MGP-STR
      - local: model_doc/mistral3
        title: Mistral3
      - local: model_doc/mllama
        title: mllama
      - local: model_doc/nougat
        title: Nougat
      - local: model_doc/omdet-turbo
        title: OmDet-Turbo
      - local: model_doc/oneformer
        title: OneFormer
      - local: model_doc/owlvit
        title: OWL-ViT
      - local: model_doc/owlv2
        title: OWLv2
      - local: model_doc/paligemma
        title: PaliGemma
      - local: model_doc/perceiver
        title: Perceiver
      - local: model_doc/phi4_multimodal
        title: Phi4 Multimodal
      - local: model_doc/pix2struct
        title: Pix2Struct
      - local: model_doc/pixtral
        title: Pixtral
      - local: model_doc/qwen2_5_omni
        title: Qwen2.5-Omni
      - local: model_doc/qwen2_5_vl
        title: Qwen2.5-VL
      - local: model_doc/qwen2_audio
        title: Qwen2Audio
      - local: model_doc/qwen2_vl
        title: Qwen2VL
      - local: model_doc/sam
        title: Segment Anything
      - local: model_doc/sam_hq
        title: Segment Anything High Quality
      - local: model_doc/shieldgemma2
        title: ShieldGemma2
      - local: model_doc/siglip
        title: SigLIP
      - local: model_doc/siglip2
        title: SigLIP2
      - local: model_doc/smolvlm
        title: SmolVLM
      - local: model_doc/speech-encoder-decoder
        title: Speech Encoder Decoder Models
      - local: model_doc/tapas
        title: TAPAS
      - local: model_doc/trocr
        title: TrOCR
      - local: model_doc/tvlt
        title: TVLT
      - local: model_doc/tvp
        title: TVP
      - local: model_doc/udop
        title: UDOP
      - local: model_doc/video_llava
        title: VideoLlava
      - local: model_doc/vilt
        title: ViLT
      - local: model_doc/vipllava
        title: VipLlava
      - local: model_doc/vision-encoder-decoder
        title: Vision Encoder Decoder Models
      - local: model_doc/vision-text-dual-encoder
        title: Vision Text Dual Encoder
      - local: model_doc/visual_bert
        title: VisualBERT
      - local: model_doc/xclip
        title: X-CLIP
      title: Multimodal models
    # Reinforcement learning models.
    - sections:
      - local: model_doc/decision_transformer
        title: Decision Transformer
      - local: model_doc/trajectory_transformer
        title: Trajectory Transformer
      title: Reinforcement learning models
    # Time series models.
    - sections:
      - local: model_doc/autoformer
        title: Autoformer
      - local: model_doc/informer
        title: Informer
      - local: model_doc/patchtsmixer
        title: PatchTSMixer
      - local: model_doc/patchtst
        title: PatchTST
      - local: model_doc/time_series_transformer
        title: Time Series Transformer
      - local: model_doc/timesfm
        title: TimesFM
      title: Time series models
    # Graph models.
    - sections:
      - local: model_doc/graphormer
        title: Graphormer
      title: Graph models
    title: Models
  # "Internal helpers" sub-group: utility reference pages under internal/.
  # The sub-group's title key comes after its sections list.
  - sections:
    - local: internal/modeling_utils
      title: Custom Layers and Utilities
    - local: internal/model_debugging_utils
      title: Utilities for Model Debugging
    - local: internal/pipelines_utils
      title: Utilities for pipelines
    - local: internal/tokenization_utils
      title: Utilities for Tokenizers
    - local: internal/trainer_utils
      title: Utilities for Trainer
    - local: internal/generation_utils
      title: Utilities for Generation
    - local: internal/image_processing_utils
      title: Utilities for Image Processors
    - local: internal/audio_utils
      title: Utilities for Audio processing
    - local: internal/file_utils
      title: General Utilities
    - local: internal/import_utils
      title: Importing Utilities
    - local: internal/time_series_utils
      title: Utilities for Time Series
    title: Internal helpers
  # "Reference" sub-group: currently a single page under reference/.
  - sections:
    - local: reference/environment_variables
      title: Environment Variables
    title: Reference
  title: API