diff --git a/.circleci/config.yml b/.circleci/config.yml
index ef49dc7e023..5616355415b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -184,6 +184,7 @@ jobs:
     - run: python utils/check_dummies.py
     - run: python utils/check_repo.py
     - run: python utils/check_inits.py
+    - run: python utils/check_pipeline_typing.py
     - run: python utils/check_config_docstrings.py
     - run: python utils/check_config_attributes.py
     - run: python utils/check_doctest_list.py
diff --git a/Makefile b/Makefile
index a7af9864826..bfc69d2bba5 100644
--- a/Makefile
+++ b/Makefile
@@ -40,6 +40,7 @@ repo-consistency:
	python utils/check_dummies.py
	python utils/check_repo.py
	python utils/check_inits.py
+	python utils/check_pipeline_typing.py
	python utils/check_config_docstrings.py
	python utils/check_config_attributes.py
	python utils/check_doctest_list.py
@@ -81,6 +82,7 @@ fix-copies:
	python utils/check_copies.py --fix_and_overwrite
	python utils/check_modular_conversion.py --fix_and_overwrite
	python utils/check_dummies.py --fix_and_overwrite
+	python utils/check_pipeline_typing.py --fix_and_overwrite
	python utils/check_doctest_list.py --fix_and_overwrite
	python utils/check_docstrings.py --fix_and_overwrite
diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index a62b3c3eab2..93a84f3bf61 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -564,6 +564,86 @@ def clean_custom_task(task_info):
     return task_info, None
 
 
+# <generated-code>
+# fmt: off
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# The part of the file below was automatically generated from the code.
+# Do NOT edit this part of the file manually as any edits will be overwritten by the generation
+# of the file. If any change should be done, please apply the changes to the `pipeline` function
+# below and run `python utils/check_pipeline_typing.py --fix_and_overwrite` to update the file.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+
+from typing import Literal, overload
+
+
+@overload
+def pipeline(task: Literal[None], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> Pipeline: ...
+@overload +def pipeline(task: Literal["audio-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> AudioClassificationPipeline: ... +@overload +def pipeline(task: Literal["automatic-speech-recognition"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> AutomaticSpeechRecognitionPipeline: ... +@overload +def pipeline(task: Literal["depth-estimation"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> DepthEstimationPipeline: ... 
+@overload +def pipeline(task: Literal["document-question-answering"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> DocumentQuestionAnsweringPipeline: ... +@overload +def pipeline(task: Literal["feature-extraction"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> FeatureExtractionPipeline: ... +@overload +def pipeline(task: Literal["fill-mask"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> FillMaskPipeline: ... 
+@overload +def pipeline(task: Literal["image-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ImageClassificationPipeline: ... +@overload +def pipeline(task: Literal["image-feature-extraction"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ImageFeatureExtractionPipeline: ... +@overload +def pipeline(task: Literal["image-segmentation"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ImageSegmentationPipeline: ... 
+@overload +def pipeline(task: Literal["image-text-to-text"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ImageTextToTextPipeline: ... +@overload +def pipeline(task: Literal["image-to-image"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ImageToImagePipeline: ... +@overload +def pipeline(task: Literal["image-to-text"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ImageToTextPipeline: ... 
+@overload +def pipeline(task: Literal["mask-generation"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> MaskGenerationPipeline: ... +@overload +def pipeline(task: Literal["object-detection"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ObjectDetectionPipeline: ... +@overload +def pipeline(task: Literal["question-answering"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> QuestionAnsweringPipeline: ... 
+@overload +def pipeline(task: Literal["summarization"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> SummarizationPipeline: ... +@overload +def pipeline(task: Literal["table-question-answering"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> TableQuestionAnsweringPipeline: ... +@overload +def pipeline(task: Literal["text-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> TextClassificationPipeline: ... 
+@overload +def pipeline(task: Literal["text-generation"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> TextGenerationPipeline: ... +@overload +def pipeline(task: Literal["text-to-audio"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> TextToAudioPipeline: ... +@overload +def pipeline(task: Literal["text2text-generation"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> Text2TextGenerationPipeline: ... 
+@overload +def pipeline(task: Literal["token-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> TokenClassificationPipeline: ... +@overload +def pipeline(task: Literal["translation"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> TranslationPipeline: ... +@overload +def pipeline(task: Literal["video-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> VideoClassificationPipeline: ... 
+@overload +def pipeline(task: Literal["visual-question-answering"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> VisualQuestionAnsweringPipeline: ... +@overload +def pipeline(task: Literal["zero-shot-audio-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ZeroShotAudioClassificationPipeline: ... +@overload +def pipeline(task: Literal["zero-shot-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ZeroShotClassificationPipeline: ... 
+@overload
+def pipeline(task: Literal["zero-shot-image-classification"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ZeroShotImageClassificationPipeline: ...
+@overload
+def pipeline(task: Literal["zero-shot-object-detection"], model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, processor: Optional[Union[str, ProcessorMixin]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, token: Optional[Union[str, bool]] = None, device: Optional[Union[int, str, "torch.device"]] = None, device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: Optional[bool] = None, model_kwargs: Optional[Dict[str, Any]] = None, pipeline_class: Optional[Any] = None, **kwargs: Any) -> ZeroShotObjectDetectionPipeline: ...
+
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# The part of the file above was automatically generated from the code.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# fmt: on
+# </generated-code>
+
+
 def pipeline(
     task: Optional[str] = None,
     model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None,
@@ -577,12 +657,12 @@ def pipeline(
     use_fast: bool = True,
     token: Optional[Union[str, bool]] = None,
     device: Optional[Union[int, str, "torch.device"]] = None,
-    device_map=None,
-    torch_dtype=None,
+    device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None,
+    torch_dtype: Optional[Union[str, "torch.dtype"]] = None,
     trust_remote_code: Optional[bool] = None,
     model_kwargs: Optional[Dict[str, Any]] = None,
     pipeline_class: Optional[Any] = None,
-    **kwargs,
+    **kwargs: Any,
 ) -> Pipeline:
     """
     Utility factory method to build a [`Pipeline`].
diff --git a/src/transformers/pipelines/audio_classification.py b/src/transformers/pipelines/audio_classification.py
index 86dfe72d3c5..62813fbb5e1 100644
--- a/src/transformers/pipelines/audio_classification.py
+++ b/src/transformers/pipelines/audio_classification.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import subprocess
-from typing import Union
+from typing import Any, Dict, List, Union
 
 import numpy as np
 import requests
@@ -27,7 +27,7 @@ if is_torch_available():
 logger = logging.get_logger(__name__)
 
 
-def ffmpeg_read(bpayload: bytes, sampling_rate: int) -> np.array:
+def ffmpeg_read(bpayload: bytes, sampling_rate: int) -> np.ndarray:
     """
     Helper function to read an audio file through ffmpeg.
     """
@@ -103,11 +103,7 @@ class AudioClassificationPipeline(Pipeline):
 
         self.check_model_type(MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES)
 
-    def __call__(
-        self,
-        inputs: Union[np.ndarray, bytes, str],
-        **kwargs,
-    ):
+    def __call__(self, inputs: Union[np.ndarray, bytes, str, dict], **kwargs: Any) -> List[Dict[str, Any]]:
         """
         Classify the sequence(s) given as inputs. See the [`AudioClassificationPipeline`] documentation for more
         information.
diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index cda375632a9..5fa177fe2ba 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import defaultdict
-from typing import TYPE_CHECKING, Dict, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
 import numpy as np
 import requests
@@ -211,11 +211,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
 
         super().__init__(model, tokenizer, feature_extractor, device=device, torch_dtype=torch_dtype, **kwargs)
 
-    def __call__(
-        self,
-        inputs: Union[np.ndarray, bytes, str],
-        **kwargs,
-    ):
+    def __call__(
+        self, inputs: Union[np.ndarray, bytes, str, dict], **kwargs: Any
+    ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
         """
         Transcribe the audio sequence(s) given as inputs to text. See the [`AutomaticSpeechRecognitionPipeline`]
         documentation for more information.
diff --git a/src/transformers/pipelines/depth_estimation.py b/src/transformers/pipelines/depth_estimation.py
index 2203ac09c9c..235daf2cc2c 100644
--- a/src/transformers/pipelines/depth_estimation.py
+++ b/src/transformers/pipelines/depth_estimation.py
@@ -1,4 +1,4 @@
-from typing import List, Union
+from typing import Any, Dict, List, Union, overload
 
 from ..utils import (
     add_end_docstrings,
@@ -52,7 +52,15 @@ class DepthEstimationPipeline(Pipeline):
         requires_backends(self, "vision")
         self.check_model_type(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES)
 
-    def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs):
+    @overload
+    def __call__(self, inputs: Union[str, "Image.Image"], **kwargs: Any) -> Dict[str, Any]: ...
+
+    @overload
+    def __call__(self, inputs: List[Union[str, "Image.Image"]], **kwargs: Any) -> List[Dict[str, Any]]: ...
+
+    def __call__(
+        self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs: Any
+    ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
         """
         Predict the depth(s) of the image(s) passed as inputs.
diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py
index 6537743dd65..2d70a3534df 100644
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import re
-from typing import List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union, overload
 
 import numpy as np
 
@@ -209,13 +209,28 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
 
         return preprocess_params, forward_params, postprocess_params
 
+    @overload
     def __call__(
         self,
         image: Union["Image.Image", str],
+        question: str,
+        word_boxes: Optional[Tuple[str, List[float]]] = None,
+        **kwargs: Any,
+    ) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(self, image: Dict[str, Any], **kwargs: Any) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(self, image: List[Dict[str, Any]], **kwargs: Any) -> List[List[Dict[str, Any]]]: ...
+
+    def __call__(
+        self,
+        image: Union["Image.Image", str, List[Dict[str, Any]]],
         question: Optional[str] = None,
         word_boxes: Optional[Tuple[str, List[float]]] = None,
-        **kwargs,
-    ):
+        **kwargs: Any,
+    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
         """
         Answer the question(s) given as inputs by using the document(s). A document is defined as an image and an
         optional list of (word, box) tuples which represent the text in the document. If the `word_boxes` are not
diff --git a/src/transformers/pipelines/feature_extraction.py b/src/transformers/pipelines/feature_extraction.py
index 7d67a615ac0..70971d83b20 100644
--- a/src/transformers/pipelines/feature_extraction.py
+++ b/src/transformers/pipelines/feature_extraction.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Any, Dict, List, Union
 
 from ..utils import add_end_docstrings
 from .base import GenericTensor, Pipeline, build_pipeline_init_args
@@ -73,9 +73,9 @@ class FeatureExtractionPipeline(Pipeline):
         elif self.framework == "tf":
             return model_outputs[0].numpy().tolist()
 
-    def __call__(self, *args, **kwargs):
+    def __call__(self, *args: Union[str, List[str]], **kwargs: Any) -> Union[Any, List[Any]]:
         """
-        Extract the features of the input(s).
+        Extract the features of the input text(s).
 
         Args:
             args (`str` or `List[str]`): One or several texts (or one list of texts) to get the features of.
diff --git a/src/transformers/pipelines/fill_mask.py b/src/transformers/pipelines/fill_mask.py
index c14f5411848..18c225b1c5a 100644
--- a/src/transformers/pipelines/fill_mask.py
+++ b/src/transformers/pipelines/fill_mask.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Any, Dict, List, Union, overload
 
 import numpy as np
 
@@ -245,7 +245,15 @@ class FillMaskPipeline(Pipeline):
         )
         return preprocess_params, {}, postprocess_params
 
-    def __call__(self, inputs, **kwargs):
+    @overload
+    def __call__(self, inputs: str, **kwargs: Any) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(self, inputs: List[str], **kwargs: Any) -> List[List[Dict[str, Any]]]: ...
+
+    def __call__(
+        self, inputs: Union[str, List[str]], **kwargs: Any
+    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
         """
         Fill the masked token in the text(s) given as inputs.
diff --git a/src/transformers/pipelines/image_classification.py b/src/transformers/pipelines/image_classification.py
index a9b318df552..bd179a5a667 100644
--- a/src/transformers/pipelines/image_classification.py
+++ b/src/transformers/pipelines/image_classification.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Union +from typing import Any, Dict, List, Union, overload import numpy as np @@ -122,7 +122,15 @@ class ImageClassificationPipeline(Pipeline): postprocess_params["function_to_apply"] = function_to_apply return preprocess_params, {}, postprocess_params - def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs): + @overload + def __call__(self, inputs: Union[str, "Image.Image"], **kwargs: Any) -> List[Dict[str, Any]]: ... + + @overload + def __call__(self, inputs: Union[List[str], List["Image.Image"]], **kwargs: Any) -> List[List[Dict[str, Any]]]: ... + + def __call__( + self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs: Any + ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]: """ Assign labels to the image(s) passed as inputs. diff --git a/src/transformers/pipelines/image_feature_extraction.py b/src/transformers/pipelines/image_feature_extraction.py index 391eb2b3aec..ef8d6e6a00a 100644 --- a/src/transformers/pipelines/image_feature_extraction.py +++ b/src/transformers/pipelines/image_feature_extraction.py @@ -1,10 +1,12 @@ -from typing import Dict +from typing import Any, Dict, List, Union from ..utils import add_end_docstrings, is_vision_available from .base import GenericTensor, Pipeline, build_pipeline_init_args if is_vision_available(): + from PIL import Image + from ..image_utils import load_image @@ -88,7 +90,7 @@ class ImageFeatureExtractionPipeline(Pipeline): elif self.framework == "tf": return outputs.numpy().tolist() - def __call__(self, *args, **kwargs): + def __call__(self, *args: Union[str, "Image.Image", List["Image.Image"], List[str]], **kwargs: Any) -> List[Any]: """ Extract the features of the input(s). diff --git a/src/transformers/pipelines/image_segmentation.py b/src/transformers/pipelines/image_segmentation.py index d388e591bf9..ef596e2f533 100644 --- a/src/transformers/pipelines/image_segmentation.py +++ b/src/transformers/pipelines/image_segmentation.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Union, overload import numpy as np @@ -23,10 +23,6 @@ if is_torch_available(): logger = logging.get_logger(__name__) -Prediction = Dict[str, Any] -Predictions = List[Prediction] - - @add_end_docstrings(build_pipeline_init_args(has_image_processor=True)) class ImageSegmentationPipeline(Pipeline): """ @@ -94,7 +90,15 @@ class ImageSegmentationPipeline(Pipeline): return preprocess_kwargs, {}, postprocess_kwargs - def __call__(self, inputs=None, **kwargs) -> Union[Predictions, List[Prediction]]: + @overload + def __call__(self, inputs: Union[str, "Image.Image"], **kwargs: Any) -> List[Dict[str, Any]]: ... + + @overload + def __call__(self, inputs: Union[List[str], List["Image.Image"]], **kwargs: Any) -> List[List[Dict[str, Any]]]: ... + + def __call__( + self, inputs: Union[str, "Image.Image", List[str], List["Image.Image"]], **kwargs: Any + ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]: """ Perform segmentation (detect masks & classes) in the image(s) passed as inputs. @@ -123,9 +127,8 @@ class ImageSegmentationPipeline(Pipeline): the call may block forever. Return: - A dictionary or a list of dictionaries containing the result. If the input is a single image, will return a - list of dictionaries, if the input is a list of several images, will return a list of list of dictionaries - corresponding to each image. 
+        If the input is a single image, will return a list of dictionaries; if the input is a list of several images,
+        will return a list of lists of dictionaries corresponding to each image.
 
         The dictionaries contain the mask, label and score (where applicable) of each detected object and contains the
         following keys:
diff --git a/src/transformers/pipelines/image_text_to_text.py b/src/transformers/pipelines/image_text_to_text.py
index 330aa683adf..23f914d48c1 100644
--- a/src/transformers/pipelines/image_text_to_text.py
+++ b/src/transformers/pipelines/image_text_to_text.py
@@ -15,7 +15,7 @@
 import enum
 from collections.abc import Iterable  # pylint: disable=g-importing-member
-from typing import Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, overload
 
 from ..generation import GenerationConfig
 from ..processing_utils import ProcessingKwargs, Unpack
@@ -251,6 +251,22 @@ class ImageTextToTextPipeline(Pipeline):
             generate_kwargs["eos_token_id"] = stop_sequence_ids[0]
         return preprocess_params, forward_kwargs, postprocess_params
 
+    @overload
+    def __call__(
+        self,
+        images: Optional[Union[str, "Image.Image"]] = None,
+        text: Optional[str] = None,
+        **kwargs: Any,
+    ) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(
+        self,
+        images: Optional[Union[List[str], List["Image.Image"]]] = None,
+        text: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> List[List[Dict[str, Any]]]: ...
+
     def __call__(
         self,
         images: Optional[
@@ -266,7 +282,7 @@ class ImageTextToTextPipeline(Pipeline):
         ] = None,
         text: Optional[Union[str, List[str], List[dict]]] = None,
         **kwargs,
-    ):
+    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
         """
         Generate a text given text and the image(s) passed as inputs.
diff --git a/src/transformers/pipelines/image_to_image.py b/src/transformers/pipelines/image_to_image.py
index cb66359a4dd..23fe98ef9e9 100644
--- a/src/transformers/pipelines/image_to_image.py
+++ b/src/transformers/pipelines/image_to_image.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Union
+from typing import Any, List, Union, overload
 
 import numpy as np
 
@@ -84,8 +84,14 @@ class ImageToImagePipeline(Pipeline):
 
         return preprocess_params, forward_params, postprocess_params
 
+    @overload
+    def __call__(self, images: Union[str, "Image.Image"], **kwargs: Any) -> "Image.Image": ...
+
+    @overload
+    def __call__(self, images: Union[List[str], List["Image.Image"]], **kwargs: Any) -> List["Image.Image"]: ...
+
     def __call__(
-        self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs
+        self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs: Any
     ) -> Union["Image.Image", List["Image.Image"]]:
         """
         Transform the image(s) passed as inputs.
diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
index f8fda5d2fd9..62942fb96d8 100644
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Union +from typing import Any, Dict, List, Union, overload from ..generation import GenerationConfig from ..utils import ( @@ -111,7 +111,13 @@ class ImageToTextPipeline(Pipeline): return preprocess_params, forward_params, {} - def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs): + @overload + def __call__(self, inputs: Union[str, "Image.Image"], **kwargs: Any) -> List[Dict[str, Any]]: ... + + @overload + def __call__(self, inputs: Union[List[str], List["Image.Image"]], **kwargs: Any) -> List[List[Dict[str, Any]]]: ... + + def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs): """ Assign labels to the image(s) passed as inputs. diff --git a/src/transformers/pipelines/mask_generation.py b/src/transformers/pipelines/mask_generation.py index 5c0c5e72c81..5f32b8df8ad 100644 --- a/src/transformers/pipelines/mask_generation.py +++ b/src/transformers/pipelines/mask_generation.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, overload from ..image_utils import load_image from ..utils import ( @@ -16,6 +16,9 @@ if is_torch_available(): from ..models.auto.modeling_auto import MODEL_FOR_MASK_GENERATION_MAPPING_NAMES +if TYPE_CHECKING: + from PIL import Image + logger = logging.get_logger(__name__) @@ -125,12 +128,22 @@ class MaskGenerationPipeline(ChunkPipeline): postprocess_kwargs["output_bboxes_mask"] = kwargs["output_bboxes_mask"] return preprocess_kwargs, forward_params, postprocess_kwargs - def __call__(self, image, *args, num_workers=None, batch_size=None, **kwargs): + @overload + def __call__(self, image: Union[str, "Image.Image"], *args: Any, **kwargs: Any) -> Dict[str, Any]: ... + + @overload + def __call__( + self, image: Union[List[str], List["Image.Image"]], *args: Any, **kwargs: Any + ) -> List[Dict[str, Any]]: ... + + def __call__( + self, image: Union[str, "Image.Image", List[str], List["Image.Image"]], *args: Any, **kwargs: Any + ) -> Union[Dict[str, Any], List[Dict[str, Any]]]: """ Generates binary segmentation masks Args: - inputs (`np.ndarray` or `bytes` or `str` or `dict`): + image (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`): Image or list of images. mask_threshold (`float`, *optional*, defaults to 0.0): Threshold to use when turning the predicted masks into binary values. @@ -163,6 +176,8 @@ class MaskGenerationPipeline(ChunkPipeline): the "object" described by the label and the mask. 
""" + num_workers = kwargs.pop("num_workers", None) + batch_size = kwargs.pop("batch_size", None) return super().__call__(image, *args, num_workers=num_workers, batch_size=batch_size, **kwargs) def preprocess( diff --git a/src/transformers/pipelines/object_detection.py b/src/transformers/pipelines/object_detection.py index c84f17b2bd6..3b67ee49a08 100644 --- a/src/transformers/pipelines/object_detection.py +++ b/src/transformers/pipelines/object_detection.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Union +from typing import TYPE_CHECKING, Any, Dict, List, Union, overload from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends from .base import Pipeline, build_pipeline_init_args @@ -16,13 +16,12 @@ if is_torch_available(): MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, ) +if TYPE_CHECKING: + from PIL import Image + logger = logging.get_logger(__name__) -Prediction = Dict[str, Any] -Predictions = List[Prediction] - - @add_end_docstrings(build_pipeline_init_args(has_image_processor=True)) class ObjectDetectionPipeline(Pipeline): """ @@ -69,7 +68,15 @@ class ObjectDetectionPipeline(Pipeline): postprocess_kwargs["threshold"] = kwargs["threshold"] return preprocess_params, {}, postprocess_kwargs - def __call__(self, *args, **kwargs) -> Union[Predictions, List[Prediction]]: + @overload + def __call__(self, image: Union[str, "Image.Image"], *args: Any, **kwargs: Any) -> List[Dict[str, Any]]: ... + + @overload + def __call__( + self, image: Union[List[str], List["Image.Image"]], *args: Any, **kwargs: Any + ) -> List[List[Dict[str, Any]]]: ... + + def __call__(self, *args, **kwargs) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]: """ Detect objects (bounding boxes & classes) in the image(s) passed as inputs. diff --git a/src/transformers/pipelines/text2text_generation.py b/src/transformers/pipelines/text2text_generation.py index c07c12551ee..7bd2e47195e 100644 --- a/src/transformers/pipelines/text2text_generation.py +++ b/src/transformers/pipelines/text2text_generation.py @@ -1,5 +1,6 @@ import enum import warnings +from typing import Any, Dict, List, Union from ..generation import GenerationConfig from ..tokenization_utils import TruncationStrategy @@ -154,7 +155,7 @@ class Text2TextGenerationPipeline(Pipeline): del inputs["token_type_ids"] return inputs - def __call__(self, *args, **kwargs): + def __call__(self, *args: Union[str, List[str]], **kwargs: Any) -> List[Dict[str, str]]: r""" Generate the output text(s) using text(s) given as inputs. diff --git a/src/transformers/pipelines/text_classification.py b/src/transformers/pipelines/text_classification.py index dadb29c386b..a50174e8c95 100644 --- a/src/transformers/pipelines/text_classification.py +++ b/src/transformers/pipelines/text_classification.py @@ -1,6 +1,6 @@ import inspect import warnings -from typing import Dict +from typing import Any, Dict, List, Union import numpy as np @@ -120,7 +120,11 @@ class TextClassificationPipeline(Pipeline): postprocess_params["function_to_apply"] = function_to_apply return preprocess_params, {}, postprocess_params - def __call__(self, inputs, **kwargs): + def __call__( + self, + inputs: Union[str, List[str], Dict[str, str], List[Dict[str, str]]], + **kwargs: Any, + ) -> List[Dict[str, Any]]: """ Classify the text(s) given as inputs. @@ -148,7 +152,7 @@ class TextClassificationPipeline(Pipeline): - `"none"`: Does not apply any function on the output. 
         Return:
-            A list or a list of list of `dict`: Each result comes as list of dictionaries with the following keys:
+            A list of `dict`: Each result comes as a list of dictionaries with the following keys:
 
             - **label** (`str`) -- The label predicted.
             - **score** (`float`) -- The corresponding probability.
diff --git a/src/transformers/pipelines/text_generation.py b/src/transformers/pipelines/text_generation.py
index 7f29117a1ac..e9812a26af8 100644
--- a/src/transformers/pipelines/text_generation.py
+++ b/src/transformers/pipelines/text_generation.py
@@ -1,7 +1,7 @@
 import enum
 import itertools
 import types
-from typing import Dict
+from typing import Any, Dict, List, overload
 
 from ..generation import GenerationConfig
 from ..utils import ModelOutput, add_end_docstrings, is_tf_available, is_torch_available
@@ -19,6 +19,8 @@ if is_tf_available():
 
     from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
 
+ChatType = List[Dict[str, str]]
+
 
 class ReturnType(enum.Enum):
     TENSORS = 0
@@ -231,6 +233,18 @@ class TextGenerationPipeline(Pipeline):
 
         return super()._parse_and_tokenize(*args, **kwargs)
 
+    @overload
+    def __call__(self, text_inputs: str, **kwargs: Any) -> List[Dict[str, str]]: ...
+
+    @overload
+    def __call__(self, text_inputs: List[str], **kwargs: Any) -> List[List[Dict[str, str]]]: ...
+
+    @overload
+    def __call__(self, text_inputs: ChatType, **kwargs: Any) -> List[Dict[str, ChatType]]: ...
+
+    @overload
+    def __call__(self, text_inputs: List[ChatType], **kwargs: Any) -> List[List[Dict[str, ChatType]]]: ...
+
     def __call__(self, text_inputs, **kwargs):
         """
         Complete the prompt(s) given as inputs.
diff --git a/src/transformers/pipelines/text_to_audio.py b/src/transformers/pipelines/text_to_audio.py
index bb5d0d5ec38..e9ca2a74a6d 100644
--- a/src/transformers/pipelines/text_to_audio.py
+++ b/src/transformers/pipelines/text_to_audio.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.from typing import List, Union
-from typing import List, Union
+from typing import Any, Dict, List, Union, overload
 
 from ..generation import GenerationConfig
 from ..utils import is_torch_available
@@ -173,7 +173,15 @@ class TextToAudioPipeline(Pipeline):
 
         return output
 
-    def __call__(self, text_inputs: Union[str, List[str]], **forward_params):
+    @overload
+    def __call__(self, text_inputs: str, **forward_params: Any) -> Dict[str, Any]: ...
+
+    @overload
+    def __call__(self, text_inputs: List[str], **forward_params: Any) -> List[Dict[str, Any]]: ...
+
+    def __call__(
+        self, text_inputs: Union[str, List[str]], **forward_params: Any
+    ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
         """
         Generates speech/audio from the inputs. See the [`TextToAudioPipeline`] documentation for more information.
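Reviewer note: with the `ChatType` alias and the four `TextGenerationPipeline` overloads above, a type checker can now tell plain-string calls apart from chat-style calls at the call site. A minimal sketch of what this buys a caller (the checkpoint is only an example, any chat-template model works; the commented types are what a PEP 484 checker infers from the overloads):

```python
from transformers import pipeline

# Narrowed to TextGenerationPipeline by the generated factory overloads earlier in this diff.
generator = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")

# str input -> List[Dict[str, str]]: each dict carries "generated_text" as a string.
completions = generator("Once upon a time,", max_new_tokens=20)

# Chat input (ChatType = List[Dict[str, str]]) -> List[Dict[str, ChatType]]:
# "generated_text" is now the whole conversation, with the assistant turn appended.
chat = [{"role": "user", "content": "Say hello in exactly one word."}]
replies = generator(chat, max_new_tokens=20)
```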
diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py
index 9256f238148..e8fded5530c 100644
--- a/src/transformers/pipelines/token_classification.py
+++ b/src/transformers/pipelines/token_classification.py
@@ -1,6 +1,6 @@
 import types
 import warnings
-from typing import List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union, overload
 
 import numpy as np
 
@@ -217,7 +217,15 @@ class TokenClassificationPipeline(ChunkPipeline):
         )
         return preprocess_params, {}, postprocess_params
 
-    def __call__(self, inputs: Union[str, List[str]], **kwargs):
+    @overload
+    def __call__(self, inputs: str, **kwargs: Any) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(self, inputs: List[str], **kwargs: Any) -> List[List[Dict[str, Any]]]: ...
+
+    def __call__(
+        self, inputs: Union[str, List[str]], **kwargs: Any
+    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
         """
         Classify each token of the text(s) given as inputs.
diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py
index 940dea10d76..dffc21fed2d 100644
--- a/src/transformers/pipelines/video_classification.py
+++ b/src/transformers/pipelines/video_classification.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import warnings
 from io import BytesIO
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, overload
 
 import requests
 
@@ -77,6 +77,12 @@ class VideoClassificationPipeline(Pipeline):
             postprocess_params["function_to_apply"] = "softmax"
         return preprocess_params, {}, postprocess_params
 
+    @overload
+    def __call__(self, inputs: str, **kwargs: Any) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(self, inputs: List[str], **kwargs: Any) -> List[List[Dict[str, Any]]]: ...
+
     def __call__(self, inputs: Optional[Union[str, List[str]]] = None, **kwargs):
         """
         Assign labels to the video(s) passed as inputs.
diff --git a/src/transformers/pipelines/zero_shot_audio_classification.py b/src/transformers/pipelines/zero_shot_audio_classification.py
index 8ed339a5b7f..a21e74e2092 100644
--- a/src/transformers/pipelines/zero_shot_audio_classification.py
+++ b/src/transformers/pipelines/zero_shot_audio_classification.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import UserDict
-from typing import Union
+from typing import Any, Dict, List, Union
 
 import numpy as np
 import requests
@@ -67,7 +67,7 @@ class ZeroShotAudioClassificationPipeline(Pipeline):
             raise ValueError(f"The {self.__class__} is only available in PyTorch.")
         # No specific FOR_XXX available yet
 
-    def __call__(self, audios: Union[np.ndarray, bytes, str], **kwargs):
+    def __call__(self, audios: Union[np.ndarray, bytes, str, dict], **kwargs: Any) -> List[Dict[str, Any]]:
         """
         Assign labels to the audio(s) passed as inputs.
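Reviewer note: the single-versus-batch overload pattern above (and in the files that follow) plays out like this for callers; a rough sketch, using the default checkpoint that `pipeline` picks for the task (downloaded on first use), with the statically inferred types in comments:

```python
from transformers import pipeline

ner = pipeline("token-classification")  # narrowed to TokenClassificationPipeline

# str -> List[Dict[str, Any]]: one entity dict ("entity", "score", "start", ...) per hit.
entities = ner("My name is Sarah and I live in London.")

# List[str] -> List[List[Dict[str, Any]]]: one inner list per input sentence.
batched = ner(["Hugging Face is based in New York.", "Paris is the capital of France."])
```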
diff --git a/src/transformers/pipelines/zero_shot_image_classification.py b/src/transformers/pipelines/zero_shot_image_classification.py
index 7e49ba0efaa..bae5333735b 100644
--- a/src/transformers/pipelines/zero_shot_image_classification.py
+++ b/src/transformers/pipelines/zero_shot_image_classification.py
@@ -1,6 +1,6 @@
 import warnings
 from collections import UserDict
-from typing import List, Union
+from typing import Any, Dict, List, Union, overload
 
 from ..utils import (
     add_end_docstrings,
@@ -74,7 +74,22 @@ class ZeroShotImageClassificationPipeline(Pipeline):
             else MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
         )
 
-    def __call__(self, image: Union[str, List[str], "Image", List["Image"]] = None, **kwargs):
+    @overload
+    def __call__(
+        self, image: Union[str, "Image.Image"], candidate_labels: List[str], **kwargs: Any
+    ) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(
+        self, image: Union[List[str], List["Image.Image"]], candidate_labels: List[str], **kwargs: Any
+    ) -> List[List[Dict[str, Any]]]: ...
+
+    def __call__(
+        self,
+        image: Union[str, List[str], "Image.Image", List["Image.Image"]],
+        candidate_labels: List[str],
+        **kwargs: Any,
+    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
         """
         Assign labels to the image(s) passed as inputs.
@@ -110,7 +125,7 @@ class ZeroShotImageClassificationPipeline(Pipeline):
             image = kwargs.pop("images")
         if image is None:
             raise ValueError("Cannot call the zero-shot-image-classification pipeline without an images argument!")
-        return super().__call__(image, **kwargs)
+        return super().__call__(image, candidate_labels=candidate_labels, **kwargs)
 
     def _sanitize_parameters(self, tokenizer_kwargs=None, **kwargs):
         preprocess_params = {}
diff --git a/src/transformers/pipelines/zero_shot_object_detection.py b/src/transformers/pipelines/zero_shot_object_detection.py
index 5385d84db76..2a92b8d152d 100644
--- a/src/transformers/pipelines/zero_shot_object_detection.py
+++ b/src/transformers/pipelines/zero_shot_object_detection.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, overload
 
 from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends
 from .base import ChunkPipeline, build_pipeline_init_args
@@ -62,12 +62,20 @@ class ZeroShotObjectDetectionPipeline(ChunkPipeline):
         requires_backends(self, "vision")
         self.check_model_type(MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES)
 
+    @overload
+    def __call__(
+        self, image: Union[str, "Image.Image"], candidate_labels: Union[str, List[str]], **kwargs: Any
+    ) -> List[Dict[str, Any]]: ...
+
+    @overload
+    def __call__(self, image: List[Dict[str, Any]], **kwargs: Any) -> List[List[Dict[str, Any]]]: ...
+
     def __call__(
         self,
         image: Union[str, "Image.Image", List[Dict[str, Any]]],
         candidate_labels: Optional[Union[str, List[str]]] = None,
-        **kwargs,
-    ):
+        **kwargs: Any,
+    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
         """
         Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
diff --git a/utils/check_pipeline_typing.py b/utils/check_pipeline_typing.py
new file mode 100644
index 00000000000..16dcf197a52
--- /dev/null
+++ b/utils/check_pipeline_typing.py
@@ -0,0 +1,93 @@
+import re
+
+from transformers.pipelines import SUPPORTED_TASKS, Pipeline
+
+
+HEADER = """
+# fmt: off
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# The part of the file below was automatically generated from the code.
+# Do NOT edit this part of the file manually as any edits will be overwritten by the generation
+# of the file. If any change should be done, please apply the changes to the `pipeline` function
+# below and run `python utils/check_pipeline_typing.py --fix_and_overwrite` to update the file.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+
+from typing import Literal, overload
+
+
+"""
+
+FOOTER = """
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# The part of the file above was automatically generated from the code.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# fmt: on
+"""
+
+TASK_PATTERN = "task: Optional[str] = None"
+
+
+def main(pipeline_file_path: str, fix_and_overwrite: bool = False):
+    with open(pipeline_file_path, "r") as file:
+        content = file.read()
+
+    # extract generated code in between <generated-code> and </generated-code>
+    current_generated_code = re.search(r"# <generated-code>(.*)# </generated-code>", content, re.DOTALL).group(1)
+    content_without_generated_code = content.replace(current_generated_code, "")
+
+    # extract pipeline signature in between `def pipeline` and `-> Pipeline`
+    pipeline_signature = re.search(r"def pipeline(.*) -> Pipeline:", content_without_generated_code, re.DOTALL).group(
+        1
+    )
+    pipeline_signature = pipeline_signature.replace("(\n    ", "(")  # start of the signature
+    pipeline_signature = pipeline_signature.replace(",\n    ", ", ")  # intermediate arguments
+    pipeline_signature = pipeline_signature.replace(",\n)", ")")  # end of the signature
+
+    # collect and sort available pipelines
+    pipelines = [(f'"{task}"', task_info["impl"]) for task, task_info in SUPPORTED_TASKS.items()]
+    pipelines = sorted(pipelines, key=lambda x: x[0])
+    pipelines.insert(0, (None, Pipeline))
+
+    # generate new `pipeline` signatures
+    new_generated_code = ""
+    for task, pipeline_class in pipelines:
+        if TASK_PATTERN not in pipeline_signature:
+            raise ValueError(f"Can't find `{TASK_PATTERN}` in pipeline signature: {pipeline_signature}")
+        pipeline_type = pipeline_class if isinstance(pipeline_class, str) else pipeline_class.__name__
+        new_pipeline_signature = pipeline_signature.replace(TASK_PATTERN, f"task: Literal[{task}]")
+        new_generated_code += f"@overload\ndef pipeline{new_pipeline_signature} -> {pipeline_type}: ...\n"
+
+    new_generated_code = HEADER + new_generated_code + FOOTER
+    new_generated_code = new_generated_code.rstrip("\n") + "\n"
+
+    if new_generated_code != current_generated_code and fix_and_overwrite:
+        print(f"Updating {pipeline_file_path}...")
+        wrapped_current_generated_code = "# <generated-code>" + current_generated_code + "# </generated-code>"
+        wrapped_new_generated_code = "# <generated-code>" + new_generated_code + "# </generated-code>"
+        content = content.replace(wrapped_current_generated_code, wrapped_new_generated_code)
+
+        # write content to file
+        with open(pipeline_file_path, "w") as file:
+            file.write(content)
+
+    elif new_generated_code != current_generated_code and not fix_and_overwrite:
+        message = (
+            f"Found inconsistencies in {pipeline_file_path}. "
+            "Run `python utils/check_pipeline_typing.py --fix_and_overwrite` to fix them."
+        )
+        raise ValueError(message)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
+    parser.add_argument(
+        "--pipeline_file_path",
+        type=str,
+        default="src/transformers/pipelines/__init__.py",
+        help="Path to the pipeline file.",
+    )
+    args = parser.parse_args()
+    main(args.pipeline_file_path, args.fix_and_overwrite)
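
Two usage modes follow from the `argparse` entry point above: run bare, the script raises `ValueError` if the generated block has drifted from the `pipeline` signature; run with the flag, it rewrites the block in place. For example:

    # fail if the generated overloads in src/transformers/pipelines/__init__.py are stale
    python utils/check_pipeline_typing.py

    # regenerate the block after changing the `pipeline` signature or SUPPORTED_TASKS
    python utils/check_pipeline_typing.py --fix_and_overwrite
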