Change back pipeline signatures (#3105)

* Change back pipeline signatures
* String types for non-imported objects
2025-07-31 02:02:21 +06:00 · 2020-03-06 17:26:18 -05:00 · 2020-03-06 17:26:18 -05:00 · 0ae91c80aa
commit 0ae91c80aa
parent d6df9a8ffe
1 changed file with 34 additions and 105 deletions
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -296,19 +296,13 @@ class Pipeline(_ScikitCompat):
    pickle format.

    Arguments:
-        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
-            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
-            checkpoint identifier or an actual pre-trained model inheriting from
+        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
+            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
-        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
-            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
+        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
+            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
@@ -334,12 +328,11 @@ class Pipeline(_ScikitCompat):
    """

    default_input_names = None
-    task = None

    def __init__(
        self,
-        model: Optional = None,
-        tokenizer: PreTrainedTokenizer = None,
+        model: Union["PreTrainedModel", "TFPreTrainedModel"],
+        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        args_parser: ArgumentHandler = None,
@@ -350,8 +343,6 @@ class Pipeline(_ScikitCompat):
        if framework is None:
            framework = get_framework()

-        model, tokenizer = self.get_defaults(model, tokenizer, framework)
-
        self.model = model
        self.tokenizer = tokenizer
        self.modelcard = modelcard
@@ -483,26 +474,6 @@ class Pipeline(_ScikitCompat):
        else:
            return predictions.numpy()

-    def get_defaults(self, model, tokenizer, framework):
-        task_defaults = SUPPORTED_TASKS[self.task]
-        if model is None:
-            if framework == "tf":
-                model = task_defaults["tf"].from_pretrained(task_defaults["default"]["model"]["tf"])
-            elif framework == "pt":
-                model = task_defaults["pt"].from_pretrained(task_defaults["default"]["model"]["pt"])
-            else:
-                raise ValueError("Provided framework should be either 'tf' for TensorFlow or 'pt' for PyTorch.")
-
-        if tokenizer is None:
-            default_tokenizer = task_defaults["default"]["tokenizer"]
-            if isinstance(default_tokenizer, tuple):
-                # For tuple we have (tokenizer name, {kwargs})
-                tokenizer = AutoTokenizer.from_pretrained(default_tokenizer[0], **default_tokenizer[1])
-            else:
-                tokenizer = AutoTokenizer.from_pretrained(default_tokenizer)
-
-        return model, tokenizer
-

 class FeatureExtractionPipeline(Pipeline):
    """
@@ -518,19 +489,13 @@ class FeatureExtractionPipeline(Pipeline):
    `huggingface.co/models <https://huggingface.co/models>`__.

    Arguments:
-        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
-            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
-            checkpoint identifier or an actual pre-trained model inheriting from
+        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
+            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
-        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
-            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
+        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
+            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
@@ -546,12 +511,10 @@ class FeatureExtractionPipeline(Pipeline):
            on the associated CUDA device id.
    """

-    task = "feature-extraction"
-
    def __init__(
        self,
-        model: Optional = None,
-        tokenizer: PreTrainedTokenizer = None,
+        model: Union["PreTrainedModel", "TFPreTrainedModel"],
+        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        args_parser: ArgumentHandler = None,
@@ -586,19 +549,13 @@ class TextClassificationPipeline(Pipeline):
    `huggingface.co/models <https://huggingface.co/models?search=&filter=text-classification>`__.

    Arguments:
-        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
-            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
-            checkpoint identifier or an actual pre-trained model inheriting from
+        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
+            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
-        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
-            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
+        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
+            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
@@ -614,8 +571,6 @@ class TextClassificationPipeline(Pipeline):
            on the associated CUDA device id.
    """

-    task = "sentiment-analysis"
-
    def __call__(self, *args, **kwargs):
        outputs = super().__call__(*args, **kwargs)
        scores = np.exp(outputs) / np.exp(outputs).sum(-1)
@@ -638,19 +593,13 @@ class FillMaskPipeline(Pipeline):
    `huggingface.co/models <https://huggingface.co/models?search=&filter=lm-head>`__.

    Arguments:
-        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
-            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
-            checkpoint identifier or an actual pre-trained model inheriting from
+        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
+            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
-        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
-            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
+        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
+            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
@@ -666,12 +615,10 @@ class FillMaskPipeline(Pipeline):
            on the associated CUDA device id.
    """

-    task = "fill-mask"
-
    def __init__(
        self,
-        model: Optional = None,
-        tokenizer: PreTrainedTokenizer = None,
+        model: Union["PreTrainedModel", "TFPreTrainedModel"],
+        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        args_parser: ArgumentHandler = None,
@@ -743,19 +690,13 @@ class NerPipeline(Pipeline):
    `huggingface.co/models <https://huggingface.co/models?search=&filter=token-classification>`__.

    Arguments:
-        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
-            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
-            checkpoint identifier or an actual pre-trained model inheriting from
+        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
+            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
-        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
-            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
+        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
+            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
@@ -769,19 +710,14 @@ class NerPipeline(Pipeline):
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
-
-    Example::
-
-        from transformers import pi
    """

    default_input_names = "sequences"
-    task = "ner"

    def __init__(
        self,
-        model: Optional = None,
-        tokenizer: PreTrainedTokenizer = None,
+        model: Union["PreTrainedModel", "TFPreTrainedModel"],
+        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        args_parser: ArgumentHandler = None,
@@ -928,19 +864,13 @@ class QuestionAnsweringPipeline(Pipeline):
    `huggingface.co/models <https://huggingface.co/models?search=&filter=question-answering>`__.

    Arguments:
-        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
-            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
-            checkpoint identifier or an actual pre-trained model inheriting from
+        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
+            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
-        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
-            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
+        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
+            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
-
-            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
@@ -957,12 +887,11 @@ class QuestionAnsweringPipeline(Pipeline):
    """

    default_input_names = "question,context"
-    task = "question-answering"

    def __init__(
        self,
-        model: Optional = None,
-        tokenizer: Optional[PreTrainedTokenizer] = None,
+        model: Union["PreTrainedModel", "TFPreTrainedModel"],
+        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        device: int = -1,