mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Changed processor documentation architecture. Added documentation for GLUE
This commit is contained in:
parent
c4ac7a76db
commit
ad4a393e2e
@ -4,42 +4,46 @@ Processors
|
||||
This library includes processors for several traditional tasks. These processors can be used to process a dataset into
|
||||
examples that can be fed to a model.
|
||||
|
||||
``GLUE``
|
||||
Processors
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
`General Language Understanding Evaluation (GLUE) <https://gluebenchmark.com/>`__ is a benchmark that evaluates
|
||||
All processors follow the same architecture which is that of the
|
||||
:class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
|
||||
of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
|
||||
:members:
|
||||
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.utils.InputExample
|
||||
:members:
|
||||
|
||||
|
||||
GLUE
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
`General Language Understanding Evaluation (GLUE) <https://gluebenchmark.com/>`__ is a benchmark that evaluates
|
||||
the performance of models across a diverse set of existing NLU tasks. It was released together with the paper
|
||||
`GLUE: A multi-task benchmark and analysis platform for natural language understanding <https://openreview.net/pdf?id=rJ4km2R5t7>`__
|
||||
`GLUE: A multi-task benchmark and analysis platform for natural language understanding <https://openreview.net/pdf?id=rJ4km2R5t7>`__
|
||||
|
||||
This library hosts a total of 10 processors for the following tasks: MRPC, MNLI, MNLI (mismatched),
|
||||
CoLA, SST2, STSB, QQP, QNLI, RTE and WNLI.
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.MrpcProcessor
|
||||
:members:
|
||||
Those processors are:
|
||||
- :class:`~pytorch_transformers.data.processors.utils.MrpcProcessor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.MnliProcessor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.MnliMismatchedProcessor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.Sst2Processor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.StsbProcessor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.QqpProcessor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.QnliProcessor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.RteProcessor`
|
||||
- :class:`~pytorch_transformers.data.processors.utils.WnliProcessor`
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.MnliProcessor
|
||||
:members:
|
||||
Additionally, the following method can be used to load values from a data file and convert them to a list of
|
||||
:class:`~pytorch_transformers.data.processors.utils.InputExample`.
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.MnliMismatchedProcessor
|
||||
:members:
|
||||
.. autofunction:: pytorch_transformers.data.processors.glue.glue_convert_examples_to_features
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.ColaProcessor
|
||||
:members:
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.Sst2Processor
|
||||
:members:
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.StsbProcessor
|
||||
:members:
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.QqpProcessor
|
||||
:members:
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.QnliProcessor
|
||||
:members:
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.RteProcessor
|
||||
:members:
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.glue.WnliProcessor
|
||||
:members:
|
||||
Example usage
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
@ -26,6 +26,7 @@ if is_tf_available():
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def glue_convert_examples_to_features(examples, tokenizer,
|
||||
max_length=512,
|
||||
task=None,
|
||||
@ -36,7 +37,27 @@ def glue_convert_examples_to_features(examples, tokenizer,
|
||||
pad_token_segment_id=0,
|
||||
mask_padding_with_zero=True):
|
||||
"""
|
||||
Loads a data file into a list of `InputBatch`s
|
||||
Loads a data file into a list of ``InputFeatures``
|
||||
|
||||
Args:
|
||||
examples: List of ``InputExamples`` or ``tf.data.Dataset`` containing the examples.
|
||||
tokenizer: Instance of a tokenizer that will tokenize the examples
|
||||
max_length: Maximum example length
|
||||
task: GLUE task
|
||||
label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
|
||||
output_mode: String indicating the output mode. Either ``regression`` or ``classification``
|
||||
pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
|
||||
pad_token: Padding token
|
||||
pad_token_segment_id: The segment ID for the padding token (It is usually 0, but can vary such as for XLNet where it is 4)
|
||||
mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values
|
||||
and by ``0`` for padded values. If set to ``False``, inverts it (``1`` for padded values, ``0`` for
|
||||
actual values)
|
||||
|
||||
Returns:
|
||||
If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
|
||||
containing the task-specific features. If the input is a list of ``InputExamples``, will return
|
||||
a list of task-specific ``InputFeatures`` which can be fed to the model.
|
||||
|
||||
"""
|
||||
is_tf_dataset = False
|
||||
if is_tf_available() and isinstance(examples, tf.data.Dataset):
|
||||
|
Loading…
Reference in New Issue
Block a user