Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-23 14:29:01 +06:00
Clarify description of the is_split_into_words argument (#11449)
* Improve documentation for is_split_into_words argument
* Change description wording
This commit is contained in: parent ab2cabb964, commit 6715e3b6a1
@@ -172,8 +172,9 @@ TAPAS_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
     length is required by one of the truncation/padding parameters. If the model has no specific maximum
     input length (like XLNet) truncation/padding to a maximum length will be deactivated.
 is_split_into_words (:obj:`bool`, `optional`, defaults to :obj:`False`):
-    Whether or not the input is already pre-tokenized (e.g., split into words), in which case the tokenizer
-    will skip the pre-tokenization step. This is useful for NER or token classification.
+    Whether or not the input is already pre-tokenized (e.g., split into words). If set to :obj:`True`,
+    the tokenizer assumes the input is already split into words (for instance, by splitting it on
+    whitespace) which it will tokenize. This is useful for NER or token classification.
 pad_to_multiple_of (:obj:`int`, `optional`):
     If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
     the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
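To see what the new wording describes in practice, here is a minimal usage sketch; it assumes network access and uses the `bert-base-cased` checkpoint and its fast tokenizer purely as an illustration, not as anything mandated by this change:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# Input that has already been split into words, e.g. for NER / token classification.
words = ["My", "name", "is", "Sylvain", "."]
encoding = tokenizer(words, is_split_into_words=True)

print(encoding.tokens())    # subword tokens produced from the pre-split words
print(encoding.word_ids())  # maps each token back to the word it came from (fast tokenizers only)

With is_split_into_words=True the tokenizer does not try to split the input itself; it only applies subword tokenization to each provided word, which preserves the token-to-word alignment needed for word-level labels.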
@@ -643,7 +643,9 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
 text (:obj:`str`):
     The text to prepare.
 is_split_into_words (:obj:`bool`, `optional`, defaults to :obj:`False`):
-    Whether or not the text has been pretokenized.
+    Whether or not the input is already pre-tokenized (e.g., split into words). If set to :obj:`True`,
+    the tokenizer assumes the input is already split into words (for instance, by splitting it on
+    whitespace) which it will tokenize. This is useful for NER or token classification.
 kwargs:
     Keyword arguments to use for the tokenization.

@@ -1286,8 +1286,9 @@ ENCODE_KWARGS_DOCSTRING = r"""
     returned to provide some overlap between truncated and overflowing sequences. The value of this
     argument defines the number of overlapping tokens.
 is_split_into_words (:obj:`bool`, `optional`, defaults to :obj:`False`):
-    Whether or not the input is already pre-tokenized (e.g., split into words), in which case the tokenizer
-    will skip the pre-tokenization step. This is useful for NER or token classification.
+    Whether or not the input is already pre-tokenized (e.g., split into words). If set to :obj:`True`,
+    the tokenizer assumes the input is already split into words (for instance, by splitting it on
+    whitespace) which it will tokenize. This is useful for NER or token classification.
 pad_to_multiple_of (:obj:`int`, `optional`):
     If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
     the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
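The neighboring pad_to_multiple_of description can be exercised the same way; a small sketch, again assuming the illustrative `bert-base-cased` checkpoint and an installed PyTorch backend:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

batch = tokenizer(
    ["a short sentence", "a slightly longer example sentence"],
    padding=True,            # pad to the longest sequence in the batch
    pad_to_multiple_of=8,    # then round that length up to a multiple of 8
    return_tensors="pt",
)
print(batch["input_ids"].shape)  # the sequence dimension is a multiple of 8

Rounding padded lengths up to a multiple of 8 is what lets Tensor Cores on Volta-and-newer GPUs be used for the padded batch, as the docstring notes.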