mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-02 19:21:31 +06:00
When using the new padding/truncation paradigm setting padding="max_length" + max_length=X actually pads the input up to max_length.
This results in every sample going through QA pipelines being of size 384 whatever the actual input size is, making the overall pipeline very slow. Signed-off-by: Morgan Funtowicz <funtowiczmo@gmail.com>
This commit is contained in:
parent
90fbc544ca
commit
1b00a9a2ff
@ -10,7 +10,7 @@ from tqdm import tqdm
|
|||||||
from ...file_utils import is_tf_available, is_torch_available
|
from ...file_utils import is_tf_available, is_torch_available
|
||||||
from ...tokenization_bert import whitespace_tokenize
|
from ...tokenization_bert import whitespace_tokenize
|
||||||
from .utils import DataProcessor
|
from .utils import DataProcessor
|
||||||
|
from ...tokenization_utils_base import PaddingStrategy
|
||||||
|
|
||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
import torch
|
import torch
|
||||||
@ -137,7 +137,7 @@ def squad_convert_example_to_features(example, max_seq_length, doc_stride, max_q
|
|||||||
truncated_query if tokenizer.padding_side == "right" else span_doc_tokens,
|
truncated_query if tokenizer.padding_side == "right" else span_doc_tokens,
|
||||||
span_doc_tokens if tokenizer.padding_side == "right" else truncated_query,
|
span_doc_tokens if tokenizer.padding_side == "right" else truncated_query,
|
||||||
truncation="only_second" if tokenizer.padding_side == "right" else "only_first",
|
truncation="only_second" if tokenizer.padding_side == "right" else "only_first",
|
||||||
padding="max_length",
|
padding=PaddingStrategy.DO_NOT_PAD,
|
||||||
max_length=max_seq_length,
|
max_length=max_seq_length,
|
||||||
return_overflowing_tokens=True,
|
return_overflowing_tokens=True,
|
||||||
stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens,
|
stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens,
|
||||||
|
Loading…
Reference in New Issue
Block a user