Mirror of https://github.com/huggingface/transformers.git
Merge pull request #1831 from iedmrc/gpt2-tokenization-sum-func-replacement

sum() is replaced by itertools.chain.from_iterable()

Commit 1a237d7f42
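For background (this note is not part of the patch): flattening a list of lists with sum(lists, []) re-allocates and copies the accumulator on every addition, so it is quadratic in the total number of items, while itertools.chain.from_iterable streams the items in a single linear pass. A minimal standalone sketch of the two approaches:

    import itertools

    lists = [["a", "b"], ["c"], ["d", "e"]]

    # Quadratic: every `+` builds a fresh list and copies all items so far.
    flat_sum = sum(lists, [])

    # Linear: items are yielded lazily in one pass, with no intermediate copies.
    flat_chain = list(itertools.chain.from_iterable(lists))

    assert flat_sum == flat_chain == ["a", "b", "c", "d", "e"]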
@@ -21,6 +21,7 @@ import os
 import json
 import six
 import copy
+import itertools
 from io import open
 
 from .file_utils import cached_path, is_tf_available, is_torch_available
@@ -641,9 +642,9 @@ class PreTrainedTokenizer(object):
                         tokenized_text += [sub_text]
                 text_list = tokenized_text
 
-            return sum((self._tokenize(token, **kwargs) if token not \
+            return list(itertools.chain.from_iterable((self._tokenize(token, **kwargs) if token not \
                 in self.added_tokens_encoder and token not in self.all_special_tokens \
-                else [token] for token in tokenized_text), [])
+                else [token] for token in tokenized_text)))
 
         added_tokens = list(self.added_tokens_encoder.keys()) + self.all_special_tokens
         tokenized_text = split_on_tokens(added_tokens, text)
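The conditional generator expression in the patched return statement can be exercised in isolation. A sketch with a stand-in _tokenize and toy token tables (all names below are hypothetical stand-ins mirroring the diff, not the library's actual API):

    import itertools

    added_tokens_encoder = {"<new_tok>": 50257}   # stand-in added-token vocabulary
    all_special_tokens = ["<|endoftext|>"]        # stand-in special tokens

    def _tokenize(token):
        # Stand-in for the model-specific subword tokenizer.
        return token.split("-")

    tokenized_text = ["hello-world", "<new_tok>", "foo-bar"]

    # Same shape as the patched return: known added/special tokens pass
    # through unchanged; everything else is sub-tokenized, then flattened.
    flat = list(itertools.chain.from_iterable(
        _tokenize(token)
        if token not in added_tokens_encoder and token not in all_special_tokens
        else [token]
        for token in tokenized_text
    ))

    assert flat == ["hello", "world", "<new_tok>", "foo", "bar"]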