Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-20 13:08:21 +06:00
[add-new-model-like] Robust search & proper outer '),' in tokenizer mapping (#38703)

* [add-new-model-like] Robust search & proper outer '),' in tokenizer mapping
* code-style: arrange the imports in add_new_model_like.py
* Apply style fixes

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
parent 8340e8746e
commit 8ff22e9d3b
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# 1. Standard library
 import difflib
 import json
 import os
@@ -28,7 +29,12 @@ import yaml
 
 from ..models import auto as auto_module
 from ..models.auto.configuration_auto import model_type_to_module_name
-from ..utils import is_flax_available, is_tf_available, is_torch_available, logging
+from ..utils import (
+    is_flax_available,
+    is_tf_available,
+    is_torch_available,
+    logging,
+)
 from . import BaseTransformersCLICommand
 from .add_fast_image_processor import add_fast_image_processor
 
@@ -1009,10 +1015,11 @@ def insert_tokenizer_in_auto_module(old_model_patterns: ModelPatterns, new_model
     with open(TRANSFORMERS_PATH / "models" / "auto" / "tokenization_auto.py", "r", encoding="utf-8") as f:
         content = f.read()
 
+    pattern_tokenizer = re.compile(r"^\s*TOKENIZER_MAPPING_NAMES\s*=\s*OrderedDict\b")
     lines = content.split("\n")
     idx = 0
     # First we get to the TOKENIZER_MAPPING_NAMES block.
-    while not lines[idx].startswith("    TOKENIZER_MAPPING_NAMES = OrderedDict("):
+    while not pattern_tokenizer.search(lines[idx]):
         idx += 1
     idx += 1
 
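The regex makes the search tolerant of indentation and of spacing around the assignment, where the old exact-prefix match only accepted one four-space layout. A minimal sketch of the difference, run against invented sample lines that mimic layouts TOKENIZER_MAPPING_NAMES could take in tokenization_auto.py:

import re

pattern_tokenizer = re.compile(r"^\s*TOKENIZER_MAPPING_NAMES\s*=\s*OrderedDict\b")

# Invented samples, not taken from the real file.
samples = [
    "TOKENIZER_MAPPING_NAMES = OrderedDict(",      # module level, no indent
    "    TOKENIZER_MAPPING_NAMES = OrderedDict(",  # indented, e.g. under an else: branch
    "TOKENIZER_MAPPING_NAMES=OrderedDict(",        # no spaces around "="
]

for line in samples:
    old_match = line.startswith("    TOKENIZER_MAPPING_NAMES = OrderedDict(")
    new_match = bool(pattern_tokenizer.search(line))
    print(f"old={old_match!s:5} new={new_match} {line!r}")

# Only the second sample satisfies the old startswith check; the regex
# accepts all three, so the loop can no longer walk past the block and
# run off the end of the file.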
@@ -1024,9 +1031,12 @@ def insert_tokenizer_in_auto_module(old_model_patterns: ModelPatterns, new_model
     # Otherwise it takes several lines until we get to a "),"
     else:
         block = []
-        while not lines[idx].startswith("            ),"):
+        # should change to "        )," instead of "            ),"
+        while not lines[idx].startswith("        ),"):
             block.append(lines[idx])
             idx += 1
+        # if the lines[idx] does start with "        )," we still need it in our block
+        block.append(lines[idx])
         block = "\n".join(block)
         idx += 1
 
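For the multi-line case, the loop now stops at the outer "        )," that closes a model's whole entry and keeps that closing line in the collected block; the old twelve-space target matched the inner tuple's "            )," first, so the block ended one level too deep and lost its closing line. A minimal sketch of the fixed collection, on an invented slice of the mapping whose indentation is assumed to follow the real file's layout:

# Invented entry: outer entry closed by an 8-space "),",
# inner tokenizer-class tuple closed by a 12-space "),".
lines = [
    '        (',
    '            "albert",',
    '            (',
    '                "AlbertTokenizer",',
    '                "AlbertTokenizerFast",',
    '            ),',  # inner "),": 12 spaces, closes the class tuple
    '        ),',      # outer "),": 8 spaces, closes the whole entry
]

idx = 0
block = []
# The old target "            )," (12 spaces) would already match the
# inner closing line and stop the loop prematurely.
while not lines[idx].startswith("        ),"):
    block.append(lines[idx])
    idx += 1
# The outer closing line itself still belongs to the block.
block.append(lines[idx])
print("\n".join(block))

# Prints the complete entry, including its final "        ),", which is what
# the surrounding code needs when it later rewrites the block for a new model.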