Fix custom tokenizers test (#19052)
* Fix CI for custom tokenizers
* Add nightly tests
* Run CI, run!
* Fix paths
* Typos
* Fix test
parent 68bb33d770 · commit f7ce4f1ff7
.circleci/config.yml
@@ -578,19 +578,45 @@ jobs:
                 key: v0.5-custom_tokenizers-{{ checksum "setup.py" }}
                 paths:
                     - '~/.cache/pip'
+            - run: python utils/tests_fetcher.py | tee test_preparation.txt
+            - store_artifacts:
+                path: ~/transformers/test_preparation.txt
             - run: |
                 if [ -f test_list.txt ]; then
-                    python -m pytest --max-worker-restart=0 -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py ./tests/test_tokenization_openai.py | tee tests_output.txt
-                fi
-            - run: |
-                if [ -f test_list.txt ]; then
-                    python -m pytest -n 1 --max-worker-restart=0 tests/test_tokenization_clip.py --dist=loadfile -s --make-reports=tests_tokenization_clip --durations=100 | tee tests_output.txt
+                    python -m pytest --max-worker-restart=0 -s --make-reports=tests_custom_tokenizers ./tests/models/bert_japanese/test_tokenization_bert_japanese.py ./tests/models/openai/test_tokenization_openai.py ./tests/models/clip/test_tokenization_clip.py | tee tests_output.txt
                 fi
             - store_artifacts:
                 path: ~/transformers/tests_output.txt
             - store_artifacts:
                 path: ~/transformers/reports
 
+    run_tests_custom_tokenizers_all:
+        working_directory: ~/transformers
+        docker:
+            - image: cimg/python:3.7.12
+        environment:
+            RUN_CUSTOM_TOKENIZERS: yes
+            TRANSFORMERS_IS_CI: yes
+            PYTEST_TIMEOUT: 120
+        steps:
+            - checkout
+            - restore_cache:
+                keys:
+                    - v0.5-custom_tokenizers-{{ checksum "setup.py" }}
+                    - v0.5-{{ checksum "setup.py" }}
+            - run: pip install --upgrade pip
+            - run: pip install .[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]
+            - run: python -m unidic download
+            - save_cache:
+                key: v0.5-custom_tokenizers-{{ checksum "setup.py" }}
+                paths:
+                    - '~/.cache/pip'
+            - run: python -m pytest --max-worker-restart=0 -s --make-reports=tests_custom_tokenizers ./tests/models/bert_japanese/test_tokenization_bert_japanese.py ./tests/models/openai/test_tokenization_openai.py ./tests/models/clip/test_tokenization_clip.py | tee tests_output.txt
+            - store_artifacts:
+                path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                path: ~/transformers/reports
+
     run_examples_torch:
         working_directory: ~/transformers
         docker:
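The gist of the hunk above: on PRs, utils/tests_fetcher.py writes test_list.txt only when the changed files are relevant, so the `if [ -f test_list.txt ]` gate skips pytest on unrelated PRs; the new run_tests_custom_tokenizers_all job runs the same tests unconditionally for the nightly workflow. A minimal Python sketch of that gate, not part of the commit (the pytest flags and test paths are copied from the job above):

# Sketch only: mirrors the shell gate `if [ -f test_list.txt ]; then ... fi`.
import os
import subprocess

CUSTOM_TOKENIZER_TESTS = [
    "./tests/models/bert_japanese/test_tokenization_bert_japanese.py",
    "./tests/models/openai/test_tokenization_openai.py",
    "./tests/models/clip/test_tokenization_clip.py",
]

if os.path.isfile("test_list.txt"):
    # test_list.txt is produced by utils/tests_fetcher.py when the diff is relevant.
    subprocess.run(
        ["python", "-m", "pytest", "--max-worker-restart=0", "-s",
         "--make-reports=tests_custom_tokenizers", *CUSTOM_TOKENIZER_TESTS],
        check=True,
    )
else:
    print("No test_list.txt; skipping custom tokenizer tests.")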
@@ -1026,6 +1052,42 @@ jobs:
             - store_artifacts:
                 path: ~/transformers/reports
 
+    run_tests_layoutlmv2_and_v3_all:
+        working_directory: ~/transformers
+        docker:
+            - image: cimg/python:3.7.12
+        environment:
+            OMP_NUM_THREADS: 1
+            TRANSFORMERS_IS_CI: yes
+            PYTEST_TIMEOUT: 120
+        resource_class: xlarge
+        parallelism: 1
+        steps:
+            - checkout
+            - restore_cache:
+                keys:
+                    - v0.5-torch-{{ checksum "setup.py" }}
+                    - v0.5-{{ checksum "setup.py" }}
+            - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
+            - run: pip install --upgrade pip
+            - run: pip install .[torch,testing,vision]
+            - run: pip install torchvision
+            # The commit `36a65a0907d90ed591479b2ebaa8b61cfa0b4ef0` in `detectron2` break things.
+            # See https://github.com/facebookresearch/detectron2/commit/36a65a0907d90ed591479b2ebaa8b61cfa0b4ef0#comments.
+            # TODO: Revert this change back once the above issue is fixed.
+            - run: python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
+            - run: sudo apt install tesseract-ocr
+            - run: pip install pytesseract
+            - save_cache:
+                key: v0.5-torch-{{ checksum "setup.py" }}
+                paths:
+                    - '~/.cache/pip'
+            - run: python -m pytest -n 1 --max-worker-restart=0 tests/models/*layoutlmv* --dist=loadfile -s --make-reports=tests_layoutlmv2_and_v3 --durations=100
+            - store_artifacts:
+                path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                path: ~/transformers/reports
+
 # TPU JOBS
     run_examples_tpu:
         docker:
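For reference, the `tests/models/*layoutlmv*` glob in the new nightly job is expanded by the shell before pytest sees it. An illustrative expansion (the exact directory names are assumed from the job name, which covers LayoutLMv2 and LayoutLMv3):

# Illustration only: expands the same pattern the shell would.
import glob

matches = sorted(glob.glob("tests/models/*layoutlmv*"))
# Expected to include the LayoutLMv2 and LayoutLMv3 test directories, e.g.
# ['tests/models/layoutlmv2', 'tests/models/layoutlmv3']
print(matches)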
@@ -1094,6 +1156,7 @@ workflows:
             - run_examples_torch_all
             - run_examples_tensorflow_all
             - run_examples_flax_all
+            - run_tests_custom_tokenizers_all
             - run_tests_torch_and_tf_all
             - run_tests_torch_and_flax_all
             - run_tests_torch_all
@@ -1103,6 +1166,7 @@ workflows:
             - run_tests_pipelines_tf_all
             - run_tests_onnxruntime_all
             - run_tests_hub_all
+            - run_tests_layoutlmv2_and_v3_all
 
 #    tpu_testing_jobs:
 #        triggers:
setup.py
@@ -236,6 +236,7 @@ class DepsTableUpdateCommand(Command):
 
 
 extras = {}
+extras["blob"] = []
 
 extras["ja"] = deps_list("fugashi", "ipadic", "unidic_lite", "unidic")
 extras["sklearn"] = deps_list("scikit-learn")
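For context, a rough sketch of the `deps_list` convention around the hunk above: it resolves package names to the pinned requirement strings kept in setup.py's dependency table, which is what makes `pip install .[ja]` install exact pins. The pins below are illustrative, not the real table:

# Sketch only: illustrative pins, not transformers' actual dependency table.
deps = {
    "fugashi": "fugashi>=1.0",
    "ipadic": "ipadic>=1.0.0,<2.0",
}

def deps_list(*pkgs):
    # Resolve each package name to its pinned requirement string.
    return [deps[pkg] for pkg in pkgs]

extras = {}
extras["blob"] = []  # the (empty) extra added by this commit, as in the diff above
extras["ja"] = deps_list("fugashi", "ipadic")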
tests/test_tokenization_common.py
@@ -45,6 +45,7 @@ from transformers import (
     SpecialTokensMixin,
     Trainer,
     TrainingArguments,
+    is_flax_available,
     is_tf_available,
     is_tokenizers_available,
     is_torch_available,
@@ -2928,8 +2929,10 @@ class TokenizerTesterMixin:
             returned_tensor = "pt"
         elif is_tf_available():
             returned_tensor = "tf"
-        else:
+        elif is_flax_available():
             returned_tensor = "jax"
+        else:
+            return
 
         if not tokenizer.pad_token or tokenizer.pad_token_id < 0:
             return
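The behavioral fix in TokenizerTesterMixin: the old bare `else` assumed Flax (returned "jax") whenever neither torch nor TensorFlow was installed; the patch probes Flax explicitly and returns early when no framework is available. A condensed sketch of the resulting chain (the surrounding test method is not shown in the hunk, so the function wrapper here is illustrative):

from transformers import is_flax_available, is_tf_available, is_torch_available

def pick_returned_tensor():
    # Preference order after this patch: torch, then TensorFlow, then Flax.
    if is_torch_available():
        return "pt"
    elif is_tf_available():
        return "tf"
    elif is_flax_available():
        return "jax"
    # No framework installed: the test now bails out instead of
    # unconditionally assuming "jax" as before.
    return None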