diff --git a/.circleci/config.yml b/.circleci/config.yml index 97f5f25606e..7ca5f8121c5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,8 +13,6 @@ jobs: - run: sudo pip install --progress-bar off . - run: sudo pip install pytest codecov pytest-cov - run: sudo pip install tensorboardX scikit-learn - - run: sudo apt-get -y install libmecab-dev mecab mecab-ipadic-utf8 swig - - run: sudo pip install mecab-python3 - run: python -m pytest -sv ./transformers/tests/ --cov - run: codecov build_py3_torch: @@ -29,8 +27,6 @@ jobs: - run: sudo pip install --progress-bar off . - run: sudo pip install pytest codecov pytest-cov - run: sudo pip install tensorboardX scikit-learn - - run: sudo apt-get -y install libmecab-dev mecab mecab-ipadic-utf8 swig - - run: sudo pip install mecab-python3 - run: python -m pytest -sv ./transformers/tests/ --cov - run: python -m pytest -sv ./examples/ - run: codecov @@ -46,8 +42,6 @@ jobs: - run: sudo pip install --progress-bar off . - run: sudo pip install pytest codecov pytest-cov - run: sudo pip install tensorboardX scikit-learn - - run: sudo apt-get -y install libmecab-dev mecab mecab-ipadic-utf8 swig - - run: sudo pip install mecab-python3 - run: python -m pytest -sv ./transformers/tests/ --cov - run: codecov build_py2_torch: @@ -61,8 +55,6 @@ jobs: - run: sudo pip install torch - run: sudo pip install --progress-bar off . - run: sudo pip install pytest codecov pytest-cov - - run: sudo apt-get -y install libmecab-dev mecab mecab-ipadic-utf8 swig - - run: sudo pip install mecab-python - run: python -m pytest -sv ./transformers/tests/ --cov - run: codecov build_py2_tf: @@ -76,10 +68,18 @@ jobs: - run: sudo pip install tensorflow - run: sudo pip install --progress-bar off . 
- run: sudo pip install pytest codecov pytest-cov - - run: sudo apt-get -y install libmecab-dev mecab mecab-ipadic-utf8 swig - - run: sudo pip install mecab-python - run: python -m pytest -sv ./transformers/tests/ --cov - run: codecov + build_py3_custom_tokenizers: + working_directory: ~/transformers + docker: + - image: circleci/python:3.5 + steps: + - checkout + - run: sudo pip install --progress-bar off . + - run: sudo pip install pytest + - run: sudo pip install mecab-python3 + - run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./transformers/tests/tokenization_bert_japanese_test.py deploy_doc: working_directory: ~/transformers docker: diff --git a/transformers/tests/tokenization_bert_japanese_test.py b/transformers/tests/tokenization_bert_japanese_test.py index 6f66b96411a..545193c7cce 100644 --- a/transformers/tests/tokenization_bert_japanese_test.py +++ b/transformers/tests/tokenization_bert_japanese_test.py @@ -16,7 +16,6 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest -import pytest from io import open from transformers.tokenization_bert import WordpieceTokenizer @@ -25,8 +24,10 @@ from transformers.tokenization_bert_japanese import (BertJapaneseTokenizer, VOCAB_FILES_NAMES) from .tokenization_tests_commons import CommonTestCases +from .utils import slow, custom_tokenizers +@custom_tokenizers class BertJapaneseTokenizationTest(CommonTestCases.CommonTokenizerTester): tokenizer_class = BertJapaneseTokenizer @@ -104,7 +105,7 @@ class BertJapaneseTokenizationTest(CommonTestCases.CommonTokenizerTester): self.assertListEqual(tokenizer.tokenize(u"こんばんは こんばんにちは こんにちは"), [u"こん", u"##ばんは", u"[UNK]", u"こんにちは"]) - @pytest.mark.slow + @slow def test_sequence_builders(self): tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese") @@ -172,7 +173,7 @@ class BertJapaneseCharacterTokenizationTest(CommonTestCases.CommonTokenizerTeste self.assertListEqual(tokenizer.tokenize(u"こんにちほ"), [u"こ", u"ん", u"に", u"ち", u"[UNK]"]) - 
@pytest.mark.slow + @slow def test_sequence_builders(self): tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese-char") @@ -188,5 +189,3 @@ class BertJapaneseCharacterTokenizationTest(CommonTestCases.CommonTokenizerTeste -if __name__ == '__main__': - unittest.main() diff --git a/transformers/tests/utils.py b/transformers/tests/utils.py index 7a51ab612b6..2b97293ca7d 100644 --- a/transformers/tests/utils.py +++ b/transformers/tests/utils.py @@ -6,18 +6,23 @@ from distutils.util import strtobool from transformers.file_utils import _tf_available, _torch_available -try: - run_slow = os.environ["RUN_SLOW"] -except KeyError: - # RUN_SLOW isn't set, default to skipping slow tests. - _run_slow_tests = False -else: - # RUN_SLOW is set, convert it to True or False. +def parse_flag_from_env(key, default=False): try: - _run_slow_tests = strtobool(run_slow) - except ValueError: - # More values are supported, but let's keep the message simple. - raise ValueError("If set, RUN_SLOW must be yes or no.") + value = os.environ[key] + except KeyError: + # KEY isn't set, default to `default`. + _value = default + else: + # KEY is set, convert it to True or False. + try: + _value = strtobool(value) + except ValueError: + # More values are supported, but let's keep the message simple. + raise ValueError("If set, {} must be yes or no.".format(key)) + return _value + +_run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False) +_run_custom_tokenizers = parse_flag_from_env("RUN_CUSTOM_TOKENIZERS", default=False) def slow(test_case): @@ -33,6 +38,19 @@ def slow(test_case): return test_case +def custom_tokenizers(test_case): + """ + Decorator marking a test for a custom tokenizer. + + Custom tokenizers require additional dependencies, and are skipped + by default. Set the RUN_CUSTOM_TOKENIZERS environment variable + to a truthy value to run them. 
+ """ + if not _run_custom_tokenizers: + test_case = unittest.skip("test of custom tokenizers")(test_case) + return test_case + + def require_torch(test_case): """ Decorator marking a test that requires PyTorch. @@ -59,6 +77,6 @@ def require_tf(test_case): if _torch_available: # Set the USE_CUDA environment variable to select a GPU. - torch_device = "cuda" if os.environ.get("USE_CUDA") else "cpu" + torch_device = "cuda" if parse_flag_from_env("USE_CUDA") else "cpu" else: torch_device = None