From f2522869eaa0e957f01b9337b9dc3e0366bc657c Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 19:14:07 +0100 Subject: [PATCH 01/12] Review and update setup.py. --- setup.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 09edddb06ee..4276cc2764d 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ setup( keywords="NLP deep learning transformer pytorch tensorflow BERT GPT GPT-2 google openai CMU", license="Apache", url="https://github.com/huggingface/transformers", - package_dir = {'': 'src'}, + package_dir={"": "src"}, packages=find_packages("src"), install_requires=[ "numpy", @@ -69,11 +69,18 @@ setup( ], extras_require=extras, scripts=["transformers-cli"], - # python_requires='>=3.5.0', + python_requires=">=3.5.0", classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], ) From 9fc8dcb2a071b0b00d05d75b4a528f2dee4b2d00 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 20:17:04 +0100 Subject: [PATCH 02/12] Standardize import. Every other file uses this pattern. 
--- examples/run_ner.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/run_ner.py b/examples/run_ner.py index 63f81b077e6..d7972c3415b 100644 --- a/examples/run_ner.py +++ b/examples/run_ner.py @@ -25,7 +25,6 @@ import random import numpy as np import torch from seqeval.metrics import f1_score, precision_score, recall_score -from tensorboardX import SummaryWriter from torch.nn import CrossEntropyLoss from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler @@ -54,6 +53,12 @@ from transformers import ( from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file +try: + from torch.utils.tensorboard import SummaryWriter +except ImportError: + from tensorboardX import SummaryWriter + + logger = logging.getLogger(__name__) ALL_MODELS = sum( From 76a1417f2af9bfad3ad7cd2a5d3ad5de9d5f5f13 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 20:28:26 +0100 Subject: [PATCH 03/12] Include all optional dependencies in extras. Take advantage of this to simplify the Circle CI configuration. Don't bother with tensorboardX: it's a fallback for PyTorch < 1.1.0. --- .circleci/config.yml | 38 +++++++++++++------------------------- CONTRIBUTING.md | 22 +++++++++++----------- setup.cfg | 1 + setup.py | 20 ++++++++++++++------ 4 files changed, 39 insertions(+), 42 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index de3f3e87b95..f5ecf1c70a9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,11 +10,8 @@ jobs: parallelism: 1 steps: - checkout - - run: sudo pip install torch - - run: sudo pip install tensorflow - - run: sudo pip install --progress-bar off . 
- - run: sudo pip install pytest codecov pytest-cov pytest-xdist - - run: sudo pip install tensorboardX scikit-learn + - run: sudo pip install .[sklearn,tf,torch,testing] + - run: sudo pip install codecov pytest-cov - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov - run: codecov run_tests_torch: @@ -27,10 +24,8 @@ jobs: parallelism: 1 steps: - checkout - - run: sudo pip install torch - - run: sudo pip install --progress-bar off . - - run: sudo pip install pytest codecov pytest-cov pytest-xdist - - run: sudo pip install tensorboardX scikit-learn + - run: sudo pip install .[sklearn,torch,testing] + - run: sudo pip install codecov pytest-cov - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov - run: codecov run_tests_tf: @@ -43,10 +38,8 @@ jobs: parallelism: 1 steps: - checkout - - run: sudo pip install tensorflow - - run: sudo pip install --progress-bar off . - - run: sudo pip install pytest codecov pytest-cov pytest-xdist - - run: sudo pip install tensorboardX scikit-learn + - run: sudo pip install .[sklearn,tf,testing] + - run: sudo pip install codecov pytest-cov - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov - run: codecov run_tests_custom_tokenizers: @@ -55,9 +48,7 @@ jobs: - image: circleci/python:3.5 steps: - checkout - - run: sudo pip install --progress-bar off . - - run: sudo pip install pytest pytest-xdist - - run: sudo pip install mecab-python3 + - run: sudo pip install .[mecab,testing] - run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./tests/test_tokenization_bert_japanese.py run_examples_torch: working_directory: ~/transformers @@ -69,10 +60,8 @@ jobs: parallelism: 1 steps: - checkout - - run: sudo pip install torch - - run: sudo pip install --progress-bar off . 
- - run: sudo pip install pytest pytest-xdist - - run: sudo pip install tensorboardX scikit-learn + - run: sudo pip install .[sklearn,torch,testing] + - run: sudo pip install -r examples/requirements.txt - run: python -m pytest -n 8 --dist=loadfile -s -v ./examples/ deploy_doc: working_directory: ~/transformers @@ -83,8 +72,7 @@ jobs: fingerprints: - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71" - checkout - - run: sudo pip install --progress-bar off -r docs/requirements.txt - - run: sudo pip install --progress-bar off . + - run: sudo pip install .[tf,torch,docs] - run: ./.circleci/deploy.sh check_code_quality: working_directory: ~/transformers @@ -94,9 +82,9 @@ jobs: parallelism: 1 steps: - checkout - - run: sudo pip install --editable . - - run: sudo pip install torch tensorflow - - run: sudo pip install black git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort flake8 + # we need a version of isort with https://github.com/timothycrosley/isort/pull/1000 + - run: sudo pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort + - run: sudo pip install .[tf,torch,quality] - run: black --check --line-length 119 examples templates tests src utils - run: isort --check-only --recursive examples templates tests src utils - run: flake8 examples templates tests src utils diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7d7f2c73ff6..8e7c8cc8e6e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -102,7 +102,7 @@ Follow these steps to start contributing: clicking on the 'Fork' button on the repository's page. This creates a copy of the code under your github user account. 2. 
Clone your fork to your local disk, and add the base repository as a remote: - + ```bash $ git clone git@github.com:/transformers.git $ cd transformers @@ -114,43 +114,43 @@ Follow these steps to start contributing: ```bash $ git checkout -b a-descriptive-name-for-my-changes ``` - + **do not** work on the `master` branch. - + 4. Set up a development environment by running the following command in a virtual environment: ```bash - $ pip install -r requirements-dev.txt + $ pip install -e .[dev] ``` 5. Develop the features on your branch. Add changed files using `git add` and then `git commit` to record your changes locally: - + ```bash $ git add modified_file.py $ git commit ``` - + Please write [good commit messages](https://chris.beams.io/posts/git-commit/). It is a good idea to sync your copy of the code with the original repository regularly. This way you can quickly account for changes: - + ```bash $ git fetch upstream $ git rebase upstream/master ``` - + Push the changes to your account using: - + ```bash $ git push -u origin a-descriptive-name-for-my-changes ``` - + 6. Once you are satisfied (**and the checklist below is happy too**), go to the webpage of your fork on Github. Click on 'Pull request' to send your changes to the project maintainers for review. - + 7. It's ok if maintainers ask you for changes. It happens to core contributors too! So everyone can see the changes in the Pull request, work in your local branch and push the changes to your fork. They will automatically appear in diff --git a/setup.cfg b/setup.cfg index f59ce55df71..30ef932f999 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,6 +7,7 @@ known_third_party = fairseq fastprogress git + MeCab nltk packaging PIL diff --git a/setup.py b/setup.py index 4276cc2764d..ff62caa8e1a 100644 --- a/setup.py +++ b/setup.py @@ -37,12 +37,20 @@ To create the package for pypi. 
from setuptools import find_packages, setup -extras = { - "serving": ["pydantic", "uvicorn", "fastapi"], - "serving-tf": ["pydantic", "uvicorn", "fastapi", "tensorflow"], - "serving-torch": ["pydantic", "uvicorn", "fastapi", "torch"], -} -extras["all"] = [package for package in extras.values()] +extras = {} + +extras["mecab"] = ["mecab-python3"] +extras["sklearn"] = ["scikit-learn"] +extras["tf"] = ["tensorflow"] +extras["torch"] = ["torch"] + +extras["serving"] = ["pydantic", "uvicorn", "fastapi"] +extras["all"] = extras["serving"] + ["tensorflow", "torch"] + +extras["testing"] = ["pytest", "pytest-xdist"] +extras["quality"] = ["black", "isort", "flake8"] +extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme"] +extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"] setup( name="transformers", From 9fcc532df6b3774d745dc1cfb4c0b01e21aef1f9 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 20:28:48 +0100 Subject: [PATCH 04/12] Remove requirements-dev.txt. It was generated once, likely in a non-reproducible way (pip freeze in a contributor's local environment), and never updated. 
--- requirements-dev.txt | 48 -------------------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 requirements-dev.txt diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 30ae8bf7408..00000000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,48 +0,0 @@ -absl-py==0.8.0 -astor==0.8.0 -atomicwrites==1.3.0 -attrs==19.2.0 -boto3==1.9.243 -botocore==1.12.243 -certifi==2019.9.11 -chardet==3.0.4 -Click==7.0 -docutils==0.15.2 -gast==0.2.2 -google-pasta==0.1.7 -grpcio==1.24.1 -h5py==2.10.0 -idna==2.8 -importlib-metadata==0.23 -jmespath==0.9.4 -joblib==0.14.0 -Keras-Applications==1.0.8 -Keras-Preprocessing==1.1.0 -Markdown==3.1.1 -more-itertools==7.2.0 -numpy==1.17.2 -opt-einsum==3.1.0 -packaging==19.2 -pluggy==0.13.0 -protobuf==3.10.0 -py==1.8.0 -pyparsing==2.4.2 -pytest==5.2.1 -python-dateutil==2.8.0 -regex==2019.8.19 -requests==2.22.0 -s3transfer==0.2.1 -sacremoses==0.0.35 -sentencepiece==0.1.83 -six==1.12.0 -tensorboard==2.0.0 -tensorflow==2.0.0 -tensorflow-estimator==2.0.0 -termcolor==1.1.0 -torch==1.2.0 -tqdm==4.36.1 -urllib3==1.25.6 -wcwidth==0.1.7 -Werkzeug==0.16.0 -wrapt==1.11.2 -zipp==0.6.0 From d73eb552e8ed9b70d10e1848201de8eb8d208866 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 20:33:08 +0100 Subject: [PATCH 05/12] Remove requirements.txt. It's redundant with setup.py and, also, incomplete (e.g. numpy). --- requirements.txt | 12 ------------ setup.py | 7 +++++++ 2 files changed, 7 insertions(+), 12 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 32edee07125..00000000000 --- a/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -# progress bars in model download and training scripts -tqdm -# Accessing files from S3 directly. 
-boto3 -# Used for downloading models over HTTP -requests -# For OpenAI GPT -regex != 2019.12.17 -# For XLNet -sentencepiece -# For XLM -sacremoses \ No newline at end of file diff --git a/setup.py b/setup.py index ff62caa8e1a..205035507dc 100644 --- a/setup.py +++ b/setup.py @@ -67,12 +67,19 @@ setup( packages=find_packages("src"), install_requires=[ "numpy", + # accessing files from S3 directly "boto3", + # filesystem locks e.g. to prevent parallel downloads "filelock", + # for downloading models over HTTPS "requests", + # progress bars in model download and training scripts "tqdm", + # for OpenAI GPT "regex != 2019.12.17", + # for XLNet "sentencepiece", + # for XLM "sacremoses", ], extras_require=extras, From d79e9c9a9a744ab21f3441d9e691852c95d89be1 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Mon, 23 Dec 2019 19:17:07 +0100 Subject: [PATCH 06/12] Remove docs/requirements.txt. It's superseded by the "docs" extras. --- docs/README.md | 14 +++++++------- docs/requirements.txt | 32 -------------------------------- 2 files changed, 7 insertions(+), 39 deletions(-) delete mode 100644 docs/requirements.txt diff --git a/docs/README.md b/docs/README.md index de37f7cba12..be9fe95b2b3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,25 +1,25 @@ # Generating the documentation To generate the documentation, you first have to build it. Several packages are necessary to build the doc, -you can install them using: +you can install them with the following command, at the root of the code repository: ```bash -pip install -r requirements.txt +pip install .[docs] ``` - + ## Packages installed -Here's an overview of all the packages installed. If you ran the previous command installing all packages from +Here's an overview of all the packages installed. If you ran the previous command installing all packages from `requirements.txt`, you do not need to run the following commands. 
-Building it requires the package `sphinx` that you can +Building it requires the package `sphinx` that you can install using: ```bash pip install -U sphinx ``` -You would also need the custom installed [theme](https://github.com/readthedocs/sphinx_rtd_theme) by +You would also need the custom installed [theme](https://github.com/readthedocs/sphinx_rtd_theme) by [Read The Docs](https://readthedocs.org/). You can install it using the following command: ```bash @@ -34,7 +34,7 @@ pip install recommonmark ## Building the documentation -Make sure that there is a symlink from the `example` file (in /examples) inside the source folder. Run the following +Make sure that there is a symlink from the `example` file (in /examples) inside the source folder. Run the following command to generate it: ```bash diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 63480293f5d..00000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,32 +0,0 @@ -alabaster==0.7.12 -Babel==2.7.0 -certifi==2019.6.16 -chardet==3.0.4 -commonmark==0.9.0 -docutils==0.14 -future==0.17.1 -idna==2.8 -imagesize==1.1.0 -Jinja2==2.10.1 -MarkupSafe==1.1.1 -packaging==19.0 -Pygments==2.4.2 -pyparsing==2.4.0 -pytz==2019.1 -recommonmark==0.5.0 -requests==2.22.0 -six==1.12.0 -snowballstemmer==1.9.0 -Sphinx==2.1.2 -sphinx-rtd-theme==0.4.3 -sphinxcontrib-applehelp==1.0.1 -sphinxcontrib-devhelp==1.0.1 -sphinxcontrib-htmlhelp==1.0.2 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.2 -sphinxcontrib-serializinghtml==1.1.3 -urllib3==1.25.3 -sphinx-markdown-tables==0.0.9 -numpy==1.17.2 -tensorflow==2.0.0rc2 -torch==1.2.0 \ No newline at end of file From c3783399db1967fcec63b1ca3465343c7c887a5f Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 20:37:50 +0100 Subject: [PATCH 07/12] Remove redundant requirements with transformers. 
--- examples/distillation/requirements.txt | 3 ++- examples/summarization/requirements.txt | 8 ++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/examples/distillation/requirements.txt b/examples/distillation/requirements.txt index 491924ee2c6..1f1a1b8a6e1 100644 --- a/examples/distillation/requirements.txt +++ b/examples/distillation/requirements.txt @@ -1,6 +1,7 @@ +transformers + gitpython==3.0.2 tensorboard>=1.14.0 tensorboardX==1.8 psutil==5.6.3 scipy==1.3.1 -transformers diff --git a/examples/summarization/requirements.txt b/examples/summarization/requirements.txt index 36d75a5edc5..f984af489cf 100644 --- a/examples/summarization/requirements.txt +++ b/examples/summarization/requirements.txt @@ -1,9 +1,5 @@ -# progress bars in model download and training scripts -tqdm -# Accessing files from S3 directly. -boto3 -# Used for downloading models over HTTP -requests +transformers + # For ROUGE nltk py-rouge From 70373a5f7cb13a7fc6170e3c38088fa63746e10e Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 21:31:12 +0100 Subject: [PATCH 08/12] Update contribution instructions. Also provide shortcuts in a Makefile. --- CONTRIBUTING.md | 93 ++++++++++++++++++++++++++++++++++--- Makefile | 21 ++++++++- README.md | 49 +++++++++++-------- docs/source/installation.md | 18 +------ 4 files changed, 137 insertions(+), 44 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8e7c8cc8e6e..220ad026706 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -100,7 +100,8 @@ Follow these steps to start contributing: 1. Fork the [repository](https://github.com/huggingface/transformers) by clicking on the 'Fork' button on the repository's page. This creates a copy of the code - under your github user account. + under your GitHub user account. + 2. Clone your fork to your local disk, and add the base repository as a remote: ```bash @@ -123,8 +124,38 @@ Follow these steps to start contributing: $ pip install -e .[dev] ``` -5. 
Develop the features on your branch. Add changed files using `git add` and - then `git commit` to record your changes locally: + Right now, we need an unreleased version of `isort` to avoid a + [bug](https://github.com/timothycrosley/isort/pull/1000): + + ```bash + $ pip install -U git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort + ``` + +5. Develop the features on your branch. + + As you work on the features, you should make sure that the test suite + passes: + + ```bash + $ make test + ``` + + `transformers` relies on `black` and `isort` to format its source code + consistently. After you make changes, format them with: + + ```bash + $ make style + ``` + + `transformers` also uses `flake8` to check for coding mistakes. Quality + control runs in CI, however you can also run the same checks with: + + ```bash + $ make quality + ``` + + Once you're happy with your changes, add changed files using `git add` and + make a commit with `git commit` to record your changes locally: ```bash $ git add modified_file.py @@ -132,9 +163,10 @@ Follow these steps to start contributing: ``` Please write [good commit - messages](https://chris.beams.io/posts/git-commit/). It - is a good idea to sync your copy of the code with the original repository - regularly. This way you can quickly account for changes: + messages](https://chris.beams.io/posts/git-commit/). + + It is a good idea to sync your copy of the code with the original + repository regularly. This way you can quickly account for changes: ```bash $ git fetch upstream @@ -148,7 +180,7 @@ Follow these steps to start contributing: ``` 6. Once you are satisfied (**and the checklist below is happy too**), go to the - webpage of your fork on Github. Click on 'Pull request' to send your changes + webpage of your fork on GitHub. Click on 'Pull request' to send your changes to the project maintainers for review. 7. It's ok if maintainers ask you for changes. 
It happens to core contributors @@ -171,6 +203,53 @@ Follow these steps to start contributing: 6. All public methods must have informative docstrings; +### Tests + +You can run 🤗 Transformers tests with `unittest` or `pytest`. + +We like `pytest` and `pytest-xdist` because it's faster. From the root of the +repository, here's how to run tests with `pytest` for the library: + +```bash +$ python -m pytest -n auto --dist=loadfile -s -v ./tests/ +``` + +and for the examples: + +```bash +$ pip install -r examples/requirements.txt # only needed the first time +$ python -m pytest -n auto --dist=loadfile -s -v ./examples/ +``` + +In fact, that's how `make test` and `make test-examples` are implemented! + +You can specify a smaller set of tests in order to test only the feature +you're working on. + +By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to +`yes` to run them. This will download many gigabytes of models — make sure you +have enough disk space and a good Internet connection, or a lot of patience! + +```bash +$ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/ +$ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/ +``` + +Likewise, set the `RUN_CUSTOM_TOKENIZERS` environment variable to `yes` to run +tests for custom tokenizers, which don't run by default either. + +🤗 Transformers uses `pytest` as a test runner only. It doesn't use any +`pytest`-specific features in the test suite itself. + +This means `unittest` is fully supported. Here's how to run tests with +`unittest`: + +```bash +$ python -m unittest discover -s tests -t . 
-v +$ python -m unittest discover -s examples -t examples -v +``` + + ### Style guide For documentation strings, `transformers` follows the [google diff --git a/Makefile b/Makefile index b93cd3134a6..6fccec8c214 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,24 @@ -.PHONY: style +.PHONY: quality style test test-examples + +# Check that source code meets quality standards + +quality: + black --check --line-length 119 examples templates tests src utils + isort --check-only --recursive examples templates tests src utils + flake8 examples templates tests src utils + +# Format source code automatically style: black --line-length 119 examples templates tests src utils isort --recursive examples templates tests src utils + +# Run tests for the library + +test: + python -m pytest -n auto --dist=loadfile -s -v ./tests/ + +# Run tests for examples + +test-examples: + python -m pytest -n auto --dist=loadfile -s -v ./examples/ diff --git a/README.md b/README.md index ea1d595d43d..f11fceebe8a 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,12 @@ Choose the right framework for every part of a model's lifetime This repo is tested on Python 3.5+, PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1 +You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). + +Create a virtual environment with the version of Python you're going to use and activate it. + +Now, if you want to use 🤗 Transformers, you can install it with pip. If you'd like to play with the examples, you must install it from source. + ### With pip First you need to install one of, or both, TensorFlow 2.0 and PyTorch. 
@@ -84,44 +90,49 @@ Please refer to [TensorFlow installation page](https://www.tensorflow.org/instal When TensorFlow 2.0 and/or PyTorch has been installed, you can install from source by cloning the repository and running: -```bash -pip install [--editable] . -``` - -### Run the examples - -Examples are included in the repository but are not shipped with the library. -Therefore, in order to run the latest versions of the examples you also need to install from source. To do so, create a new virtual environment and follow these steps: - ```bash git clone https://github.com/huggingface/transformers cd transformers pip install [--editable] . ``` +When you update the repository, you should upgrade the transformers installation and its dependencies as follows: + +```bash +git pull +pip install --upgrade [--editable] . +``` + +### Run the examples + +Examples are included in the repository but are not shipped with the library. + +Therefore, in order to run the latest versions of the examples, you need to install from source, as described above. + +Look at the [README](https://github.com/huggingface/transformers/blob/master/examples/README.md) for how to run examples. + ### Tests -A series of tests are included for the library and the example scripts. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples). - -These tests can be run using `unittest` or `pytest` (install pytest if needed with `pip install pytest`). +A series of tests are included for the library and for some example scripts. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples). 
Depending on which framework is installed (TensorFlow 2.0 and/or PyTorch), the irrelevant tests will be skipped. Ensure that both frameworks are installed if you want to execute all tests. -You can run the tests from the root of the cloned repository with the commands: +Here's the easiest way to run tests for the library: ```bash -python -m unittest discover -s tests -t . -v -python -m unittest discover -s examples -t examples -v +pip install -e .[testing] +make test ``` -or +and for the examples: ```bash -python -m pytest -sv ./tests/ -python -m pytest -sv ./examples/ +pip install -e .[testing] +pip install -r examples/requirements.txt +make test-examples ``` -By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to `yes` to run them. +For details, refer to the [contributing guide](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md#tests). ### Do you want to run a Transformer model on a mobile device? diff --git a/docs/source/installation.md b/docs/source/installation.md index d7198c17a70..513c6f2b404 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -24,23 +24,7 @@ pip install [--editable] . An extensive test suite is included to test the library behavior and several examples. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples). -Tests can be run using `unittest` or `pytest` (install pytest if needed with `pip install pytest`). - -Run all the tests from the root of the cloned repository with the commands: - -```bash -python -m unittest discover -s tests -t . -v -python -m unittest discover -s examples -t examples -v -``` - -or - -``` bash -python -m pytest -sv ./tests/ -python -m pytest -sv ./examples/ -``` - -By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to `yes` to run them. 
+Refer to the [contributing guide](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md#tests) for details about running tests. ## OpenAI GPT original tokenization workflow From 7a865821d951bad882fd46cbbbd8ea7dfb8e4b9b Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Mon, 23 Dec 2019 20:06:39 +0100 Subject: [PATCH 09/12] Remove stray egg-info directory automatically. If a user or contributor ran `pip install -e .` on transformers < 3.0, pip created a transformers.egg-info directory next to the transformers directory at the root of the repository. In transformers 3.0, the source is in a `src` subdirectory. `pip install -e .` creates a transformers.egg-info directory there. However, pip will still pick transformers.egg-info from the previous location. This is a bug: https://github.com/pypa/pip/issues/5466 Users and contributors are likely to hit this problem because the documentation for transformers 3.0 relies heavily on extras_require, which didn't exist in earlier versions, so the extras aren't defined in a stale transformers.egg-info directory. If such a directory exists, remove it. It's autogenerated, gitignored and not supposed to contain anything of value. --- setup.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/setup.py b/setup.py index 205035507dc..558a38ea8b7 100644 --- a/setup.py +++ b/setup.py @@ -34,9 +34,28 @@ To create the package for pypi. 
""" +import shutil +from pathlib import Path + from setuptools import find_packages, setup +# Remove stale transformers.egg-info directory to avoid https://github.com/pypa/pip/issues/5466 +stale_egg_info = Path(__file__).parent / "transformers.egg-info" +if stale_egg_info.exists(): + print( + ( + "Warning: {} exists.\n\n" + "If you recently updated transformers to 3.0 or later, this is expected,\n" + "but it may prevent transformers from installing in editable mode.\n\n" + "This directory is automatically generated by Python's packaging tools.\n" + "I will remove it now.\n\n" + "See https://github.com/pypa/pip/issues/5466 for details.\n" + ).format(stale_egg_info) + ) + shutil.rmtree(stale_egg_info) + + extras = {} extras["mecab"] = ["mecab-python3"] From 8a6881822a14bfffe7f30f900a067024736ca612 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 22 Dec 2019 21:35:09 +0100 Subject: [PATCH 10/12] Run some tests on Python 3.7. This will improve version coverage. --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f5ecf1c70a9..af07aba63ca 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -17,7 +17,7 @@ jobs: run_tests_torch: working_directory: ~/transformers docker: - - image: circleci/python:3.5 + - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 resource_class: xlarge @@ -31,7 +31,7 @@ jobs: run_tests_tf: working_directory: ~/transformers docker: - - image: circleci/python:3.5 + - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 resource_class: xlarge From a8d34e534e2bf4900ea225e5608981b5b9a0c706 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Tue, 24 Dec 2019 08:46:08 +0100 Subject: [PATCH 11/12] Remove [--editable] in install instructions. Use -e only in docs targeted at contributors. If a user copy-pastes command line with [--editable], they will hit an error. 
If they don't know the --editable option, we're giving them a choice to make before they can move forwards, but this isn't a choice they need to make right now. --- CONTRIBUTING.md | 4 ++++ README.md | 4 ++-- docs/README.md | 2 +- docs/source/installation.md | 2 +- examples/README.md | 2 +- examples/pplm/README.md | 2 +- examples/summarization/README.md | 2 +- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 220ad026706..7ff83ce0cc0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -124,6 +124,10 @@ Follow these steps to start contributing: $ pip install -e .[dev] ``` + (If transformers was already installed in the virtual environment, remove + it with `pip uninstall transformers` before reinstalling it in editable + mode with the `-e` flag.) + Right now, we need an unreleased version of `isort` to avoid a [bug](https://github.com/timothycrosley/isort/pull/1000): diff --git a/README.md b/README.md index f11fceebe8a..7a96004c582 100644 --- a/README.md +++ b/README.md @@ -93,14 +93,14 @@ When TensorFlow 2.0 and/or PyTorch has been installed, you can install from sour ```bash git clone https://github.com/huggingface/transformers cd transformers -pip install [--editable] . +pip install . ``` When you update the repository, you should upgrade the transformers installation and its dependencies as follows: ```bash git pull -pip install --upgrade [--editable] . +pip install --upgrade . ``` ### Run the examples diff --git a/docs/README.md b/docs/README.md index be9fe95b2b3..1c989d8c902 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,7 +4,7 @@ To generate the documentation, you first have to build it. 
Several packages are you can install them with the following command, at the root of the code repository: ```bash -pip install .[docs] +pip install -e .[docs] ``` ## Packages installed diff --git a/docs/source/installation.md b/docs/source/installation.md index 513c6f2b404..f4b7781ea9a 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -17,7 +17,7 @@ To install from source, clone the repository and install with: ``` bash git clone https://github.com/huggingface/transformers.git cd transformers -pip install [--editable] . +pip install . ``` ## Tests diff --git a/examples/README.md b/examples/README.md index e311686ede4..b0a9f978f8d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -10,7 +10,7 @@ Execute the following steps in a new virtual environment: ```bash git clone https://github.com/huggingface/transformers cd transformers -pip install [--editable] . +pip install . pip install -r ./examples/requirements.txt ``` diff --git a/examples/pplm/README.md b/examples/pplm/README.md index b12205854aa..ed105f95cf4 100644 --- a/examples/pplm/README.md +++ b/examples/pplm/README.md @@ -15,7 +15,7 @@ Please check out the repo under uber-research for more information: https://gith ```bash git clone https://github.com/huggingface/transformers && cd transformers -pip install [--editable] . +pip install . pip install nltk torchtext # additional requirements. cd examples/pplm ``` diff --git a/examples/summarization/README.md b/examples/summarization/README.md index b98581e8e5f..250c4bcfe84 100644 --- a/examples/summarization/README.md +++ b/examples/summarization/README.md @@ -10,7 +10,7 @@ The model is loaded with the pre-trained weights for the abstractive summarizati ``` git clone https://github.com/huggingface/transformers && cd transformers -pip install [--editable] . +pip install . 
pip install nltk py-rouge cd examples/summarization ``` From 10724a81235a99f511006c270861558fba685168 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Tue, 24 Dec 2019 09:09:13 +0100 Subject: [PATCH 12/12] Run the slow tests every Monday morning. --- .circleci/config.yml | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index af07aba63ca..44dc19ea43d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,6 +14,20 @@ jobs: - run: sudo pip install codecov pytest-cov - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov - run: codecov + run_all_tests_torch_and_tf: + working_directory: ~/transformers + docker: + - image: circleci/python:3.5 + environment: + OMP_NUM_THREADS: 1 + RUN_SLOW: yes + RUN_CUSTOM_TOKENIZERS: yes + resource_class: xlarge + parallelism: 1 + steps: + - checkout + - run: sudo pip install .[mecab,sklearn,tf,torch,testing] + - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ run_tests_torch: working_directory: ~/transformers docker: @@ -46,10 +60,12 @@ jobs: working_directory: ~/transformers docker: - image: circleci/python:3.5 + environment: + RUN_CUSTOM_TOKENIZERS: yes steps: - checkout - run: sudo pip install .[mecab,testing] - - run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./tests/test_tokenization_bert_japanese.py + - run: python -m pytest -sv ./tests/test_tokenization_bert_japanese.py run_examples_torch: working_directory: ~/transformers docker: @@ -69,8 +85,8 @@ jobs: - image: circleci/python:3.5 steps: - add_ssh_keys: - fingerprints: - - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71" + fingerprints: + - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71" - checkout - run: sudo pip install .[tf,torch,docs] - run: ./.circleci/deploy.sh @@ -115,3 +131,13 @@ workflows: - run_tests_torch - run_tests_tf - deploy_doc: *workflow_filters + run_slow_tests: + triggers: + - schedule: + cron: "0 4 * * 1" + filters: + 
branches: + only: + - master + jobs: + - run_all_tests_torch_and_tf