Merge pull request #2271 from aaugustin/improve-setup-and-requirements

Improve setup and requirements
This commit is contained in:
Thomas Wolf 2019-12-24 11:21:20 +01:00 committed by GitHub
commit 81db12c3ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 265 additions and 202 deletions

View File

@ -10,55 +10,62 @@ jobs:
parallelism: 1
steps:
- checkout
- run: sudo pip install torch
- run: sudo pip install tensorflow
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov pytest-xdist
- run: sudo pip install tensorboardX scikit-learn
- run: sudo pip install .[sklearn,tf,torch,testing]
- run: sudo pip install codecov pytest-cov
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
run_tests_torch:
run_all_tests_torch_and_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
environment:
OMP_NUM_THREADS: 1
RUN_SLOW: yes
RUN_CUSTOM_TOKENIZERS: yes
resource_class: xlarge
parallelism: 1
steps:
- checkout
- run: sudo pip install .[mecab,sklearn,tf,torch,testing]
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/
run_tests_torch:
working_directory: ~/transformers
docker:
- image: circleci/python:3.7
environment:
OMP_NUM_THREADS: 1
resource_class: xlarge
parallelism: 1
steps:
- checkout
- run: sudo pip install torch
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov pytest-xdist
- run: sudo pip install tensorboardX scikit-learn
- run: sudo pip install .[sklearn,torch,testing]
- run: sudo pip install codecov pytest-cov
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
run_tests_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
- image: circleci/python:3.7
environment:
OMP_NUM_THREADS: 1
resource_class: xlarge
parallelism: 1
steps:
- checkout
- run: sudo pip install tensorflow
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov pytest-xdist
- run: sudo pip install tensorboardX scikit-learn
- run: sudo pip install .[sklearn,tf,testing]
- run: sudo pip install codecov pytest-cov
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
run_tests_custom_tokenizers:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
environment:
RUN_CUSTOM_TOKENIZERS: yes
steps:
- checkout
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest pytest-xdist
- run: sudo pip install mecab-python3
- run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./tests/test_tokenization_bert_japanese.py
- run: sudo pip install .[mecab,testing]
- run: python -m pytest -sv ./tests/test_tokenization_bert_japanese.py
run_examples_torch:
working_directory: ~/transformers
docker:
@ -69,10 +76,8 @@ jobs:
parallelism: 1
steps:
- checkout
- run: sudo pip install torch
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest pytest-xdist
- run: sudo pip install tensorboardX scikit-learn
- run: sudo pip install .[sklearn,torch,testing]
- run: sudo pip install -r examples/requirements.txt
- run: python -m pytest -n 8 --dist=loadfile -s -v ./examples/
deploy_doc:
working_directory: ~/transformers
@ -80,11 +85,10 @@ jobs:
- image: circleci/python:3.5
steps:
- add_ssh_keys:
fingerprints:
- "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
fingerprints:
- "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
- checkout
- run: sudo pip install --progress-bar off -r docs/requirements.txt
- run: sudo pip install --progress-bar off .
- run: sudo pip install .[tf,torch,docs]
- run: ./.circleci/deploy.sh
check_code_quality:
working_directory: ~/transformers
@ -94,9 +98,9 @@ jobs:
parallelism: 1
steps:
- checkout
- run: sudo pip install --editable .
- run: sudo pip install torch tensorflow
- run: sudo pip install black git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort flake8
# we need a version of isort with https://github.com/timothycrosley/isort/pull/1000
- run: sudo pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
- run: sudo pip install .[tf,torch,quality]
- run: black --check --line-length 119 examples templates tests src utils
- run: isort --check-only --recursive examples templates tests src utils
- run: flake8 examples templates tests src utils
@ -127,3 +131,13 @@ workflows:
- run_tests_torch
- run_tests_tf
- deploy_doc: *workflow_filters
run_slow_tests:
triggers:
- schedule:
cron: "0 4 * * 1"
filters:
branches:
only:
- master
jobs:
- run_all_tests_torch_and_tf

View File

@ -100,9 +100,10 @@ Follow these steps to start contributing:
1. Fork the [repository](https://github.com/huggingface/transformers) by
clicking on the 'Fork' button on the repository's page. This creates a copy of the code
under your github user account.
under your GitHub user account.
2. Clone your fork to your local disk, and add the base repository as a remote:
```bash
$ git clone git@github.com:<your Github handle>/transformers.git
$ cd transformers
@ -114,43 +115,78 @@ Follow these steps to start contributing:
```bash
$ git checkout -b a-descriptive-name-for-my-changes
```
**do not** work on the `master` branch.
4. Set up a development environment by running the following command in a virtual environment:
```bash
$ pip install -r requirements-dev.txt
$ pip install -e .[dev]
```
5. Develop the features on your branch. Add changed files using `git add` and
then `git commit` to record your changes locally:
(If transformers was already installed in the virtual environment, remove
it with `pip uninstall transformers` before reinstalling it in editable
mode with the `-e` flag.)
Right now, we need an unreleased version of `isort` to avoid a
[bug](https://github.com/timothycrosley/isort/pull/1000):
```bash
$ pip install -U git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
```
5. Develop the features on your branch.
As you work on the features, you should make sure that the test suite
passes:
```bash
$ make test
```
`transformers` relies on `black` and `isort` to format its source code
consistently. After you make changes, format them with:
```bash
$ make style
```
`transformers` also uses `flake8` to check for coding mistakes. Quality
control runs in CI, however you can also run the same checks with:
```bash
$ make quality
```
Once you're happy with your changes, add changed files using `git add` and
make a commit with `git commit` to record your changes locally:
```bash
$ git add modified_file.py
$ git commit
```
Please write [good commit
messages](https://chris.beams.io/posts/git-commit/). It
is a good idea to sync your copy of the code with the original repository
regularly. This way you can quickly account for changes:
messages](https://chris.beams.io/posts/git-commit/).
It is a good idea to sync your copy of the code with the original
repository regularly. This way you can quickly account for changes:
```bash
$ git fetch upstream
$ git rebase upstream/master
```
Push the changes to your account using:
```bash
$ git push -u origin a-descriptive-name-for-my-changes
```
6. Once you are satisfied (**and the checklist below is happy too**), go to the
webpage of your fork on Github. Click on 'Pull request' to send your changes
webpage of your fork on GitHub. Click on 'Pull request' to send your changes
to the project maintainers for review.
7. It's ok if maintainers ask you for changes. It happens to core contributors
too! So everyone can see the changes in the Pull request, work in your local
branch and push the changes to your fork. They will automatically appear in
@ -171,6 +207,53 @@ Follow these steps to start contributing:
6. All public methods must have informative docstrings;
### Tests
You can run 🤗 Transformers tests with `unittest` or `pytest`.
We like `pytest` and `pytest-xdist` because it's faster. From the root of the
repository, here's how to run tests with `pytest` for the library:
```bash
$ python -m pytest -n auto --dist=loadfile -s -v ./tests/
```
and for the examples:
```bash
$ pip install -r examples/requirements.txt # only needed the first time
$ python -m pytest -n auto --dist=loadfile -s -v ./examples/
```
In fact, that's how `make test` and `make test-examples` are implemented!
You can specify a smaller set of tests in order to test only the feature
you're working on.
By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to
`yes` to run them. This will download many gigabytes of models — make sure you
have enough disk space and a good Internet connection, or a lot of patience!
```bash
$ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/
$ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/
```
Likewise, set the `RUN_CUSTOM_TOKENIZERS` environment variable to `yes` to run
tests for custom tokenizers, which don't run by default either.
🤗 Transformers uses `pytest` as a test runner only. It doesn't use any
`pytest`-specific features in the test suite itself.
This means `unittest` is fully supported. Here's how to run tests with
`unittest`:
```bash
$ python -m unittest discover -s tests -t . -v
$ python -m unittest discover -s examples -t examples -v
```
### Style guide
For documentation strings, `transformers` follows the [google

View File

@ -1,5 +1,24 @@
.PHONY: style
.PHONY: quality style test test-examples
# Check that source code meets quality standards
quality:
black --check --line-length 119 examples templates tests src utils
isort --check-only --recursive examples templates tests src utils
flake8 examples templates tests src utils
# Format source code automatically
style:
black --line-length 119 examples templates tests src utils
isort --recursive examples templates tests src utils
# Run tests for the library
test:
python -m pytest -n auto --dist=loadfile -s -v ./tests/
# Run tests for examples
test-examples:
python -m pytest -n auto --dist=loadfile -s -v ./examples/

View File

@ -66,6 +66,12 @@ Choose the right framework for every part of a model's lifetime
This repo is tested on Python 3.5+, PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1
You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
Create a virtual environment with the version of Python you're going to use and activate it.
Now, if you want to use 🤗 Transformers, you can install it with pip. If you'd like to play with the examples, you must install it from source.
### With pip
First you need to install one of, or both, TensorFlow 2.0 and PyTorch.
@ -85,43 +91,48 @@ Please refer to [TensorFlow installation page](https://www.tensorflow.org/instal
When TensorFlow 2.0 and/or PyTorch has been installed, you can install from source by cloning the repository and running:
```bash
pip install [--editable] .
git clone https://github.com/huggingface/transformers
cd transformers
pip install .
```
When you update the repository, you should upgrade the transformers installation and its dependencies as follows:
```bash
git pull
pip install --upgrade .
```
### Run the examples
Examples are included in the repository but are not shipped with the library.
Therefore, in order to run the latest versions of the examples you also need to install from source. To do so, create a new virtual environment and follow these steps:
```bash
git clone https://github.com/huggingface/transformers
cd transformers
pip install [--editable] .
```
Therefore, in order to run the latest versions of the examples, you need to install from source, as described above.
Look at the [README](https://github.com/huggingface/transformers/blob/master/examples/README.md) for how to run examples.
### Tests
A series of tests are included for the library and the example scripts. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples).
These tests can be run using `unittest` or `pytest` (install pytest if needed with `pip install pytest`).
A series of tests are included for the library and for some example scripts. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples).
Depending on which framework is installed (TensorFlow 2.0 and/or PyTorch), the irrelevant tests will be skipped. Ensure that both frameworks are installed if you want to execute all tests.
You can run the tests from the root of the cloned repository with the commands:
Here's the easiest way to run tests for the library:
```bash
python -m unittest discover -s tests -t . -v
python -m unittest discover -s examples -t examples -v
pip install -e .[testing]
make test
```
or
and for the examples:
```bash
python -m pytest -sv ./tests/
python -m pytest -sv ./examples/
pip install -e .[testing]
pip install -r examples/requirements.txt
make test-examples
```
By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to `yes` to run them.
For details, refer to the [contributing guide](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md#tests).
### Do you want to run a Transformer model on a mobile device?

View File

@ -1,25 +1,25 @@
# Generating the documentation
To generate the documentation, you first have to build it. Several packages are necessary to build the doc,
you can install them using:
you can install them with the following command, at the root of the code repository:
```bash
pip install -r requirements.txt
pip install -e .[docs]
```
## Packages installed
Here's an overview of all the packages installed. If you ran the previous command installing all packages from
Here's an overview of all the packages installed. If you ran the previous command installing all packages from
`requirements.txt`, you do not need to run the following commands.
Building it requires the package `sphinx` that you can
Building it requires the package `sphinx` that you can
install using:
```bash
pip install -U sphinx
```
You would also need the custom installed [theme](https://github.com/readthedocs/sphinx_rtd_theme) by
You would also need the custom installed [theme](https://github.com/readthedocs/sphinx_rtd_theme) by
[Read The Docs](https://readthedocs.org/). You can install it using the following command:
```bash
@ -34,7 +34,7 @@ pip install recommonmark
## Building the documentation
Make sure that there is a symlink from the `example` file (in /examples) inside the source folder. Run the following
Make sure that there is a symlink from the `example` file (in /examples) inside the source folder. Run the following
command to generate it:
```bash

View File

@ -1,32 +0,0 @@
alabaster==0.7.12
Babel==2.7.0
certifi==2019.6.16
chardet==3.0.4
commonmark==0.9.0
docutils==0.14
future==0.17.1
idna==2.8
imagesize==1.1.0
Jinja2==2.10.1
MarkupSafe==1.1.1
packaging==19.0
Pygments==2.4.2
pyparsing==2.4.0
pytz==2019.1
recommonmark==0.5.0
requests==2.22.0
six==1.12.0
snowballstemmer==1.9.0
Sphinx==2.1.2
sphinx-rtd-theme==0.4.3
sphinxcontrib-applehelp==1.0.1
sphinxcontrib-devhelp==1.0.1
sphinxcontrib-htmlhelp==1.0.2
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.2
sphinxcontrib-serializinghtml==1.1.3
urllib3==1.25.3
sphinx-markdown-tables==0.0.9
numpy==1.17.2
tensorflow==2.0.0rc2
torch==1.2.0

View File

@ -17,30 +17,14 @@ To install from source, clone the repository and install with:
``` bash
git clone https://github.com/huggingface/transformers.git
cd transformers
pip install [--editable] .
pip install .
```
## Tests
An extensive test suite is included to test the library behavior and several examples. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples).
Tests can be run using `unittest` or `pytest` (install pytest if needed with `pip install pytest`).
Run all the tests from the root of the cloned repository with the commands:
```bash
python -m unittest discover -s tests -t . -v
python -m unittest discover -s examples -t examples -v
```
or
``` bash
python -m pytest -sv ./tests/
python -m pytest -sv ./examples/
```
By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to `yes` to run them.
Refer to the [contributing guide](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md#tests) for details about running tests.
## OpenAI GPT original tokenization workflow

View File

@ -10,7 +10,7 @@ Execute the following steps in a new virtual environment:
```bash
git clone https://github.com/huggingface/transformers
cd transformers
pip install [--editable] .
pip install .
pip install -r ./examples/requirements.txt
```

View File

@ -1,6 +1,7 @@
transformers
gitpython==3.0.2
tensorboard>=1.14.0
tensorboardX==1.8
psutil==5.6.3
scipy==1.3.1
transformers

View File

@ -15,7 +15,7 @@ Please check out the repo under uber-research for more information: https://gith
```bash
git clone https://github.com/huggingface/transformers && cd transformers
pip install [--editable] .
pip install .
pip install nltk torchtext # additional requirements.
cd examples/pplm
```

View File

@ -25,7 +25,6 @@ import random
import numpy as np
import torch
from seqeval.metrics import f1_score, precision_score, recall_score
from tensorboardX import SummaryWriter
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
@ -54,6 +53,12 @@ from transformers import (
from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file
try:
from torch.utils.tensorboard import SummaryWriter
except ImportError:
from tensorboardX import SummaryWriter
logger = logging.getLogger(__name__)
ALL_MODELS = sum(

View File

@ -10,7 +10,7 @@ The model is loaded with the pre-trained weights for the abstractive summarizati
```
git clone https://github.com/huggingface/transformers && cd transformers
pip install [--editable] .
pip install .
pip install nltk py-rouge
cd examples/summarization
```

View File

@ -1,9 +1,5 @@
# progress bars in model download and training scripts
tqdm
# Accessing files from S3 directly.
boto3
# Used for downloading models over HTTP
requests
transformers
# For ROUGE
nltk
py-rouge

View File

@ -1,48 +0,0 @@
absl-py==0.8.0
astor==0.8.0
atomicwrites==1.3.0
attrs==19.2.0
boto3==1.9.243
botocore==1.12.243
certifi==2019.9.11
chardet==3.0.4
Click==7.0
docutils==0.15.2
gast==0.2.2
google-pasta==0.1.7
grpcio==1.24.1
h5py==2.10.0
idna==2.8
importlib-metadata==0.23
jmespath==0.9.4
joblib==0.14.0
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
Markdown==3.1.1
more-itertools==7.2.0
numpy==1.17.2
opt-einsum==3.1.0
packaging==19.2
pluggy==0.13.0
protobuf==3.10.0
py==1.8.0
pyparsing==2.4.2
pytest==5.2.1
python-dateutil==2.8.0
regex==2019.8.19
requests==2.22.0
s3transfer==0.2.1
sacremoses==0.0.35
sentencepiece==0.1.83
six==1.12.0
tensorboard==2.0.0
tensorflow==2.0.0
tensorflow-estimator==2.0.0
termcolor==1.1.0
torch==1.2.0
tqdm==4.36.1
urllib3==1.25.6
wcwidth==0.1.7
Werkzeug==0.16.0
wrapt==1.11.2
zipp==0.6.0

View File

@ -1,12 +0,0 @@
# progress bars in model download and training scripts
tqdm
# Accessing files from S3 directly.
boto3
# Used for downloading models over HTTP
requests
# For OpenAI GPT
regex != 2019.12.17
# For XLNet
sentencepiece
# For XLM
sacremoses

View File

@ -7,6 +7,7 @@ known_third_party =
fairseq
fastprogress
git
MeCab
nltk
packaging
PIL

View File

@ -34,15 +34,42 @@ To create the package for pypi.
"""
import shutil
from pathlib import Path
from setuptools import find_packages, setup
extras = {
"serving": ["pydantic", "uvicorn", "fastapi"],
"serving-tf": ["pydantic", "uvicorn", "fastapi", "tensorflow"],
"serving-torch": ["pydantic", "uvicorn", "fastapi", "torch"],
}
extras["all"] = [package for package in extras.values()]
# Remove stale transformers.egg-info directory to avoid https://github.com/pypa/pip/issues/5466
stale_egg_info = Path(__file__).parent / "transformers.egg-info"
if stale_egg_info.exists():
print(
(
"Warning: {} exists.\n\n"
"If you recently updated transformers to 3.0 or later, this is expected,\n"
"but it may prevent transformers from installing in editable mode.\n\n"
"This directory is automatically generated by Python's packaging tools.\n"
"I will remove it now.\n\n"
"See https://github.com/pypa/pip/issues/5466 for details.\n"
).format(stale_egg_info)
)
shutil.rmtree(stale_egg_info)
extras = {}
extras["mecab"] = ["mecab-python3"]
extras["sklearn"] = ["scikit-learn"]
extras["tf"] = ["tensorflow"]
extras["torch"] = ["torch"]
extras["serving"] = ["pydantic", "uvicorn", "fastapi"]
extras["all"] = extras["serving"] + ["tensorflow", "torch"]
extras["testing"] = ["pytest", "pytest-xdist"]
extras["quality"] = ["black", "isort", "flake8"]
extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme"]
extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"]
setup(
name="transformers",
@ -55,25 +82,39 @@ setup(
keywords="NLP deep learning transformer pytorch tensorflow BERT GPT GPT-2 google openai CMU",
license="Apache",
url="https://github.com/huggingface/transformers",
package_dir = {'': 'src'},
package_dir={"": "src"},
packages=find_packages("src"),
install_requires=[
"numpy",
# accessing files from S3 directly
"boto3",
# filesystem locks e.g. to prevent parallel downloads
"filelock",
# for downloading models over HTTPS
"requests",
# progress bars in model download and training scripts
"tqdm",
# for OpenAI GPT
"regex != 2019.12.17",
# for XLNet
"sentencepiece",
# for XLM
"sacremoses",
],
extras_require=extras,
scripts=["transformers-cli"],
# python_requires='>=3.5.0',
python_requires=">=3.5.0",
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
)