mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 12:50:06 +06:00
Remove add-new-model in favor of add-new-model-like (#30424)
* Remove add-new-model in favor of add-new-model-like * nits
This commit is contained in:
parent
0eb8fbcdac
commit
d4e92f1a21
81
.github/workflows/model-templates.yml
vendored
81
.github/workflows/model-templates.yml
vendored
@ -1,81 +0,0 @@
|
||||
name: Model templates runner
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "0 2 * * *"
|
||||
|
||||
jobs:
|
||||
run_tests_templates:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt -y update && sudo apt install -y libsndfile1-dev
|
||||
|
||||
- name: Load cached virtual environment
|
||||
uses: actions/cache@v2
|
||||
id: cache
|
||||
with:
|
||||
path: ~/venv/
|
||||
key: v4-tests_templates-${{ hashFiles('setup.py') }}
|
||||
|
||||
- name: Create virtual environment on cache miss
|
||||
if: steps.cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
python -m venv ~/venv && . ~/venv/bin/activate
|
||||
pip install --upgrade pip!=21.3
|
||||
pip install -e .[dev]
|
||||
|
||||
- name: Check transformers location
|
||||
# make `transformers` available as package (required since we use `-e` flag) and check it's indeed from the repo.
|
||||
run: |
|
||||
. ~/venv/bin/activate
|
||||
python setup.py develop
|
||||
transformer_loc=$(pip show transformers | grep "Location: " | cut -c11-)
|
||||
transformer_repo_loc=$(pwd .)
|
||||
if [ "$transformer_loc" != "$transformer_repo_loc/src" ]; then
|
||||
echo "transformers is from $transformer_loc but it shoud be from $transformer_repo_loc/src."
|
||||
echo "A fix is required. Stop testing."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Create model files
|
||||
run: |
|
||||
. ~/venv/bin/activate
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/flax-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/flax-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
|
||||
make style
|
||||
python utils/check_table.py --fix_and_overwrite
|
||||
python utils/check_dummies.py --fix_and_overwrite
|
||||
python utils/check_copies.py --fix_and_overwrite
|
||||
|
||||
- name: Run all non-slow tests
|
||||
run: |
|
||||
. ~/venv/bin/activate
|
||||
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_templates tests/*template*
|
||||
|
||||
- name: Run style changes
|
||||
run: |
|
||||
. ~/venv/bin/activate
|
||||
make style && make quality && make repo-consistency
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ always() }}
|
||||
run: cat reports/tests_templates/failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: run_all_tests_templates_test_reports
|
||||
path: reports/tests_templates
|
@ -110,7 +110,7 @@ New models are constantly released and if you want to implement a new model, ple
|
||||
|
||||
If you are willing to contribute the model yourself, let us know so we can help you add it to 🤗 Transformers!
|
||||
|
||||
We have added a [detailed guide and templates](https://github.com/huggingface/transformers/tree/main/templates) to help you get started with adding a new model, and we also have a more technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model).
|
||||
We have a technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model).
|
||||
|
||||
## Do you want to add documentation?
|
||||
|
||||
|
@ -398,12 +398,14 @@ In dem speziellen Fall, dass Sie ein Modell hinzufügen, dessen Architektur gena
|
||||
Modells übereinstimmt, müssen Sie nur ein Konvertierungsskript hinzufügen, wie in [diesem Abschnitt](#write-a-conversion-script) beschrieben.
|
||||
In diesem Fall können Sie einfach die gesamte Modellarchitektur des bereits vorhandenen Modells wiederverwenden.
|
||||
|
||||
Andernfalls beginnen wir mit der Erstellung eines neuen Modells. Sie haben hier zwei Möglichkeiten:
|
||||
Andernfalls beginnen wir mit der Erstellung eines neuen Modells. Wir empfehlen die Verwendung des folgenden Skripts, um ein Modell hinzuzufügen
|
||||
ein bestehendes Modell:
|
||||
|
||||
- `transformers-cli add-new-model-like`, um ein neues Modell wie ein bestehendes hinzuzufügen
|
||||
- `transformers-cli add-new-model`, um ein neues Modell aus unserer Vorlage hinzuzufügen (sieht dann aus wie BERT oder Bart, je nachdem, welche Art von Modell Sie wählen)
|
||||
```bash
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
In beiden Fällen werden Sie mit einem Fragebogen aufgefordert, die grundlegenden Informationen zu Ihrem Modell auszufüllen. Für den zweiten Befehl müssen Sie `cookiecutter` installieren, weitere Informationen dazu finden Sie [hier](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model).
|
||||
Sie werden mit einem Fragebogen aufgefordert, die grundlegenden Informationen Ihres Modells einzugeben.
|
||||
|
||||
**Eröffnen Sie einen Pull Request auf dem Haupt-Repositorium huggingface/transformers**
|
||||
|
||||
|
@ -98,7 +98,7 @@ Es werden ständig neue Modelle veröffentlicht. Wenn Sie ein neues Modell imple
|
||||
|
||||
Lassen Sie es uns wissen, wenn Sie bereit sind, das Modell selbst beizutragen. Dann können wir Ihnen helfen, es zu 🤗 Transformers hinzuzufügen!
|
||||
|
||||
Wir haben eine [detaillierte Anleitung und Vorlagen](https://github.com/huggingface/transformers/tree/main/templates) hinzugefügt, um Ihnen das Hinzufügen eines neuen Modells zu erleichtern, und wir haben auch einen technischen Leitfaden dazu, [wie man ein Modell zu 🤗 Transformers hinzufügt](https://huggingface.co/docs/transformers/add_new_model).
|
||||
Wir haben auch einen technischen Leitfaden dazu, [wie man ein Modell zu 🤗 Transformers hinzufügt](https://huggingface.co/docs/transformers/add_new_model).
|
||||
|
||||
## Möchten Sie die Dokumentation erweitern?
|
||||
|
||||
|
@ -398,12 +398,14 @@ In the special case that you are adding a model whose architecture exactly match
|
||||
existing model you only have to add a conversion script as described in [this section](#write-a-conversion-script).
|
||||
In this case, you can just re-use the whole model architecture of the already existing model.
|
||||
|
||||
Otherwise, let's start generating a new model. You have two choices here:
|
||||
Otherwise, let's start generating a new model. We recommend using the following script to add a model starting from
|
||||
an existing model:
|
||||
|
||||
- `transformers-cli add-new-model-like` to add a new model like an existing one
|
||||
- `transformers-cli add-new-model` to add a new model from our template (will look like BERT or Bart depending on the type of model you select)
|
||||
```bash
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
In both cases, you will be prompted with a questionnaire to fill in the basic information of your model. The second command requires to install `cookiecutter`, you can find more information on it [here](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model).
|
||||
You will be prompted with a questionnaire to fill in the basic information of your model.
|
||||
|
||||
**Open a Pull Request on the main huggingface/transformers repo**
|
||||
|
||||
|
@ -351,13 +351,14 @@ Nel caso speciale in cui stiate aggiungendo un modello, la cui architettura sia
|
||||
dovrete solo aggiugnere uno script di conversione, come descritto [qui](#write-a-conversion-script).
|
||||
In questo caso, potete riutilizzare l'intera architettura del modello gia esistente.
|
||||
|
||||
Se questo non é il caso, cominciamo con il generare un nuovo modello. Avrete due opzioni:
|
||||
Se questo non é il caso, cominciamo con il generare un nuovo modello. Ti consigliamo di utilizzare il seguente script per aggiungere un modello a partire da
|
||||
un modello esistente:
|
||||
|
||||
- `transformers-cli add-new-model-like` per aggiungere un nuovo modello come uno che gia esiste
|
||||
- `transformers-cli add-new-model` per aggiungere un nuovo modello da un nostro template (questo assomigliera a BERT o Bart, in base al modello che selezionerete)
|
||||
```bash
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
In entrambi i casi, l'output vi darà un questionario da riempire con informazioni basi sul modello. Il secondo comando richiede di installare
|
||||
un `cookiecutter` - maggiori informazioni [qui](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model).
|
||||
Ti verrà richiesto con un questionario di compilare le informazioni di base del tuo modello.
|
||||
|
||||
**Aprire una Pull Request in main huggingface/transformers repo**
|
||||
|
||||
|
@ -307,14 +307,15 @@ cd transformers
|
||||
[このセクション](#write-a-conversion-script)で説明されているように、変換スクリプトを追加するだけで済みます。
|
||||
この場合、既存のモデルの完全なモデルアーキテクチャを再利用できます。
|
||||
|
||||
それ以外の場合、新しいモデルの生成を開始します。ここで2つの選択肢があります:
|
||||
|
||||
- `transformers-cli add-new-model-like`を使用して既存のモデルのような新しいモデルを追加します
|
||||
- `transformers-cli add-new-model`を使用して、テンプレートから新しいモデルを追加します(モデルのタイプに応じてBERTまたはBartのように見えます)
|
||||
それ以外の場合は、新しいモデルの生成を開始しましょう。 次のスクリプトを使用して、以下から始まるモデルを追加することをお勧めします。
|
||||
既存のモデル:
|
||||
|
||||
どちらの場合でも、モデルの基本情報を入力するための質問事項が表示されます。
|
||||
2番目のコマンドを実行するには、`cookiecutter`をインストールする必要があります。
|
||||
詳細については[こちら](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model)をご覧ください。
|
||||
```bash
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
モデルの基本情報を入力するためのアンケートが表示されます。
|
||||
|
||||
**主要な huggingface/transformers リポジトリでプルリクエストを開く**
|
||||
|
||||
|
@ -268,12 +268,14 @@ cd transformers
|
||||
|
||||
다음과 같이 이미 존재하는 모델의 모델 아키텍처와 정확히 일치하는 모델을 추가하는 특별한 경우에는 [이 섹션](#write-a-conversion-script)에 설명된대로 변환 스크립트만 추가하면 됩니다. 이 경우에는 이미 존재하는 모델의 전체 모델 아키텍처를 그대로 재사용할 수 있습니다.
|
||||
|
||||
그렇지 않으면 새로운 모델 생성을 시작합시다. 여기에서 두 가지 선택지가 있습니다:
|
||||
그렇지 않으면 새 모델 생성을 시작하겠습니다. 다음 스크립트를 사용하여 다음에서 시작하는 모델을 추가하는 것이 좋습니다.
|
||||
기존 모델:
|
||||
|
||||
- `transformers-cli add-new-model-like`를 사용하여 기존 모델과 유사한 새로운 모델 추가하기
|
||||
- `transformers-cli add-new-model`을 사용하여 템플릿을 기반으로 한 새로운 모델 추가하기 (선택한 모델 유형에 따라 BERT 또는 Bart와 유사한 모습일 것입니다)
|
||||
```bash
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
두 경우 모두, 모델의 기본 정보를 입력하는 설문조사가 제시됩니다. 두 번째 명령어는 `cookiecutter`를 설치해야 합니다. 자세한 정보는 [여기](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model)에서 확인할 수 있습니다.
|
||||
모델의 기본 정보를 입력하는 설문지가 표시됩니다.
|
||||
|
||||
**huggingface/transformers 메인 저장소에 Pull Request 열기**
|
||||
|
||||
|
@ -99,7 +99,7 @@ python src/transformers/commands/transformers_cli.py env
|
||||
|
||||
만약 모델을 직접 기여하고 싶으시다면, 알려주세요. 🤗 Transformers에 추가할 수 있도록 도와드리겠습니다!
|
||||
|
||||
새로운 모델을 추가하는 방법에 대한 [상세 안내서와 템플릿](https://github.com/huggingface/transformers/tree/main/templates)을 제공하고 있으며, [🤗 Transformers에 새로운 모델을 추가하는 방법](https://huggingface.co/docs/transformers/add_new_model)에 대한 기술적인 안내서도 있습니다.
|
||||
[🤗 Transformers에 새로운 모델을 추가하는 방법](https://huggingface.co/docs/transformers/add_new_model)에 대한 기술적인 안내서도 있습니다.
|
||||
|
||||
## 문서를 추가하고 싶으신가요? [[do-you-want-to-add-documentation]]
|
||||
|
||||
|
@ -98,7 +98,7 @@ python src/transformers/commands/transformers_cli.py env
|
||||
|
||||
如果你想亲自贡献模型,请告诉我们。让我们帮你把它添加到 🤗 Transformers!
|
||||
|
||||
我们已经添加了[详细的指南和模板](https://github.com/huggingface/transformers/tree/main/templates)来帮助你添加新模型。我们还有一个更技术性的指南,告诉你[如何将模型添加到 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model)。
|
||||
我们还有一个更技术性的指南,告诉你[如何将模型添加到 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model)。
|
||||
|
||||
## 你想要添加文档吗?
|
||||
|
||||
|
@ -1,259 +0,0 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import warnings
|
||||
from argparse import ArgumentParser, Namespace
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from ..utils import logging
|
||||
from . import BaseTransformersCLICommand
|
||||
|
||||
|
||||
try:
|
||||
from cookiecutter.main import cookiecutter
|
||||
|
||||
_has_cookiecutter = True
|
||||
except ImportError:
|
||||
_has_cookiecutter = False
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
def add_new_model_command_factory(args: Namespace):
|
||||
return AddNewModelCommand(args.testing, args.testing_file, path=args.path)
|
||||
|
||||
|
||||
class AddNewModelCommand(BaseTransformersCLICommand):
|
||||
@staticmethod
|
||||
def register_subcommand(parser: ArgumentParser):
|
||||
add_new_model_parser = parser.add_parser("add-new-model")
|
||||
add_new_model_parser.add_argument("--testing", action="store_true", help="If in testing mode.")
|
||||
add_new_model_parser.add_argument("--testing_file", type=str, help="Configuration file on which to run.")
|
||||
add_new_model_parser.add_argument(
|
||||
"--path", type=str, help="Path to cookiecutter. Should only be used for testing purposes."
|
||||
)
|
||||
add_new_model_parser.set_defaults(func=add_new_model_command_factory)
|
||||
|
||||
def __init__(self, testing: bool, testing_file: str, path=None, *args):
|
||||
self._testing = testing
|
||||
self._testing_file = testing_file
|
||||
self._path = path
|
||||
|
||||
def run(self):
|
||||
warnings.warn(
|
||||
"The command `transformers-cli add-new-model` is deprecated and will be removed in v5 of Transformers. "
|
||||
"It is not actively maintained anymore, so might give a result that won't pass all tests and quality "
|
||||
"checks, you should use `transformers-cli add-new-model-like` instead."
|
||||
)
|
||||
if not _has_cookiecutter:
|
||||
raise ImportError(
|
||||
"Model creation dependencies are required to use the `add_new_model` command. Install them by running "
|
||||
"the following at the root of your `transformers` clone:\n\n\t$ pip install -e .[modelcreation]\n"
|
||||
)
|
||||
# Ensure that there is no other `cookiecutter-template-xxx` directory in the current working directory
|
||||
directories = [directory for directory in os.listdir() if "cookiecutter-template-" == directory[:22]]
|
||||
if len(directories) > 0:
|
||||
raise ValueError(
|
||||
"Several directories starting with `cookiecutter-template-` in current working directory. "
|
||||
"Please clean your directory by removing all folders starting with `cookiecutter-template-` or "
|
||||
"change your working directory."
|
||||
)
|
||||
|
||||
path_to_transformer_root = (
|
||||
Path(__file__).parent.parent.parent.parent if self._path is None else Path(self._path).parent.parent
|
||||
)
|
||||
path_to_cookiecutter = path_to_transformer_root / "templates" / "adding_a_new_model"
|
||||
|
||||
# Execute cookiecutter
|
||||
if not self._testing:
|
||||
cookiecutter(str(path_to_cookiecutter))
|
||||
else:
|
||||
with open(self._testing_file, "r") as configuration_file:
|
||||
testing_configuration = json.load(configuration_file)
|
||||
|
||||
cookiecutter(
|
||||
str(path_to_cookiecutter if self._path is None else self._path),
|
||||
no_input=True,
|
||||
extra_context=testing_configuration,
|
||||
)
|
||||
|
||||
directory = [directory for directory in os.listdir() if "cookiecutter-template-" in directory[:22]][0]
|
||||
|
||||
# Retrieve configuration
|
||||
with open(directory + "/configuration.json", "r") as configuration_file:
|
||||
configuration = json.load(configuration_file)
|
||||
|
||||
lowercase_model_name = configuration["lowercase_modelname"]
|
||||
generate_tensorflow_pytorch_and_flax = configuration["generate_tensorflow_pytorch_and_flax"]
|
||||
os.remove(f"{directory}/configuration.json")
|
||||
|
||||
output_pytorch = "PyTorch" in generate_tensorflow_pytorch_and_flax
|
||||
output_tensorflow = "TensorFlow" in generate_tensorflow_pytorch_and_flax
|
||||
output_flax = "Flax" in generate_tensorflow_pytorch_and_flax
|
||||
|
||||
model_dir = f"{path_to_transformer_root}/src/transformers/models/{lowercase_model_name}"
|
||||
os.makedirs(model_dir, exist_ok=True)
|
||||
os.makedirs(f"{path_to_transformer_root}/tests/models/{lowercase_model_name}", exist_ok=True)
|
||||
|
||||
# Tests require submodules as they have parent imports
|
||||
with open(f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/__init__.py", "w"):
|
||||
pass
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/__init__.py",
|
||||
f"{model_dir}/__init__.py",
|
||||
)
|
||||
shutil.move(
|
||||
f"{directory}/configuration_{lowercase_model_name}.py",
|
||||
f"{model_dir}/configuration_{lowercase_model_name}.py",
|
||||
)
|
||||
|
||||
def remove_copy_lines(path):
|
||||
with open(path, "r") as f:
|
||||
lines = f.readlines()
|
||||
with open(path, "w") as f:
|
||||
for line in lines:
|
||||
if "# Copied from transformers." not in line:
|
||||
f.write(line)
|
||||
|
||||
if output_pytorch:
|
||||
if not self._testing:
|
||||
remove_copy_lines(f"{directory}/modeling_{lowercase_model_name}.py")
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/modeling_{lowercase_model_name}.py",
|
||||
f"{model_dir}/modeling_{lowercase_model_name}.py",
|
||||
)
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/test_modeling_{lowercase_model_name}.py",
|
||||
f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/test_modeling_{lowercase_model_name}.py",
|
||||
)
|
||||
else:
|
||||
os.remove(f"{directory}/modeling_{lowercase_model_name}.py")
|
||||
os.remove(f"{directory}/test_modeling_{lowercase_model_name}.py")
|
||||
|
||||
if output_tensorflow:
|
||||
if not self._testing:
|
||||
remove_copy_lines(f"{directory}/modeling_tf_{lowercase_model_name}.py")
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/modeling_tf_{lowercase_model_name}.py",
|
||||
f"{model_dir}/modeling_tf_{lowercase_model_name}.py",
|
||||
)
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/test_modeling_tf_{lowercase_model_name}.py",
|
||||
f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/test_modeling_tf_{lowercase_model_name}.py",
|
||||
)
|
||||
else:
|
||||
os.remove(f"{directory}/modeling_tf_{lowercase_model_name}.py")
|
||||
os.remove(f"{directory}/test_modeling_tf_{lowercase_model_name}.py")
|
||||
|
||||
if output_flax:
|
||||
if not self._testing:
|
||||
remove_copy_lines(f"{directory}/modeling_flax_{lowercase_model_name}.py")
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/modeling_flax_{lowercase_model_name}.py",
|
||||
f"{model_dir}/modeling_flax_{lowercase_model_name}.py",
|
||||
)
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/test_modeling_flax_{lowercase_model_name}.py",
|
||||
f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/test_modeling_flax_{lowercase_model_name}.py",
|
||||
)
|
||||
else:
|
||||
os.remove(f"{directory}/modeling_flax_{lowercase_model_name}.py")
|
||||
os.remove(f"{directory}/test_modeling_flax_{lowercase_model_name}.py")
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/{lowercase_model_name}.md",
|
||||
f"{path_to_transformer_root}/docs/source/en/model_doc/{lowercase_model_name}.md",
|
||||
)
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/tokenization_{lowercase_model_name}.py",
|
||||
f"{model_dir}/tokenization_{lowercase_model_name}.py",
|
||||
)
|
||||
|
||||
shutil.move(
|
||||
f"{directory}/tokenization_fast_{lowercase_model_name}.py",
|
||||
f"{model_dir}/tokenization_{lowercase_model_name}_fast.py",
|
||||
)
|
||||
|
||||
from os import fdopen, remove
|
||||
from shutil import copymode, move
|
||||
from tempfile import mkstemp
|
||||
|
||||
def replace(original_file: str, line_to_copy_below: str, lines_to_copy: List[str]):
|
||||
# Create temp file
|
||||
fh, abs_path = mkstemp()
|
||||
line_found = False
|
||||
with fdopen(fh, "w") as new_file:
|
||||
with open(original_file) as old_file:
|
||||
for line in old_file:
|
||||
new_file.write(line)
|
||||
if line_to_copy_below in line:
|
||||
line_found = True
|
||||
for line_to_copy in lines_to_copy:
|
||||
new_file.write(line_to_copy)
|
||||
|
||||
if not line_found:
|
||||
raise ValueError(f"Line {line_to_copy_below} was not found in file.")
|
||||
|
||||
# Copy the file permissions from the old file to the new file
|
||||
copymode(original_file, abs_path)
|
||||
# Remove original file
|
||||
remove(original_file)
|
||||
# Move new file
|
||||
move(abs_path, original_file)
|
||||
|
||||
def skip_units(line):
|
||||
return (
|
||||
("generating PyTorch" in line and not output_pytorch)
|
||||
or ("generating TensorFlow" in line and not output_tensorflow)
|
||||
or ("generating Flax" in line and not output_flax)
|
||||
)
|
||||
|
||||
def replace_in_files(path_to_datafile):
|
||||
with open(path_to_datafile) as datafile:
|
||||
lines_to_copy = []
|
||||
skip_file = False
|
||||
skip_snippet = False
|
||||
for line in datafile:
|
||||
if "# To replace in: " in line and "##" not in line:
|
||||
file_to_replace_in = line.split('"')[1]
|
||||
skip_file = skip_units(line)
|
||||
elif "# Below: " in line and "##" not in line:
|
||||
line_to_copy_below = line.split('"')[1]
|
||||
skip_snippet = skip_units(line)
|
||||
elif "# End." in line and "##" not in line:
|
||||
if not skip_file and not skip_snippet:
|
||||
replace(file_to_replace_in, line_to_copy_below, lines_to_copy)
|
||||
|
||||
lines_to_copy = []
|
||||
elif "# Replace with" in line and "##" not in line:
|
||||
lines_to_copy = []
|
||||
elif "##" not in line:
|
||||
lines_to_copy.append(line)
|
||||
|
||||
remove(path_to_datafile)
|
||||
|
||||
replace_in_files(f"{directory}/to_replace_{lowercase_model_name}.py")
|
||||
os.rmdir(directory)
|
@ -15,7 +15,6 @@
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
||||
from .add_new_model import AddNewModelCommand
|
||||
from .add_new_model_like import AddNewModelLikeCommand
|
||||
from .convert import ConvertCommand
|
||||
from .download import DownloadCommand
|
||||
@ -38,7 +37,6 @@ def main():
|
||||
RunCommand.register_subcommand(commands_parser)
|
||||
ServeCommand.register_subcommand(commands_parser)
|
||||
UserCommands.register_subcommand(commands_parser)
|
||||
AddNewModelCommand.register_subcommand(commands_parser)
|
||||
AddNewModelLikeCommand.register_subcommand(commands_parser)
|
||||
LfsCommands.register_subcommand(commands_parser)
|
||||
PTtoTFCommand.register_subcommand(commands_parser)
|
||||
|
@ -16,257 +16,8 @@ limitations under the License.
|
||||
|
||||
# Adding a new model
|
||||
|
||||
This folder contains templates to generate new models that fit the current API and pass all tests. It generates
|
||||
models in both PyTorch, TensorFlow, and Flax and completes the `__init__.py` and auto-modeling files, and creates the
|
||||
documentation. Their use is described in the [next section](#cookiecutter-templates).
|
||||
This page has been updated in light of the removal of the `add_new_model` script in favor of the more complete
|
||||
`add_new_model_like` script.
|
||||
|
||||
There is also a CLI tool to generate a new model like an existing one called `transformers-cli add-new-model-like`.
|
||||
Jump to the [Add new model like section](#add-new-model-like-command) to learn how to use it.
|
||||
|
||||
## Cookiecutter Templates
|
||||
|
||||
Using the `cookiecutter` utility requires to have all the `dev` dependencies installed. Let's first clone the
|
||||
repository and install it in our environment:
|
||||
|
||||
```shell script
|
||||
git clone https://github.com/huggingface/transformers
|
||||
cd transformers
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
|
||||
failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
|
||||
(PyTorch, TensorFlow and/or Flax) then do:
|
||||
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
```
|
||||
|
||||
Once the installation is done, you can use the CLI command `add-new-model` to generate your models:
|
||||
|
||||
```shell script
|
||||
transformers-cli add-new-model
|
||||
```
|
||||
|
||||
This should launch the `cookiecutter` package which should prompt you to fill in the configuration.
|
||||
|
||||
The `modelname` should be cased according to the plain text casing, i.e., BERT, RoBERTa, DeBERTa.
|
||||
```
|
||||
modelname [<ModelNAME>]:
|
||||
uppercase_modelname [<MODEL_NAME>]:
|
||||
lowercase_modelname [<model_name>]:
|
||||
camelcase_modelname [<ModelName>]:
|
||||
```
|
||||
|
||||
Fill in the `authors` with your team members:
|
||||
```
|
||||
authors [The HuggingFace Team]:
|
||||
```
|
||||
|
||||
The checkpoint identifier is the checkpoint that will be used in the examples across the files. Put the name you wish,
|
||||
as it will appear on the modelhub. Do not forget to include the organisation.
|
||||
```
|
||||
checkpoint_identifier [organisation/<model_name>-base-cased]:
|
||||
```
|
||||
|
||||
The tokenizer should either be based on BERT if it behaves exactly like the BERT tokenizer, or a standalone otherwise.
|
||||
```
|
||||
Select tokenizer_type:
|
||||
1 - Based on BERT
|
||||
2 - Standalone
|
||||
Choose from 1, 2 [1]:
|
||||
```
|
||||
<!---
|
||||
Choose if your model is an encoder-decoder, or an encoder-only architecture.
|
||||
|
||||
If your model is an encoder-only architecture, the generated architecture will be based on the BERT model.
|
||||
If your model is an encoder-decoder architecture, the generated architecture will be based on the BART model. You can,
|
||||
of course, edit the files once the generation is complete.
|
||||
```
|
||||
Select is_encoder_decoder_model:
|
||||
1 - True
|
||||
2 - False
|
||||
Choose from 1, 2 [1]:
|
||||
```
|
||||
-->
|
||||
|
||||
Once the command has finished, you should have a total of 7 new files spread across the repository:
|
||||
```
|
||||
docs/source/model_doc/<model_name>.md
|
||||
src/transformers/models/<model_name>/configuration_<model_name>.py
|
||||
src/transformers/models/<model_name>/modeling_<model_name>.py
|
||||
src/transformers/models/<model_name>/modeling_tf_<model_name>.py
|
||||
src/transformers/models/<model_name>/tokenization_<model_name>.py
|
||||
tests/models/<model_name>/test_modeling_<model_name>.py
|
||||
tests/models/<model_name>/test_modeling_tf_<model_name>.py
|
||||
```
|
||||
|
||||
You can run the tests to ensure that they all pass:
|
||||
|
||||
```bash
|
||||
python -m pytest ./tests/test_*<model_name>*.py
|
||||
```
|
||||
|
||||
Feel free to modify each file to mimic the behavior of your model.
|
||||
|
||||
⚠ You should be careful about the classes preceded by the following line:️
|
||||
|
||||
```python
|
||||
# Copied from transformers.[...]
|
||||
```
|
||||
|
||||
This line ensures that the copy does not diverge from the source. If it *should* diverge, because the implementation
|
||||
is different, this line needs to be deleted. If you don't delete this line and run `make fix-copies`,
|
||||
your changes will be overwritten.
|
||||
|
||||
Once you have edited the files to fit your architecture, simply re-run the tests (and edit them if a change
|
||||
is needed!) afterwards to make sure everything works as expected.
|
||||
|
||||
Once the files are generated and you are happy with your changes, here's a checklist to ensure that your contribution
|
||||
will be merged quickly:
|
||||
|
||||
- You should run the `make fixup` utility to fix the style of the files and to ensure the code quality meets the
|
||||
library's standards.
|
||||
- You should complete the documentation file (`docs/source/model_doc/<model_name>.rst`) so that your model may be
|
||||
usable.
|
||||
|
||||
## Add new model like command
|
||||
|
||||
Using the `transformers-cli add-new-model-like` command requires to have all the `dev` dependencies installed. Let's
|
||||
first clone the repository and install it in our environment:
|
||||
|
||||
```shell script
|
||||
git clone https://github.com/huggingface/transformers
|
||||
cd transformers
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
|
||||
failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
|
||||
(PyTorch, TensorFlow and/or Flax) then do:
|
||||
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
```
|
||||
|
||||
Once the installation is done, you can use the CLI command `add-new-model-like` to generate your models:
|
||||
|
||||
```shell script
|
||||
transformers-cli add-new-model-like
|
||||
```
|
||||
|
||||
This will start a small questionnaire you have to fill.
|
||||
|
||||
```
|
||||
What identifier would you like to use for the model type of this model?
|
||||
```
|
||||
|
||||
You will have to input the model type of the model you want to clone. The model type can be found in several places:
|
||||
- inside the configuration of any checkpoint of that model
|
||||
- the name of the documentation page of that model
|
||||
|
||||
For instance the doc page of `BigBirdPegasus` is `https://huggingface.co/docs/transformers/model_doc/bigbird_pegasus`
|
||||
so its model type is `"bigbird_pegasus"`.
|
||||
|
||||
If you make a typo, the command will suggest you the closest model types it can find.
|
||||
|
||||
Once this is done, the questionnaire will ask you for the new model name and its various casings:
|
||||
|
||||
```
|
||||
What is the name for your new model?
|
||||
What identifier would you like to use for the model type of this model?
|
||||
What name would you like to use for the module of this model?
|
||||
What prefix (camel-cased) would you like to use for the model classes of this model?
|
||||
What prefix (upper-cased) would you like to use for the constants relative to this model?
|
||||
```
|
||||
|
||||
From your answer to the first question, defaults will be determined for all others. The first name should be written
|
||||
as you want your model be named in the doc, with no special casing (like RoBERTa) and from there, you can either stick
|
||||
with the defaults or change the cased versions.
|
||||
|
||||
Next will be the name of the config class to use for this model:
|
||||
|
||||
```
|
||||
What will be the name of the config class for this model?
|
||||
```
|
||||
|
||||
Then, you will be asked for a checkpoint identifier:
|
||||
|
||||
```
|
||||
Please give a checkpoint identifier (on the model Hub) for this new model.
|
||||
```
|
||||
|
||||
This is the checkpoint that will be used in the examples across the files and the integration tests. Put the name you
|
||||
wish, as it will appear on the Model Hub. Do not forget to include the organisation.
|
||||
|
||||
Then you will have to say whether your model re-uses the same processing classes as the model you're cloning:
|
||||
|
||||
```
|
||||
Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor/XxxImageProcessor)
|
||||
```
|
||||
|
||||
Answer yes if you have no intentions to make any change to the class used for preprocessing. It can use different
|
||||
files (for instance you can reuse the `BertTokenizer` with a new vocab file).
|
||||
|
||||
If you answer no, you will have to give the name of the classes
|
||||
for the new tokenizer/image processor/feature extractor/processor (depending on the model you're cloning).
|
||||
|
||||
Next the questionnaire will ask
|
||||
|
||||
```
|
||||
Should we add # Copied from statements when creating the new modeling file?
|
||||
```
|
||||
|
||||
This is the internal mechanism used in the library to make sure code copied from various modeling files stay consistent.
|
||||
If you plan to completely rewrite the modeling file, you should answer no, whereas if you just want to tweak one part
|
||||
of the model, you should answer yes.
|
||||
|
||||
Lastly, the questionnaire will inquire about frameworks:
|
||||
|
||||
```
|
||||
Should we add a version of your new model in all the frameworks implemented by Old Model (xxx)?
|
||||
```
|
||||
|
||||
If you answer yes, the new model will have files for all the frameworks implemented by the model you're cloning.
|
||||
Otherwise, you will get a new question to select the frameworks you want.
|
||||
|
||||
Once the command has finished, you will see a new subfolder in the `src/transformers/models/` folder, with the
|
||||
necessary files (configuration and modeling files for all frameworks requested, and maybe the processing files,
|
||||
depending on your choices).
|
||||
|
||||
You will also see a doc file and tests for your new models. First you should run
|
||||
|
||||
```bash
|
||||
make style
|
||||
make fix-copies
|
||||
```
|
||||
|
||||
and then you can start tweaking your model. You should:
|
||||
- fill the doc file at `docs/source/model_doc/model_name.md`
|
||||
- tweak the configuration and modeling files to your need
|
||||
|
||||
Once you're done, you can run the tests to ensure that they all pass:
|
||||
|
||||
```bash
|
||||
python -m pytest ./tests/test_*<model_name>*.py
|
||||
```
|
||||
|
||||
⚠ You should be careful about the classes preceded by the following line:️
|
||||
|
||||
```python
|
||||
# Copied from transformers.[...]
|
||||
```
|
||||
|
||||
This line ensures that the copy does not diverge from the source. If it *should* diverge, because the implementation
|
||||
is different, this line needs to be deleted. If you don't delete this line and run `make fix-copies`,
|
||||
your changes will be overwritten.
|
||||
|
||||
Once you have edited the files to fit your architecture, simply re-run the tests (and edit them if a change
|
||||
is needed!) afterwards to make sure everything works as expected.
|
||||
|
||||
Once the files are generated and you are happy with your changes, here's a checklist to ensure that your contribution
|
||||
will be merged quickly:
|
||||
|
||||
- You should run the `make fixup` utility to fix the style of the files and to ensure the code quality meets the
|
||||
library's standards.
|
||||
- You should add your model to the main README then run `make fix-copies`.
|
||||
We recommend you checkout the documentation of [How to add a model](https://huggingface.co/docs/transformers/main/en/add_new_model)
|
||||
in the Hugging Face Transformers documentation for complete and up-to-date instructions.
|
||||
|
@ -1,286 +0,0 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import _LazyModule, OptionalDependencyNotAvailable, is_tokenizers_available
|
||||
|
||||
|
||||
{%- if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
from ...utils import is_tf_available
|
||||
|
||||
|
||||
{% endif %}
|
||||
{%- if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
from ...utils import is_torch_available
|
||||
|
||||
|
||||
{% endif %}
|
||||
{%- if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
from ...utils import is_flax_available
|
||||
|
||||
|
||||
{% endif %}
|
||||
|
||||
_import_structure = {
|
||||
"configuration_{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP", "{{cookiecutter.camelcase_modelname}}Config"],
|
||||
"tokenization_{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.camelcase_modelname}}Tokenizer"],
|
||||
}
|
||||
|
||||
try:
|
||||
if not is_tokenizers_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["tokenization_{{cookiecutter.lowercase_modelname}}_fast"] = ["{{cookiecutter.camelcase_modelname}}TokenizerFast"]
|
||||
|
||||
{%- if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
try:
|
||||
if not is_torch_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["modeling_{{cookiecutter.lowercase_modelname}}"] = [
|
||||
"{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
"{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
||||
"{{cookiecutter.camelcase_modelname}}Layer",
|
||||
"{{cookiecutter.camelcase_modelname}}Model",
|
||||
"{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
"load_tf_weights_in_{{cookiecutter.lowercase_modelname}}",
|
||||
]
|
||||
{% else %}
|
||||
try:
|
||||
if not is_torch_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["modeling_{{cookiecutter.lowercase_modelname}}"] = [
|
||||
"{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
||||
"{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"{{cookiecutter.camelcase_modelname}}Model",
|
||||
"{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
{%- if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
try:
|
||||
if not is_tf_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["modeling_tf_{{cookiecutter.lowercase_modelname}}"] = [
|
||||
"TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
||||
"TF{{cookiecutter.camelcase_modelname}}Layer",
|
||||
"TF{{cookiecutter.camelcase_modelname}}Model",
|
||||
"TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
{% else %}
|
||||
try:
|
||||
if not is_tf_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["modeling_tf_{{cookiecutter.lowercase_modelname}}"] = [
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
||||
"TF{{cookiecutter.camelcase_modelname}}Model",
|
||||
"TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
{%- if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
try:
|
||||
if not is_flax_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["modeling_flax_{{cookiecutter.lowercase_modelname}}"] = [
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}Layer",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}Model",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
{% else %}
|
||||
try:
|
||||
if not is_flax_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["modeling_flax_{{cookiecutter.lowercase_modelname}}"] = [
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}Model",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP, {{cookiecutter.camelcase_modelname}}Config
|
||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
try:
|
||||
if not is_tokenizers_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
from .tokenization_{{cookiecutter.lowercase_modelname}}_fast import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
|
||||
{%- if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
try:
|
||||
if not is_torch_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Layer,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
||||
)
|
||||
{% else %}
|
||||
try:
|
||||
if not is_torch_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{%- if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
try:
|
||||
if not is_tf_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}Layer,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% else %}
|
||||
try:
|
||||
if not is_tf_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{%- if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
try:
|
||||
if not is_flax_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Layer,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% else %}
|
||||
try:
|
||||
if not is_flax_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
else:
|
||||
import sys
|
||||
|
||||
sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
|
@ -1,11 +0,0 @@
|
||||
{
|
||||
"modelname": "{{cookiecutter.modelname}}",
|
||||
"uppercase_modelname": "{{cookiecutter.uppercase_modelname}}",
|
||||
"lowercase_modelname": "{{cookiecutter.lowercase_modelname}}",
|
||||
"camelcase_modelname": "{{cookiecutter.camelcase_modelname}}",
|
||||
"authors": "{{cookiecutter.authors}}",
|
||||
"checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}",
|
||||
"tokenizer_type": "{{cookiecutter.tokenizer_type}}",
|
||||
"generate_tensorflow_pytorch_and_flax": "{{cookiecutter.generate_tensorflow_pytorch_and_flax}}",
|
||||
"is_encoder_decoder_model": "{{cookiecutter.is_encoder_decoder_model}}"
|
||||
}
|
@ -1,235 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2022 {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" {{cookiecutter.modelname}} model configuration """
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
r"""
|
||||
This is the configuration class to store the configuration of a [`~{{cookiecutter.camelcase_modelname}}Model`].
|
||||
It is used to instantiate an {{cookiecutter.modelname}} model according to the specified arguments, defining the model
|
||||
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
|
||||
the {{cookiecutter.modelname}} [{{cookiecutter.checkpoint_identifier}}](https://huggingface.co/{{cookiecutter.checkpoint_identifier}}) architecture.
|
||||
|
||||
Configuration objects inherit from [`PretrainedConfig`] and can be used
|
||||
to control the model outputs. Read the documentation from [`PretrainedConfig`]
|
||||
for more information.
|
||||
|
||||
|
||||
Args:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
vocab_size (`int`, *optional*, defaults to 30522):
|
||||
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
|
||||
`inputs_ids` passed when calling [`~{{cookiecutter.camelcase_modelname}}Model`] or
|
||||
[`~TF{{cookiecutter.camelcase_modelname}}Model`].
|
||||
hidden_size (`int`, *optional*, defaults to 768):
|
||||
Dimension of the encoder layers and the pooler layer.
|
||||
num_hidden_layers (`int`, *optional*, defaults to 12):
|
||||
Number of hidden layers in the Transformer encoder.
|
||||
num_attention_heads (`int`, *optional*, defaults to 12):
|
||||
Number of attention heads for each attention layer in the Transformer encoder.
|
||||
intermediate_size (`int`, *optional*, defaults to 3072):
|
||||
Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
|
||||
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
|
||||
The non-linear activation function (function or string) in the encoder and pooler.
|
||||
If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
|
||||
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
|
||||
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
|
||||
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
|
||||
The dropout ratio for the attention probabilities.
|
||||
max_position_embeddings (`int`, *optional*, defaults to 512):
|
||||
The maximum sequence length that this model might ever be used with.
|
||||
Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
|
||||
type_vocab_size (`int`, *optional*, defaults to 2):
|
||||
The vocabulary size of the `token_type_ids` passed when calling [`~{{cookiecutter.camelcase_modelname}}Model`] or
|
||||
[`~TF{{cookiecutter.camelcase_modelname}}Model`].
|
||||
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
|
||||
The epsilon used by the layer normalization layers.
|
||||
use_cache (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not the model should return the last key/values attentions (not used by all models). Only
|
||||
relevant if `config.is_decoder=True`.
|
||||
{% else -%}
|
||||
vocab_size (`int`, *optional*, defaults to 50265):
|
||||
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
|
||||
`inputs_ids` passed when calling [`~{{cookiecutter.camelcase_modelname}}Model`] or
|
||||
[`~TF{{cookiecutter.camelcase_modelname}}Model`].
|
||||
d_model (`int`, *optional*, defaults to 1024):
|
||||
Dimension of the layers and the pooler layer.
|
||||
encoder_layers (`int`, *optional*, defaults to 12):
|
||||
Number of encoder layers.
|
||||
decoder_layers (`int`, *optional*, defaults to 12):
|
||||
Number of decoder layers.
|
||||
encoder_attention_heads (`int`, *optional*, defaults to 16):
|
||||
Number of attention heads for each attention layer in the Transformer encoder.
|
||||
decoder_attention_heads (`int`, *optional*, defaults to 16):
|
||||
Number of attention heads for each attention layer in the Transformer decoder.
|
||||
decoder_ffn_dim (`int`, *optional*, defaults to 4096):
|
||||
Dimension of the "intermediate" (often named feed-forward) layer in decoder.
|
||||
encoder_ffn_dim (`int`, *optional*, defaults to 4096):
|
||||
Dimension of the "intermediate" (often named feed-forward) layer in decoder.
|
||||
activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
|
||||
The non-linear activation function (function or string) in the encoder and pooler. If string,
|
||||
`"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
|
||||
dropout (`float`, *optional*, defaults to 0.1):
|
||||
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
|
||||
attention_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout ratio for the attention probabilities.
|
||||
activation_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout ratio for activations inside the fully connected layer.
|
||||
classifier_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout ratio for classifier.
|
||||
max_position_embeddings (`int`, *optional*, defaults to 1024):
|
||||
The maximum sequence length that this model might ever be used with. Typically set this to something large
|
||||
just in case (e.g., 512 or 1024 or 2048).
|
||||
init_std (`float`, *optional*, defaults to 0.02):
|
||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||
encoder_layerdrop (`float`, *optional*, defaults to 0.0):
|
||||
The LayerDrop probability for the encoder. See the [LayerDrop paper](see
|
||||
https://arxiv.org/abs/1909.11556) for more details.
|
||||
decoder_layerdrop (`float`, *optional*, defaults to 0.0):
|
||||
The LayerDrop probability for the decoder. See the [LayerDrop paper](see
|
||||
https://arxiv.org/abs/1909.11556) for more details.
|
||||
use_cache (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not the model should return the last key/values attentions (not used by all models).
|
||||
{% endif -%}
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
>>> from transformers import {{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}Config
|
||||
|
||||
>>> # Initializing a {{cookiecutter.modelname}} {{cookiecutter.checkpoint_identifier}} style configuration
|
||||
>>> configuration = {{cookiecutter.camelcase_modelname}}Config()
|
||||
|
||||
>>> # Initializing a model from the {{cookiecutter.checkpoint_identifier}} style configuration
|
||||
>>> model = {{cookiecutter.camelcase_modelname}}Model(configuration)
|
||||
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
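>>> # Illustrative addition (not in the original template): a config round-trips through
>>> # the standard `save_pretrained` / `from_pretrained` API
>>> configuration.save_pretrained("./{{cookiecutter.lowercase_modelname}}-config")
>>> configuration = {{cookiecutter.camelcase_modelname}}Config.from_pretrained("./{{cookiecutter.lowercase_modelname}}-config")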
|
||||
```
|
||||
"""
|
||||
model_type = "{{cookiecutter.lowercase_modelname}}"
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else -%}
|
||||
keys_to_ignore_at_inference = ["past_key_values"]
|
||||
{% endif -%}
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
{%- else %}
|
||||
attribute_map = {
|
||||
"num_attention_heads": "encoder_attention_heads",
|
||||
"hidden_size": "d_model"
|
||||
}
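# With the map above, library code that reads the common attribute names still works for this
# seq2seq config: e.g. `config.hidden_size` resolves to `config.d_model` and
# `config.num_attention_heads` to `config.encoder_attention_heads`.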
|
||||
|
||||
{%- endif %}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
vocab_size=30522,
|
||||
hidden_size=768,
|
||||
num_hidden_layers=12,
|
||||
num_attention_heads=12,
|
||||
intermediate_size=3072,
|
||||
hidden_act="gelu",
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=512,
|
||||
type_vocab_size=2,
|
||||
initializer_range=0.02,
|
||||
layer_norm_eps=1e-12,
|
||||
use_cache=True,
|
||||
{% else -%}
|
||||
vocab_size=50265,
|
||||
max_position_embeddings=1024,
|
||||
encoder_layers=12,
|
||||
encoder_ffn_dim=4096,
|
||||
encoder_attention_heads=16,
|
||||
decoder_layers=12,
|
||||
decoder_ffn_dim=4096,
|
||||
decoder_attention_heads=16,
|
||||
encoder_layerdrop=0.0,
|
||||
decoder_layerdrop=0.0,
|
||||
use_cache=True,
|
||||
is_encoder_decoder=True,
|
||||
activation_function="gelu",
|
||||
d_model=1024,
|
||||
dropout=0.1,
|
||||
attention_dropout=0.0,
|
||||
activation_dropout=0.0,
|
||||
init_std=0.02,
|
||||
decoder_start_token_id=2,
|
||||
classifier_dropout=0.0,
|
||||
scale_embedding=False,
|
||||
{% endif -%}
|
||||
pad_token_id=1,
|
||||
bos_token_id=0,
|
||||
eos_token_id=2,
|
||||
**kwargs
|
||||
):
|
||||
self.vocab_size = vocab_size
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
self.intermediate_size = intermediate_size
|
||||
self.hidden_act = hidden_act
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.initializer_range = initializer_range
|
||||
self.type_vocab_size = type_vocab_size
|
||||
self.layer_norm_eps = layer_norm_eps
|
||||
self.use_cache = use_cache
|
||||
{% else -%}
|
||||
self.d_model = d_model
|
||||
self.encoder_ffn_dim = encoder_ffn_dim
|
||||
self.encoder_layers = encoder_layers
|
||||
self.encoder_attention_heads = encoder_attention_heads
|
||||
self.decoder_ffn_dim = decoder_ffn_dim
|
||||
self.decoder_layers = decoder_layers
|
||||
self.decoder_attention_heads = decoder_attention_heads
|
||||
self.dropout = dropout
|
||||
self.attention_dropout = attention_dropout
|
||||
self.activation_dropout = activation_dropout
|
||||
self.activation_function = activation_function
|
||||
self.init_std = init_std
|
||||
self.encoder_layerdrop = encoder_layerdrop
|
||||
self.decoder_layerdrop = decoder_layerdrop
|
||||
self.classifier_dropout = classifier_dropout
|
||||
self.use_cache = use_cache
|
||||
self.num_hidden_layers = encoder_layers
|
||||
self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True
|
||||
|
||||
{% endif -%}
|
||||
super().__init__(
|
||||
pad_token_id=pad_token_id,
|
||||
bos_token_id=bos_token_id,
|
||||
eos_token_id=eos_token_id,
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else -%}
|
||||
is_encoder_decoder=is_encoder_decoder,
|
||||
decoder_start_token_id=decoder_start_token_id,
|
||||
{% endif -%}
|
||||
**kwargs
|
||||
)
|
||||
|
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,669 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_flax_available, {{cookiecutter.camelcase_modelname}}Config
|
||||
from transformers.testing_utils import require_flax, slow
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_flax_common import FlaxModelTesterMixin, ids_tensor
|
||||
|
||||
if is_flax_available():
|
||||
import numpy as np
|
||||
from transformers import (
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
|
||||
|
||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_input_mask=True,
|
||||
use_token_type_ids=True,
|
||||
use_labels=True,
|
||||
vocab_size=99,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=5,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_act="gelu",
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=512,
|
||||
type_vocab_size=16,
|
||||
type_sequence_label_size=2,
|
||||
initializer_range=0.02,
|
||||
num_labels=3,
|
||||
num_choices=4,
|
||||
scope=None,
|
||||
):
|
||||
# honour the constructor arguments instead of re-hardcoding their default values
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
input_mask = None
|
||||
if self.use_input_mask:
|
||||
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
|
||||
|
||||
token_type_ids = None
|
||||
if self.use_token_type_ids:
|
||||
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
|
||||
|
||||
sequence_labels = None
|
||||
token_labels = None
|
||||
choice_labels = None
|
||||
if self.use_labels:
|
||||
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = {{cookiecutter.camelcase_modelname}}Config(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_hidden_layers=self.num_hidden_layers,
|
||||
num_attention_heads=self.num_attention_heads,
|
||||
intermediate_size=self.intermediate_size,
|
||||
hidden_act=self.hidden_act,
|
||||
hidden_dropout_prob=self.hidden_dropout_prob,
|
||||
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def create_and_check_model(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}Model(config=config)
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
result = model(**inputs)

inputs = [input_ids, input_mask]
result = model(*inputs)
|
||||
|
||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||
|
||||
def create_and_check_lm_head(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.is_decoder = True
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
prediction_scores = model(**inputs)["logits"]
|
||||
self.parent.assertListEqual(
|
||||
list(prediction_scores.shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
|
||||
def create_and_check_for_masked_lm(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
result = model(**inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
||||
|
||||
def create_and_check_for_sequence_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
result = model(**inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
||||
|
||||
def create_and_check_for_multiple_choice(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_choices = self.num_choices
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice(config=config)
|
||||
multiple_choice_inputs_ids = np.tile(np.expand_dims(input_ids, 1), (1, self.num_choices, 1))
|
||||
multiple_choice_input_mask = np.tile(np.expand_dims(input_mask, 1), (1, self.num_choices, 1))
|
||||
multiple_choice_token_type_ids = np.tile(np.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
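# tile every input along a new choices axis: (batch, seq_len) -> (batch, num_choices, seq_len)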
|
||||
inputs = {
|
||||
"input_ids": multiple_choice_inputs_ids,
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
result = model(**inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
|
||||
|
||||
def create_and_check_for_token_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
result = model(**inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
|
||||
|
||||
def create_and_check_for_question_answering(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
result = model(**inputs)
|
||||
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
|
||||
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
) = config_and_inputs
|
||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
||||
return config, inputs_dict
|
||||
|
||||
|
||||
@require_flax
|
||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (
|
||||
(
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
)
|
||||
if is_flax_available()
|
||||
else ()
|
||||
)
|
||||
|
||||
test_head_masking = False
|
||||
test_onnx = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = Flax{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config, hidden_size=37)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
|
||||
def test_model(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||
|
||||
def test_for_masked_lm(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
|
||||
|
||||
def test_for_causal_lm(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_lm_head(*config_and_inputs)
|
||||
|
||||
def test_for_multiple_choice(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
|
||||
|
||||
def test_for_question_answering(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
|
||||
|
||||
def test_for_sequence_classification(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
|
||||
|
||||
def test_for_token_classification(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}Model.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
|
||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
||||
if a is None and b is None:
|
||||
return True
|
||||
try:
|
||||
if _assert_tensors_equal(a, b, atol=atol):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
@require_flax
|
||||
class Flax{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_inference_masked_lm(self):
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
||||
input_ids = np.array([[0, 1, 2, 3, 4, 5]])
|
||||
output = model(input_ids)[0]
|
||||
|
||||
# TODO Replace vocab size
|
||||
vocab_size = 32000
|
||||
|
||||
expected_shape = [1, 6, vocab_size]
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
|
||||
print(output[:, :3, :3])
|
||||
|
||||
# TODO Replace values below with what was printed above.
|
||||
expected_slice = np.array(
|
||||
[
|
||||
[
|
||||
[-0.05243197, -0.04498899, 0.05512108],
|
||||
[-0.07444685, -0.01064632, 0.04352357],
|
||||
[-0.05020351, 0.05530146, 0.00700043],
|
||||
]
|
||||
]
|
||||
)
|
||||
_assert_tensors_equal(output[:, :3, :3], expected_slice, atol=1e-4)
|
||||
|
||||
{% else %}
|
||||
import unittest
|
||||
|
||||
from transformers import (
|
||||
is_flax_available,
|
||||
{{cookiecutter.camelcase_modelname}}Config,
|
||||
{{cookiecutter.camelcase_modelname}}Tokenizer,
|
||||
)
|
||||
from transformers.testing_utils import require_sentencepiece, require_flax, require_tokenizers, slow
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_flax_common import FlaxModelTesterMixin, ids_tensor
|
||||
|
||||
|
||||
if is_flax_available():
|
||||
import numpy as np
|
||||
import jax.numpy as jnp
|
||||
from transformers import (
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
|
||||
|
||||
@require_flax
|
||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
config_cls = {{cookiecutter.camelcase_modelname}}Config
|
||||
config_updates = {}
|
||||
hidden_act = "gelu"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_labels=False,
|
||||
vocab_size=99,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=5,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=20,
|
||||
eos_token_id=2,
|
||||
pad_token_id=1,
|
||||
bos_token_id=0,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.seq_length = seq_length
|
||||
self.is_training = is_training
|
||||
self.use_labels = use_labels
|
||||
self.vocab_size = vocab_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
self.intermediate_size = intermediate_size
|
||||
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.eos_token_id = eos_token_id
|
||||
self.pad_token_id = pad_token_id
|
||||
self.bos_token_id = bos_token_id
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size).clip(3, self.vocab_size)
|
||||
eos_tensor = np.expand_dims(np.array([self.eos_token_id] * self.batch_size), 1)
|
||||
input_ids = np.concatenate([input_ids, eos_tensor], axis=1)
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = self.config_cls(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
decoder_layers=self.num_hidden_layers,
|
||||
encoder_attention_heads=self.num_attention_heads,
|
||||
decoder_attention_heads=self.num_attention_heads,
|
||||
encoder_ffn_dim=self.intermediate_size,
|
||||
decoder_ffn_dim=self.intermediate_size,
|
||||
dropout=self.hidden_dropout_prob,
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
eos_token_id=self.eos_token_id,
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
decoder_start_token_id=self.pad_token_id,
|
||||
**self.config_updates,
|
||||
)
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def check_use_cache_forward(self, model_class_name, config, inputs_dict):
|
||||
max_decoder_length = 20
|
||||
model = model_class_name(config)
|
||||
|
||||
encoder_outputs = model.encode(inputs_dict["input_ids"])
|
||||
|
||||
decoder_input_ids, decoder_attention_mask = (
|
||||
inputs_dict["decoder_input_ids"],
|
||||
inputs_dict["decoder_attention_mask"],
|
||||
)
|
||||
|
||||
past_key_values = model.init_cache(decoder_input_ids.shape[0], max_decoder_length, encoder_outputs)
|
||||
decoder_attention_mask = jnp.ones((decoder_input_ids.shape[0], max_decoder_length), dtype="i4")
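# decode all tokens but the last one against the freshly initialized cache, then feed the final
# token alone re-using `past_key_values`; both paths are compared below against a full decode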
|
||||
|
||||
decoder_position_ids = jnp.broadcast_to(
|
||||
jnp.arange(decoder_input_ids.shape[-1] - 1)[None, :],
|
||||
(decoder_input_ids.shape[0], decoder_input_ids.shape[-1] - 1),
|
||||
)
|
||||
outputs_cache = model.decode(
|
||||
decoder_input_ids[:, :-1],
|
||||
encoder_outputs,
|
||||
decoder_attention_mask=decoder_attention_mask,
|
||||
past_key_values=past_key_values,
|
||||
decoder_position_ids=decoder_position_ids,
|
||||
)
|
||||
|
||||
decoder_position_ids = jnp.array(decoder_input_ids.shape[0] * [[decoder_input_ids.shape[-1] - 1]], dtype="i4")
|
||||
outputs_cache_next = model.decode(
|
||||
decoder_input_ids[:, -1:],
|
||||
encoder_outputs,
|
||||
decoder_attention_mask=decoder_attention_mask,
|
||||
past_key_values=outputs_cache.past_key_values,
|
||||
decoder_position_ids=decoder_position_ids,
|
||||
)
|
||||
|
||||
outputs = model.decode(decoder_input_ids, encoder_outputs)
|
||||
|
||||
diff = np.max(np.abs((outputs_cache_next[0][:, -1, :5] - outputs[0][:, -1, :5])))
|
||||
self.parent.assertTrue(diff < 1e-3, msg=f"Max diff is {diff}")
|
||||
|
||||
def check_use_cache_forward_with_attn_mask(self, model_class_name, config, inputs_dict):
|
||||
max_decoder_length = 20
|
||||
model = model_class_name(config)
|
||||
|
||||
encoder_outputs = model.encode(inputs_dict["input_ids"])
|
||||
|
||||
decoder_input_ids, decoder_attention_mask = (
|
||||
inputs_dict["decoder_input_ids"],
|
||||
inputs_dict["decoder_attention_mask"],
|
||||
)
|
||||
|
||||
decoder_attention_mask_cache = jnp.concatenate(
|
||||
[
|
||||
decoder_attention_mask,
|
||||
jnp.zeros((decoder_attention_mask.shape[0], max_decoder_length - decoder_attention_mask.shape[1])),
|
||||
],
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
past_key_values = model.init_cache(decoder_input_ids.shape[0], max_decoder_length, encoder_outputs)
|
||||
decoder_position_ids = jnp.broadcast_to(
|
||||
jnp.arange(decoder_input_ids.shape[-1] - 1)[None, :],
|
||||
(decoder_input_ids.shape[0], decoder_input_ids.shape[-1] - 1),
|
||||
)
|
||||
|
||||
outputs_cache = model.decode(
|
||||
decoder_input_ids[:, :-1],
|
||||
encoder_outputs,
|
||||
decoder_attention_mask=decoder_attention_mask_cache,
|
||||
past_key_values=past_key_values,
|
||||
decoder_position_ids=decoder_position_ids,
|
||||
)
|
||||
decoder_position_ids = jnp.array(decoder_input_ids.shape[0] * [[decoder_input_ids.shape[-1] - 1]], dtype="i4")
|
||||
outputs_cache_next = model.decode(
|
||||
decoder_input_ids[:, -1:],
|
||||
encoder_outputs,
|
||||
past_key_values=outputs_cache.past_key_values,
|
||||
decoder_attention_mask=decoder_attention_mask_cache,
|
||||
decoder_position_ids=decoder_position_ids,
|
||||
)
|
||||
|
||||
outputs = model.decode(decoder_input_ids, encoder_outputs, decoder_attention_mask=decoder_attention_mask)
|
||||
|
||||
diff = np.max(np.abs((outputs_cache_next[0][:, -1, :5] - outputs[0][:, -1, :5])))
|
||||
self.parent.assertTrue(diff < 1e-3, msg=f"Max diff is {diff}")
|
||||
|
||||
|
||||
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
||||
config,
|
||||
input_ids,
|
||||
decoder_input_ids,
|
||||
attention_mask=None,
|
||||
decoder_attention_mask=None,
|
||||
):
|
||||
if attention_mask is None:
|
||||
attention_mask = np.not_equal(input_ids, config.pad_token_id).astype(np.int8)
|
||||
if decoder_attention_mask is None:
|
||||
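# keep the first decoder position (the decoder_start token) attendable even when it equals `pad_token_id`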
decoder_attention_mask = np.concatenate(
    [
        np.ones(decoder_input_ids[:, :1].shape, dtype=np.int8),
        np.not_equal(decoder_input_ids[:, 1:], config.pad_token_id).astype(np.int8),
    ],
    axis=-1,
)
|
||||
return {
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": decoder_input_ids,
|
||||
"attention_mask": attention_mask,
|
||||
"decoder_attention_mask": decoder_attention_mask,
|
||||
}
|
||||
|
||||
|
||||
@require_flax
|
||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (
|
||||
(
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
) if is_flax_available()
|
||||
else ()
|
||||
)
|
||||
all_generative_model_classes = (Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_flax_available() else ()
|
||||
is_encoder_decoder = True
|
||||
test_pruning = False
|
||||
test_head_masking = False
|
||||
test_onnx = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = Flax{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
|
||||
def test_use_cache_forward(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
self.model_tester.check_use_cache_forward(model_class, config, inputs_dict)
|
||||
|
||||
def test_use_cache_forward_with_attn_mask(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
self.model_tester.check_use_cache_forward_with_attn_mask(model_class, config, inputs_dict)
|
||||
|
||||
|
||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
||||
if a is None and b is None:
|
||||
return True
|
||||
try:
|
||||
if _assert_tensors_equal(a, b, atol=atol):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
return np.array(tok_lst, dtype=np.int32)
|
||||
|
||||
|
||||
TOLERANCE = 1e-4
|
||||
|
||||
|
||||
@slow
|
||||
@require_sentencepiece
|
||||
@require_tokenizers
|
||||
@require_flax
|
||||
class Flax{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
||||
def test_inference_no_head(self):
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
# change to intended input here
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = (1, 11, 1024)
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = np.array(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||
)
|
||||
_assert_tensors_equal(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
||||
|
||||
def test_inference_with_head(self):
|
||||
model = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
# change to intended input here
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = (1, 11, 1024)
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = np.array(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||
)
|
||||
_assert_tensors_equal(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
||||
|
||||
def test_seq_to_seq_generation(self):
|
||||
hf = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
|
||||
batch_input = [
|
||||
# string 1,
|
||||
# string 2,
|
||||
# string 3,
|
||||
# string 4,
|
||||
]
|
||||
|
||||
# The below article tests that we don't add any hypotheses outside of the top n_beams
|
||||
dct = tok.batch_encode_plus(
|
||||
batch_input,
|
||||
max_length=512,
|
||||
padding="max_length",
|
||||
truncation_strategy="only_first",
|
||||
truncation=True,
|
||||
return_tensors="np",
|
||||
)
|
||||
|
||||
hypotheses_batch = hf.generate(
|
||||
input_ids=dct["input_ids"],
|
||||
attention_mask=dct["attention_mask"],
|
||||
num_beams=2,
|
||||
)
|
||||
|
||||
EXPECTED = [
|
||||
# here expected 1,
|
||||
# here expected 2,
|
||||
# here expected 3,
|
||||
# here expected 4,
|
||||
]
|
||||
|
||||
generated = tok.batch_decode(
|
||||
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
||||
)
|
||||
assert generated == EXPECTED
|
||||
{%- endif %}
|
@ -1,971 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_tf_available, {{cookiecutter.camelcase_modelname}}Config
|
||||
from transformers.testing_utils import require_tf, slow
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
|
||||
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_input_mask=True,
|
||||
use_token_type_ids=True,
|
||||
use_labels=True,
|
||||
vocab_size=99,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=5,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_act="gelu",
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=512,
|
||||
type_vocab_size=16,
|
||||
type_sequence_label_size=2,
|
||||
initializer_range=0.02,
|
||||
num_labels=3,
|
||||
num_choices=4,
|
||||
scope=None,
|
||||
):
|
||||
# honour the constructor arguments instead of re-hardcoding their default values
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
input_mask = None
|
||||
if self.use_input_mask:
|
||||
input_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
token_type_ids = None
|
||||
if self.use_token_type_ids:
|
||||
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
|
||||
|
||||
sequence_labels = None
|
||||
token_labels = None
|
||||
choice_labels = None
|
||||
if self.use_labels:
|
||||
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
||||
|
||||
config = {{cookiecutter.camelcase_modelname}}Config(
|
||||
vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_hidden_layers=self.num_hidden_layers,
|
||||
num_attention_heads=self.num_attention_heads,
|
||||
intermediate_size=self.intermediate_size,
|
||||
hidden_act=self.hidden_act,
|
||||
hidden_dropout_prob=self.hidden_dropout_prob,
|
||||
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
|
||||
def prepare_config_and_inputs_for_decoder(self):
|
||||
(
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
) = self.prepare_config_and_inputs()
|
||||
|
||||
config.is_decoder = True
|
||||
encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
|
||||
encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
|
||||
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
encoder_hidden_states,
|
||||
encoder_attention_mask,
|
||||
)
|
||||
|
||||
def create_and_check_model(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
result = model(inputs)
|
||||
|
||||
result = model(input_ids)
|
||||
|
||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||
|
||||
def create_and_check_causal_lm_base_model(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.is_decoder = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
result = model(inputs)
|
||||
|
||||
result = model(input_ids)
|
||||
|
||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||
|
||||
def create_and_check_model_as_decoder(
|
||||
self,
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
encoder_hidden_states,
|
||||
encoder_attention_mask,
|
||||
):
|
||||
config.add_cross_attention = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
"encoder_hidden_states": encoder_hidden_states,
|
||||
"encoder_attention_mask": encoder_attention_mask,
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
result = model(inputs, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states)
|
||||
|
||||
# Also check the case where encoder outputs are not passed
|
||||
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
|
||||
|
||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||
|
||||
def create_and_check_causal_lm_model(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.is_decoder = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
prediction_scores = model(inputs)["logits"]
|
||||
self.parent.assertListEqual(
|
||||
list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
|
||||
def create_and_check_causal_lm_model_as_decoder(
|
||||
self,
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
encoder_hidden_states,
|
||||
encoder_attention_mask,
|
||||
):
|
||||
config.add_cross_attention = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
"encoder_hidden_states": encoder_hidden_states,
|
||||
"encoder_attention_mask": encoder_attention_mask,
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
result = model(inputs, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states)
|
||||
|
||||
prediction_scores = result["logits"]
|
||||
self.parent.assertListEqual(
|
||||
list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
|
||||
|
||||
def create_and_check_causal_lm_model_past(
|
||||
self,
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
):
|
||||
config.is_decoder = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
||||
|
||||
# first forward pass
|
||||
outputs = model(input_ids, use_cache=True)
|
||||
outputs_use_cache_conf = model(input_ids)
|
||||
outputs_no_past = model(input_ids, use_cache=False)
|
||||
|
||||
self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
|
||||
self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)
|
||||
|
||||
past_key_values = outputs.past_key_values
|
||||
|
||||
# create hypothetical next token and extend to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
|
||||
|
||||
# append to next input_ids and attn_mask
|
||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||
|
||||
output_from_no_past = model(next_input_ids, output_hidden_states=True).hidden_states[0]
|
||||
output_from_past = model(
|
||||
next_tokens, past_key_values=past_key_values, output_hidden_states=True
|
||||
).hidden_states[0]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
||||
output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
|
||||
output_from_past_slice = output_from_past[:, 0, random_slice_idx]
|
||||
|
||||
# test that outputs are equal for slice
|
||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)
|
||||
|
||||
def create_and_check_causal_lm_model_past_with_attn_mask(
|
||||
self,
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
):
|
||||
config.is_decoder = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
||||
|
||||
# create attention mask
|
||||
half_seq_length = self.seq_length // 2
|
||||
attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
|
||||
attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
|
||||
attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)
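# only the first half of the sequence is attended to; a token inside the masked second half is
# replaced below and must not change the outputs computed with the cache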
|
||||
|
||||
# first forward pass
|
||||
outputs = model(input_ids, attention_mask=attn_mask, use_cache=True)
|
||||
|
||||
# create hypothetical next token and extend to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
|
||||
|
||||
past_key_values = outputs.past_key_values
|
||||
|
||||
# change a random masked slice from input_ids
|
||||
random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
|
||||
random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
|
||||
vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
|
||||
condition = tf.transpose(
|
||||
tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
|
||||
)
|
||||
input_ids = tf.where(condition, random_other_next_tokens, input_ids)
|
||||
|
||||
# append to next input_ids and attn_mask
|
||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||
attn_mask = tf.concat(
|
||||
[attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)],
|
||||
axis=1,
|
||||
)
|
||||
|
||||
output_from_no_past = model(
|
||||
next_input_ids,
|
||||
attention_mask=attn_mask,
|
||||
output_hidden_states=True,
|
||||
).hidden_states[0]
|
||||
output_from_past = model(
|
||||
next_tokens, past_key_values=past_key_values, attention_mask=attn_mask, output_hidden_states=True
|
||||
).hidden_states[0]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
||||
output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
|
||||
output_from_past_slice = output_from_past[:, 0, random_slice_idx]
|
||||
|
||||
# test that outputs are equal for slice
|
||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)
|
||||
|
||||
def create_and_check_causal_lm_model_past_large_inputs(
|
||||
self,
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
):
|
||||
config.is_decoder = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
||||
|
||||
input_ids = input_ids[:1, :]
|
||||
input_mask = input_mask[:1, :]
|
||||
self.batch_size = 1
|
||||
|
||||
# first forward pass
|
||||
outputs = model(input_ids, attention_mask=input_mask, use_cache=True)
|
||||
past_key_values = outputs.past_key_values
|
||||
|
||||
# create hypothetical next token and extend to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
||||
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
||||
|
||||
# append to next input_ids and attn_mask
|
||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||
next_attention_mask = tf.concat([input_mask, next_attn_mask], axis=-1)
|
||||
|
||||
output_from_no_past = model(
|
||||
next_input_ids,
|
||||
attention_mask=next_attention_mask,
|
||||
output_hidden_states=True,
|
||||
).hidden_states[0]
|
||||
output_from_past = model(
|
||||
next_tokens,
|
||||
attention_mask=next_attention_mask,
|
||||
past_key_values=past_key_values,
|
||||
output_hidden_states=True,
|
||||
).hidden_states[0]
|
||||
|
||||
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
||||
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
||||
|
||||
# test that outputs are equal for slice
|
||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
||||
|
||||
def create_and_check_decoder_model_past_large_inputs(
|
||||
self,
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
encoder_hidden_states,
|
||||
encoder_attention_mask,
|
||||
):
|
||||
config.add_cross_attention = True
|
||||
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
||||
|
||||
input_ids = input_ids[:1, :]
|
||||
input_mask = input_mask[:1, :]
|
||||
encoder_hidden_states = encoder_hidden_states[:1, :, :]
|
||||
encoder_attention_mask = encoder_attention_mask[:1, :]
|
||||
self.batch_size = 1
|
||||
|
||||
# first forward pass
|
||||
outputs = model(
|
||||
input_ids,
|
||||
attention_mask=input_mask,
|
||||
encoder_hidden_states=encoder_hidden_states,
|
||||
encoder_attention_mask=encoder_attention_mask,
|
||||
use_cache=True,
|
||||
)
|
||||
past_key_values = outputs.past_key_values
|
||||
|
||||
# create hypothetical next token and extend to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
||||
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
||||
|
||||
# append to next input_ids and attn_mask
|
||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||
next_attention_mask = tf.concat([input_mask, next_attn_mask], axis=-1)
|
||||
|
||||
output_from_no_past = model(
|
||||
next_input_ids,
|
||||
attention_mask=next_attention_mask,
|
||||
encoder_hidden_states=encoder_hidden_states,
|
||||
encoder_attention_mask=encoder_attention_mask,
|
||||
output_hidden_states=True,
|
||||
).hidden_states[0]
|
||||
output_from_past = model(
|
||||
next_tokens,
|
||||
attention_mask=next_attention_mask,
|
||||
encoder_hidden_states=encoder_hidden_states,
|
||||
encoder_attention_mask=encoder_attention_mask,
|
||||
past_key_values=past_key_values,
|
||||
output_hidden_states=True,
|
||||
).hidden_states[0]
|
||||
|
||||
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
||||
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
||||
|
||||
# test that outputs are equal for slice
|
||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
||||
|
||||
def create_and_check_for_masked_lm(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
||||
|
||||
def create_and_check_for_sequence_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
result = model(inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
||||
|
||||
def create_and_check_for_multiple_choice(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_choices = self.num_choices
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(config=config)
|
||||
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
|
||||
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
|
||||
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
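# tile every input along a new choices axis: (batch, seq_len) -> (batch, num_choices, seq_len)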
|
||||
inputs = {
|
||||
"input_ids": multiple_choice_inputs_ids,
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
|
||||
|
||||
def create_and_check_for_token_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
|
||||
|
||||
def create_and_check_for_question_answering(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
result = model(inputs)
|
||||
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
|
||||
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
) = config_and_inputs
|
||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
||||
return config, inputs_dict
|
||||
|
||||
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (
|
||||
(
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
)
|
||||
if is_tf_available()
|
||||
else ()
|
||||
)
|
||||
|
||||
test_head_masking = False
|
||||
test_onnx = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config, hidden_size=37)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
|
||||
def test_model(self):
|
||||
"""Test the base model"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||
|
||||
@unittest.skip(reason="Template classes interact badly with this test.")
|
||||
def test_keras_fit(self):
|
||||
pass
|
||||
|
||||
def test_causal_lm_base_model(self):
|
||||
"""Test the base model of the causal LM model
|
||||
|
||||
is_decoder=True, no cross_attention, no encoder outputs
|
||||
"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_causal_lm_base_model(*config_and_inputs)
|
||||
|
||||
def test_model_as_decoder(self):
|
||||
"""Test the base model as a decoder (of an encoder-decoder architecture)
|
||||
|
||||
is_decoder=True + cross_attention + pass encoder outputs
|
||||
"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
|
||||
self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)
|
||||
|
||||
def test_for_masked_lm(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
|
||||
|
||||
def test_for_causal_lm(self):
|
||||
"""Test the causal LM model"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_causal_lm_model(*config_and_inputs)
|
||||
|
||||
def test_causal_lm_model_as_decoder(self):
|
||||
"""Test the causal LM model as a decoder"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
|
||||
self.model_tester.create_and_check_causal_lm_model_as_decoder(*config_and_inputs)
|
||||
|
||||
def test_causal_lm_model_past(self):
|
||||
"""Test causal LM model with `past_key_values`"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_causal_lm_model_past(*config_and_inputs)
|
||||
|
||||
def test_causal_lm_model_past_with_attn_mask(self):
|
||||
"""Test the causal LM model with `past_key_values` and `attention_mask`"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_causal_lm_model_past_with_attn_mask(*config_and_inputs)
|
||||
|
||||
def test_causal_lm_model_past_with_large_inputs(self):
|
||||
"""Test the causal LM model with `past_key_values` and a longer decoder sequence length"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_causal_lm_model_past_large_inputs(*config_and_inputs)
|
||||
|
||||
def test_decoder_model_past_with_large_inputs(self):
|
||||
"""Similar to `test_causal_lm_model_past_with_large_inputs` but with cross-attention"""
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
|
||||
self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
|
||||
|
||||
def test_for_multiple_choice(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
|
||||
|
||||
def test_for_question_answering(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
|
||||
|
||||
def test_for_sequence_classification(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
|
||||
|
||||
def test_for_token_classification(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_inference_masked_lm(self):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForMaskedLM.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
||||
input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
|
||||
output = model(input_ids)[0]
|
||||
|
||||
# TODO Replace vocab size
|
||||
vocab_size = 32000
|
||||
|
||||
expected_shape = [1, 6, vocab_size]
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
|
||||
print(output[:, :3, :3])
|
||||
|
||||
# TODO Replace values below with what was printed above.
|
||||
expected_slice = tf.constant(
|
||||
[
|
||||
[
|
||||
[-0.05243197, -0.04498899, 0.05512108],
|
||||
[-0.07444685, -0.01064632, 0.04352357],
|
||||
[-0.05020351, 0.05530146, 0.00700043],
|
||||
]
|
||||
]
|
||||
)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
|
||||
|
||||
{% else %}
|
||||
import unittest
|
||||
|
||||
from transformers import (
|
||||
is_tf_available,
|
||||
{{cookiecutter.camelcase_modelname}}Config,
|
||||
{{cookiecutter.camelcase_modelname}}Tokenizer,
|
||||
)
|
||||
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
|
||||
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
config_cls = {{cookiecutter.camelcase_modelname}}Config
|
||||
config_updates = {}
|
||||
hidden_act = "gelu"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_labels=False,
|
||||
vocab_size=99,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=5,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=20,
|
||||
eos_token_id=2,
|
||||
pad_token_id=1,
|
||||
bos_token_id=0,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.seq_length = seq_length
|
||||
self.is_training = is_training
|
||||
self.use_labels = use_labels
|
||||
self.vocab_size = vocab_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
self.intermediate_size = intermediate_size
|
||||
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.eos_token_id = eos_token_id
|
||||
self.pad_token_id = pad_token_id
|
||||
self.bos_token_id = bos_token_id
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size)
|
||||
eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1)
|
||||
input_ids = tf.concat([input_ids, eos_tensor], axis=1)
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = self.config_cls(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
decoder_layers=self.num_hidden_layers,
|
||||
encoder_attention_heads=self.num_attention_heads,
|
||||
decoder_attention_heads=self.num_attention_heads,
|
||||
encoder_ffn_dim=self.intermediate_size,
|
||||
decoder_ffn_dim=self.intermediate_size,
|
||||
dropout=self.hidden_dropout_prob,
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
eos_token_id=self.eos_token_id,
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
decoder_start_token_id=self.pad_token_id,
|
||||
**self.config_updates,
|
||||
)
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def check_decoder_model_past_large_inputs(self, config, inputs_dict):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder()
|
||||
input_ids = inputs_dict["input_ids"]
|
||||
|
||||
input_ids = input_ids[:1, :]
|
||||
attention_mask = inputs_dict["attention_mask"][:1, :]
|
||||
self.batch_size = 1
|
||||
|
||||
# first forward pass
|
||||
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
||||
|
||||
output, past_key_values = outputs.to_tuple()
|
||||
|
||||
# create hypothetical next token and extend to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
||||
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
||||
|
||||
# append to next input_ids and attention mask
|
||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||
next_attention_mask = tf.concat([attention_mask, next_attn_mask], axis=-1)
|
||||
|
||||
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)[0]
|
||||
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)[0]
|
||||
|
||||
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
||||
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
||||
|
||||
# test that outputs are equal for slice
|
||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
||||
|
||||
|
||||
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
||||
config,
|
||||
input_ids,
|
||||
decoder_input_ids,
|
||||
attention_mask=None,
|
||||
decoder_attention_mask=None,
|
||||
):
|
||||
if attention_mask is None:
|
||||
attention_mask = tf.cast(tf.math.not_equal(input_ids, config.pad_token_id), tf.int32)
|
||||
if decoder_attention_mask is None:
|
||||
decoder_attention_mask = tf.concat([tf.ones(decoder_input_ids[:, :1].shape, dtype=tf.int32), tf.cast(tf.math.not_equal(decoder_input_ids[:, 1:], config.pad_token_id), tf.int32)], axis=-1)
|
||||
return {
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": decoder_input_ids,
|
||||
"attention_mask": attention_mask,
|
||||
"decoder_attention_mask": decoder_attention_mask,
|
||||
}
|
||||
|
||||
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model) if is_tf_available() else ()
|
||||
all_generative_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_tf_available() else ()
|
||||
is_encoder_decoder = True
|
||||
test_pruning = False
|
||||
test_head_masking = False
|
||||
test_onnx = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
|
||||
def test_decoder_model_past_large_inputs(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)
|
||||
|
||||
@unittest.skip(reason="Template classes interact badly with this test.")
|
||||
def test_keras_fit(self):
|
||||
pass
|
||||
|
||||
|
||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
||||
if a is None and b is None:
|
||||
return True
|
||||
try:
|
||||
if tf.debugging.assert_near(a, b, atol=atol):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
return tf.constant(tok_lst, dtype=tf.int32)
|
||||
|
||||
|
||||
TOLERANCE = 1e-4
|
||||
|
||||
|
||||
@slow
|
||||
@require_sentencepiece
|
||||
@require_tokenizers
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
||||
def test_inference_no_head(self):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
# change to intended input here
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = (1, 11, 1024)
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = tf.constant(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||
)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
||||
|
||||
def test_inference_with_head(self):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
# change to intended input here
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = (1, 11, 1024)
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = tf.constant(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||
)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
||||
|
||||
def test_seq_to_seq_generation(self):
|
||||
hf = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
|
||||
batch_input = [
|
||||
# string 1,
|
||||
# string 2,
|
||||
# string 3,
|
||||
# string 4,
|
||||
]
|
||||
|
||||
# The test below checks that we don't add any hypotheses outside of the top n_beams
|
||||
dct = tok.batch_encode_plus(
|
||||
batch_input,
|
||||
max_length=512,
|
||||
padding="max_length",
|
||||
truncation_strategy="only_first",
|
||||
truncation=True,
|
||||
return_tensors="tf",
|
||||
)
|
||||
|
||||
hypotheses_batch = hf.generate(
|
||||
input_ids=dct["input_ids"],
|
||||
attention_mask=dct["attention_mask"],
|
||||
num_beams=2,
|
||||
)
|
||||
|
||||
EXPECTED = [
|
||||
# here expected 1,
|
||||
# here expected 2,
|
||||
# here expected 3,
|
||||
# here expected 4,
|
||||
]
|
||||
|
||||
generated = tok.batch_decode(
|
||||
hypotheses_batch.numpy().tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
||||
)
|
||||
assert generated == EXPECTED
|
||||
{%- endif %}
|
File diff suppressed because it is too large
@ -1,461 +0,0 @@
|
||||
## Copyright 2022 The HuggingFace Team. All rights reserved.
|
||||
##
|
||||
## Licensed under the Apache License, Version 2.0 (the "License");
|
||||
## you may not use this file except in compliance with the License.
|
||||
## You may obtain a copy of the License at
|
||||
##
|
||||
## http://www.apache.org/licenses/LICENSE-2.0
|
||||
##
|
||||
## Unless required by applicable law or agreed to in writing, software
|
||||
## distributed under the License is distributed on an "AS IS" BASIS,
|
||||
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
## See the License for the specific language governing permissions and
|
||||
## limitations under the License.
|
||||
|
||||
## This file is made so that specific statements may be copied inside existing files. This is useful to copy
|
||||
## import statements in __init__.py, or to complete model lists in the AUTO files.
|
||||
##
|
||||
## It is to be used as such:
|
||||
## Put '# To replace in: "FILE_PATH"' in order to indicate the contents will be copied in the file at path FILE_PATH
|
||||
## Put '# Below: "STATEMENT"' in order to copy the contents below **the first occurrence** of that line in the file at FILE_PATH
|
||||
## Put '# Replace with:' followed by the lines containing the content to define the content
|
||||
## End a statement with '# End.'. If starting a new statement without redefining the FILE_PATH, it will continue pasting
|
||||
## content in that file.
|
||||
##
|
||||
## Put '## COMMENT' to comment on the file.
|
||||
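##
## As a minimal illustration of a complete directive block (the file path, anchor line, and
## replacement statement below are hypothetical placeholders, not real entries):
##
##     # To replace in: "src/transformers/some_module.py"
##     # Below: "# An anchor comment that already exists in that file"
##     # Replace with:
##     some_new_statement = "content pasted below the first occurrence of the anchor"
##     # End.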
|
||||
# To replace in: "src/transformers/__init__.py"
|
||||
# Below: " # PyTorch models structure" if generating PyTorch
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
"{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
||||
"{{cookiecutter.camelcase_modelname}}Layer",
|
||||
"{{cookiecutter.camelcase_modelname}}Model",
|
||||
"{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
"load_tf_weights_in_{{cookiecutter.lowercase_modelname}}",
|
||||
]
|
||||
)
|
||||
{% else %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
||||
"{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"{{cookiecutter.camelcase_modelname}}Model",
|
||||
"{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: " # TensorFlow models structure" if generating TensorFlow
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
||||
"TF{{cookiecutter.camelcase_modelname}}Layer",
|
||||
"TF{{cookiecutter.camelcase_modelname}}Model",
|
||||
"TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
)
|
||||
{% else %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
||||
"TF{{cookiecutter.camelcase_modelname}}Model",
|
||||
"TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: " # Flax models structure" if generating Flax
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}Layer",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}Model",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
)
|
||||
{% else %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}Model",
|
||||
"Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
||||
]
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: " # Fast tokenizers structure"
|
||||
# Replace with:
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].append("{{cookiecutter.camelcase_modelname}}TokenizerFast")
|
||||
# End.
|
||||
|
||||
# Below: " # Models"
|
||||
# Replace with:
|
||||
"models.{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.camelcase_modelname}}Config", "{{cookiecutter.camelcase_modelname}}Tokenizer"],
|
||||
# End.
|
||||
|
||||
# To replace in: "src/transformers/__init__.py"
|
||||
# Below: " # PyTorch model imports" if generating PyTorch
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Layer,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
||||
)
|
||||
{% else %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: " # TensorFlow model imports" if generating TensorFlow
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}Layer,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% else %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: " # Flax model imports" if generating Flax
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Layer,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% else %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
||||
Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: " # Fast tokenizers imports"
|
||||
# Replace with:
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
# End.
|
||||
|
||||
# Below: " from .models.albert import AlbertConfig"
|
||||
# Replace with:
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
# End.
|
||||
|
||||
|
||||
|
||||
# To replace in: "src/transformers/models/__init__.py"
|
||||
# Below: "from . import ("
|
||||
# Replace with:
|
||||
{{cookiecutter.lowercase_modelname}},
|
||||
# End.
|
||||
|
||||
|
||||
# To replace in: "src/transformers/models/auto/configuration_auto.py"
|
||||
# Below: "# Add configs here"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}Config"),
|
||||
# End.
|
||||
|
||||
# Below: "# Add full (and cased) model names here"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}"),
|
||||
# End.
|
||||
|
||||
|
||||
|
||||
# To replace in: "src/transformers/models/auto/modeling_auto.py" if generating PyTorch
|
||||
# Below: "# Base model mapping"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}Model"),
|
||||
# End.
|
||||
|
||||
# Below: "# Model with LM heads mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Causal LM mapping"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForCausalLM"),
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Masked LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Sequence Classification mapping"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Question Answering mapping"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Token Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForTokenClassification"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Multiple Choice mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForMultipleChoice"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# To replace in: "src/transformers/models/auto/modeling_tf_auto.py" if generating TensorFlow
|
||||
# Below: "# Base model mapping"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}Model"),
|
||||
# End.
|
||||
|
||||
# Below: "# Model with LM heads mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForCausalLM"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Masked LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Sequence Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Question Answering mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Token Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForTokenClassification"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Multiple Choice mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# To replace in: "src/transformers/models/auto/modeling_flax_auto.py" if generating Flax
|
||||
# Below: "# Base model mapping"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}Model"),
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Masked LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForCausalLM"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Masked LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Sequence Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Question Answering mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Token Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Multiple Choice mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice"),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else %}
|
||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
|
||||
|
||||
# To replace in: "utils/check_repo.py" if generating PyTorch
|
||||
|
||||
# Below: "models to ignore for model xxx mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else -%}
|
||||
"{{cookiecutter.camelcase_modelname}}Encoder",
|
||||
"{{cookiecutter.camelcase_modelname}}Decoder",
|
||||
"{{cookiecutter.camelcase_modelname}}DecoderWrapper",
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "models to ignore for not tested"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else -%}
|
||||
"{{cookiecutter.camelcase_modelname}}Encoder", # Building part of bigger (tested) model.
|
||||
"{{cookiecutter.camelcase_modelname}}Decoder", # Building part of bigger (tested) model.
|
||||
"{{cookiecutter.camelcase_modelname}}DecoderWrapper", # Building part of bigger (tested) model.
|
||||
{% endif -%}
|
||||
# End.
|
@ -1,157 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2022 {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tokenization classes for {{cookiecutter.modelname}}."""
|
||||
|
||||
{%- if cookiecutter.tokenizer_type == "Based on BERT" %}
|
||||
from ...utils import logging
|
||||
from ..bert.tokenization_bert_fast import BertTokenizerFast
|
||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.txt",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
||||
r"""
|
||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
||||
|
||||
[`~{{cookiecutter.camelcase_modelname}}TokenizerFast`] is identical to [`BertTokenizerFast`] and runs
|
||||
end-to-end tokenization: punctuation splitting and wordpiece.
|
||||
|
||||
Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning
|
||||
parameters.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||
from ...utils import logging
|
||||
from ..bart.tokenization_bart_fast import BartTokenizerFast
|
||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BartTokenizerFast):
|
||||
r"""
|
||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
||||
|
||||
[`~{{cookiecutter.camelcase_modelname}}TokenizerFast`] is identical to [`BartTokenizerFast`] and runs
|
||||
end-to-end tokenization based on byte-level Byte-Pair-Encoding.
|
||||
|
||||
Refer to superclass [`BartTokenizerFast`] for usage examples and documentation concerning
|
||||
parameters.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||
from typing import List, Optional
|
||||
|
||||
from tokenizers import ByteLevelBPETokenizer
|
||||
|
||||
from ...tokenization_utils_fast import PreTrainedTokenizerFast
|
||||
from ...utils import logging
|
||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.json"}
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast):
|
||||
"""
|
||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
||||
|
||||
Args:
|
||||
vocab_file (`str`):
|
||||
Path to the vocabulary file.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
vocab_file,
|
||||
merges_file,
|
||||
unk_token="<|endoftext|>",
|
||||
bos_token="<|endoftext|>",
|
||||
eos_token="<|endoftext|>",
|
||||
add_prefix_space=False,
|
||||
trim_offsets=True,
|
||||
**kwargs
|
||||
):
|
||||
super().__init__(
|
||||
ByteLevelBPETokenizer(
|
||||
vocab_file=vocab_file,
|
||||
merges_file=merges_file,
|
||||
add_prefix_space=add_prefix_space,
|
||||
trim_offsets=trim_offsets,
|
||||
),
|
||||
bos_token=bos_token,
|
||||
eos_token=eos_token,
|
||||
unk_token=unk_token,
|
||||
**kwargs,
|
||||
)
|
||||
self.add_prefix_space = add_prefix_space
|
||||
|
||||
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
||||
output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
|
||||
if token_ids_1 is None:
|
||||
return output
|
||||
|
||||
return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]
|
||||
|
||||
|
||||
def create_token_type_ids_from_sequences(
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task.
|
||||
{{cookiecutter.modelname}} does not make use of token type ids, therefore a list of zeros is returned.
|
||||
|
||||
Args:
|
||||
token_ids_0 (`List[int]`):
|
||||
List of IDs.
|
||||
token_ids_1 (`List[int]`, *optional*):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
|
||||
Returns:
|
||||
`List[int]`: List of zeros.
|
||||
"""
|
||||
sep = [self.sep_token_id]
|
||||
cls = [self.cls_token_id]
|
||||
|
||||
if token_ids_1 is None:
|
||||
return len(cls + token_ids_0 + sep) * [0]
|
||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||
|
||||
|
||||
{% endif %}
|
@ -1,293 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2022 {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tokenization classes for {{cookiecutter.modelname}}."""
|
||||
|
||||
{%- if cookiecutter.tokenizer_type == "Based on BERT" %}
|
||||
from ...utils import logging
|
||||
from ..bert.tokenization_bert import BertTokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.txt",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Tokenizer(BertTokenizer):
|
||||
r"""
|
||||
Construct a {{cookiecutter.modelname}} tokenizer.
|
||||
|
||||
[`~{{cookiecutter.camelcase_modelname}}Tokenizer`] is identical to [`BertTokenizer`] and runs end-to-end
|
||||
tokenization: punctuation splitting and wordpiece.
|
||||
|
||||
Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning
|
||||
parameters.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||
from ...utils import logging
|
||||
from ..bart.tokenization_bart import BartTokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Tokenizer(BartTokenizer):
|
||||
"""
|
||||
Construct a {{cookiecutter.modelname}} tokenizer.
|
||||
|
||||
[`~{{cookiecutter.camelcase_modelname}}Tokenizer`] is identical to [`BartTokenizer`] and runs end-to-end
|
||||
tokenization based on byte-level Byte-Pair-Encoding.
|
||||
|
||||
Refer to superclass [`BartTokenizer`] for usage examples and documentation concerning
|
||||
parameters.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||
from typing import List, Optional
|
||||
|
||||
from tokenizers import ByteLevelBPETokenizer
|
||||
|
||||
from ...tokenization_utils import AddedToken, PreTrainedTokenizer
|
||||
from ...tokenization_utils_fast import PreTrainedTokenizerFast
|
||||
from ...utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer):
|
||||
"""
|
||||
Construct a {{cookiecutter.modelname}} tokenizer. Based on byte-level Byte-Pair-Encoding.
|
||||
|
||||
Args:
|
||||
vocab_file (`str`):
|
||||
Path to the vocabulary file.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
model_input_names = ["input_ids", "attention_mask"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
vocab_file,
|
||||
unk_token="<|endoftext|>",
|
||||
bos_token="<|endoftext|>",
|
||||
eos_token="<|endoftext|>",
|
||||
**kwargs
|
||||
):
|
||||
bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
|
||||
eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
|
||||
unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
|
||||
super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
|
||||
|
||||
""" Initialisation """
|
||||
|
||||
@property
|
||||
def vocab_size(self):
|
||||
""" Returns vocab size """
|
||||
|
||||
def get_vocab(self):
|
||||
""" Returns vocab as a dict """
|
||||
|
||||
def _tokenize(self, text):
|
||||
""" Returns a tokenized string. """
|
||||
|
||||
def _convert_token_to_id(self, token):
|
||||
""" Converts a token (str) in an id using the vocab. """
|
||||
|
||||
def _convert_id_to_token(self, index):
|
||||
"""Converts an index (integer) in a token (str) using the vocab."""
|
||||
|
||||
def convert_tokens_to_string(self, tokens):
|
||||
""" Converts a sequence of tokens (string) in a single string. """
|
||||
|
||||
def save_vocabulary(self, save_directory):
|
||||
"""
|
||||
Save the vocabulary and special tokens file to a directory.
|
||||
|
||||
Args:
|
||||
save_directory (`str`):
|
||||
The directory in which to save the vocabulary.
|
||||
|
||||
Returns:
|
||||
`Tuple[str]`: Paths to the files saved.
|
||||
"""
|
||||
|
||||
def build_inputs_with_special_tokens(
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Build model inputs from a sequence or a pair of sequence for sequence classification tasks
|
||||
by concatenating and adding special tokens.
|
||||
A {{cookiecutter.modelname}} sequence has the following format:
|
||||
|
||||
- single sequence: `<s> X </s>`
|
||||
- pair of sequences: `<s> A </s></s> B </s>`
|
||||
|
||||
Args:
|
||||
token_ids_0 (`List[int]`):
|
||||
List of IDs to which the special tokens will be added.
|
||||
token_ids_1 (`List[int]`, *optional*):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
|
||||
Returns:
|
||||
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
|
||||
"""
|
||||
if token_ids_1 is None:
|
||||
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
|
||||
cls = [self.cls_token_id]
|
||||
sep = [self.sep_token_id]
|
||||
return cls + token_ids_0 + sep + sep + token_ids_1 + sep
|
||||
|
||||
def get_special_tokens_mask(
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
|
||||
) -> List[int]:
|
||||
"""
|
||||
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
|
||||
special tokens using the tokenizer `prepare_for_model` method.
|
||||
|
||||
Args:
|
||||
token_ids_0 (`List[int]`):
|
||||
List of IDs.
|
||||
token_ids_1 (`List[int]`, *optional*):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not the token list is already formatted with special tokens for the model.
|
||||
|
||||
Returns:
|
||||
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
|
||||
"""
|
||||
if already_has_special_tokens:
|
||||
return super().get_special_tokens_mask(
|
||||
token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
|
||||
)
|
||||
|
||||
if token_ids_1 is None:
|
||||
return [1] + ([0] * len(token_ids_0)) + [1]
|
||||
return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
|
||||
|
||||
def create_token_type_ids_from_sequences(
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task.
|
||||
{{cookiecutter.modelname}} does not make use of token type ids, therefore a list of zeros is returned.
|
||||
|
||||
Args:
|
||||
token_ids_0 (`List[int]`):
|
||||
List of IDs.
|
||||
token_ids_1 (`List[int]`, *optional*):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
|
||||
Returns:
|
||||
`List[int]`: List of zeros.
|
||||
"""
|
||||
sep = [self.sep_token_id]
|
||||
cls = [self.cls_token_id]
|
||||
|
||||
if token_ids_1 is None:
|
||||
return len(cls + token_ids_0 + sep) * [0]
|
||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||
|
||||
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
||||
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
||||
if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
|
||||
text = " " + text
|
||||
return (text, kwargs)
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast):
|
||||
"""
|
||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
||||
|
||||
Args:
|
||||
vocab_file (`str`):
|
||||
Path to the vocabulary file.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
model_input_names = ["input_ids", "attention_mask"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
vocab_file,
|
||||
merges_file,
|
||||
unk_token="<|endoftext|>",
|
||||
bos_token="<|endoftext|>",
|
||||
eos_token="<|endoftext|>",
|
||||
add_prefix_space=False,
|
||||
trim_offsets=True,
|
||||
**kwargs
|
||||
):
|
||||
super().__init__(
|
||||
ByteLevelBPETokenizer(
|
||||
vocab_file=vocab_file,
|
||||
merges_file=merges_file,
|
||||
add_prefix_space=add_prefix_space,
|
||||
trim_offsets=trim_offsets,
|
||||
),
|
||||
bos_token=bos_token,
|
||||
eos_token=eos_token,
|
||||
unk_token=unk_token,
|
||||
**kwargs,
|
||||
)
|
||||
self.add_prefix_space = add_prefix_space
|
||||
|
||||
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
||||
output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
|
||||
if token_ids_1 is None:
|
||||
return output
|
||||
|
||||
return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]
|
||||
|
||||
|
||||
def create_token_type_ids_from_sequences(
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task.
|
||||
{{cookiecutter.modelname}} does not make use of token type ids, therefore a list of zeros is returned.
|
||||
|
||||
Args:
|
||||
token_ids_0 (`List[int]`):
|
||||
List of IDs.
|
||||
token_ids_1 (`List[int]`, *optional*):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
|
||||
Returns:
|
||||
`List[int]`: List of zeros.
|
||||
"""
|
||||
sep = [self.sep_token_id]
|
||||
cls = [self.cls_token_id]
|
||||
|
||||
if token_ids_1 is None:
|
||||
return len(cls + token_ids_0 + sep) * [0]
|
||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||
|
||||
{% endif %}
|
@ -1,234 +0,0 @@
|
||||
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
# {{cookiecutter.modelname}}
|
||||
|
||||
## Overview
|
||||
|
||||
The {{cookiecutter.modelname}} model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>. <INSERT SHORT SUMMARY HERE>
|
||||
|
||||
The abstract from the paper is the following:
|
||||
|
||||
*<INSERT PAPER ABSTRACT HERE>*
|
||||
|
||||
Tips:
|
||||
|
||||
<INSERT TIPS ABOUT MODEL HERE>
|
||||
|
||||
This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface.co/<INSERT YOUR HF USERNAME HERE>). The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}Config
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}Config
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
- build_inputs_with_special_tokens
|
||||
- get_special_tokens_mask
|
||||
- create_token_type_ids_from_sequences
|
||||
- save_vocabulary
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
|
||||
|
||||
{% if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax -%}
|
||||
## {{cookiecutter.camelcase_modelname}}Model
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}Model
|
||||
- forward
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
## {{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
- forward
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||
- forward
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
- forward
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||
- forward
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||
- forward
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
- forward
|
||||
|
||||
{%- else %}
|
||||
## {{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
- forward
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
- forward
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
- forward
|
||||
|
||||
|
||||
## {{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
|
||||
[[autodoc]] {{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
- forward
|
||||
|
||||
|
||||
{% endif -%}
|
||||
{% endif -%}
|
||||
{% if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax -%}
|
||||
|
||||
## TF{{cookiecutter.camelcase_modelname}}Model
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}Model
|
||||
- call
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
## TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||
- call
|
||||
|
||||
|
||||
## TF{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
- call
|
||||
|
||||
|
||||
## TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
- call
|
||||
|
||||
|
||||
## TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||
- call
|
||||
|
||||
|
||||
## TF{{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||
- call
|
||||
|
||||
|
||||
## TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
- call
|
||||
|
||||
|
||||
{%- else %}
|
||||
## TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
|
||||
[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
- call
|
||||
|
||||
|
||||
{% endif -%}
|
||||
{% endif -%}
|
||||
|
||||
{% if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax -%}
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}Model
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}Model
|
||||
- call
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||
- call
|
||||
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
- call
|
||||
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
- call
|
||||
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||
- call
|
||||
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||
- call
|
||||
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
- call
|
||||
|
||||
|
||||
{%- else %}
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
- call
|
||||
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
- call
|
||||
|
||||
|
||||
## Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
|
||||
[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
- call
|
||||
|
||||
|
||||
{% endif -%}
|
||||
{% endif -%}
|
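The `{{cookiecutter.*}}` placeholders and `{% if %}` guards above are plain Jinja syntax, so the doc page a generated model ends up with depends entirely on the values supplied in the JSON configurations that follow. As a minimal, illustrative sketch (assuming `jinja2` is installed; the fragment and values below are reduced stand-ins, not the full deleted template), the encoder-only branch renders like this:

```python
from jinja2 import Template

# A reduced fragment of the doc template above, for illustration only.
fragment = """\
## {{ cookiecutter.camelcase_modelname }}Model

[[autodoc]] {{ cookiecutter.camelcase_modelname }}Model
    - forward
{% if cookiecutter.is_encoder_decoder_model == "False" %}
## {{ cookiecutter.camelcase_modelname }}ForCausalLM

[[autodoc]] {{ cookiecutter.camelcase_modelname }}ForCausalLM
    - forward
{% endif %}
"""

# Hypothetical values, mirroring the BrandNewBERT defaults in the
# configuration shown below.
context = {
    "camelcase_modelname": "BrandNewBert",
    "is_encoder_decoder_model": "False",
}

# The encoder-only branch is kept, so the rendered page documents both
# BrandNewBertModel and BrandNewBertForCausalLM.
print(Template(fragment).render(cookiecutter=context))
```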
@ -1,19 +0,0 @@
{
    "modelname": "BrandNewBERT",
    "uppercase_modelname": "BRAND_NEW_BERT",
    "lowercase_modelname": "brand_new_bert",
    "camelcase_modelname": "BrandNewBert",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"],
    "generate_tensorflow_pytorch_and_flax": [
        "PyTorch, TensorFlow and Flax",
        "PyTorch & TensorFlow",
        "PyTorch & Flax",
        "TensorFlow & Flax",
        "PyTorch",
        "TensorFlow",
        "Flax"
    ],
    "is_encoder_decoder_model": ["True", "False"]
}
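In cookiecutter-style templating, a key whose value is a list is typically presented as a choice prompt with the first entry acting as the default, while plain string values are used as-is, so the file above both documents the valid options and fixes the defaults. A small sketch of reading those defaults back out (the path here is only a placeholder, not the deleted file's real location):

```python
import json

# Placeholder path; the deleted file's actual location is not reproduced here.
with open("cookiecutter.json") as f:
    config = json.load(f)

for key, value in config.items():
    if isinstance(value, list):
        # A list behaves as a choice prompt; the first entry is the default.
        print(f"{key}: default={value[0]!r}, choices={value}")
    else:
        print(f"{key}: {value!r}")
```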
@ -1,11 +0,0 @@
{
    "modelname": "Template",
    "uppercase_modelname": "TEMPLATE",
    "lowercase_modelname": "template",
    "camelcase_modelname": "Template",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplateFLAX",
    "uppercase_modelname": "TEMPLATE_FLAX",
    "lowercase_modelname": "template_flax",
    "camelcase_modelname": "TemplateFlax",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "Flax",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "FlaxNewENCDEC",
    "uppercase_modelname": "FLAX_NEW_ENC_DEC",
    "lowercase_modelname": "flax_new_enc_dec_template",
    "camelcase_modelname": "FlaxNewEncDec",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "new-flax-enc-dec-base",
    "tokenizer_type": "Based on BART",
    "generate_tensorflow_pytorch_and_flax": "Flax",
    "is_encoder_decoder_model": "True"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplatePT",
    "uppercase_modelname": "TEMPLATE_PT",
    "lowercase_modelname": "template_pt",
    "camelcase_modelname": "TemplatePt",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "PyTorch",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "PTNewENCDEC",
    "uppercase_modelname": "PT_NEW_ENC_DEC",
    "lowercase_modelname": "pt_new_enc_dec_template",
    "camelcase_modelname": "PtNewEncDec",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "pt-new-enc-dec-base",
    "tokenizer_type": "Based on BART",
    "generate_tensorflow_pytorch_and_flax": "PyTorch",
    "is_encoder_decoder_model": "True"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplateBI",
    "uppercase_modelname": "TEMPLATE_BI",
    "lowercase_modelname": "template_bi",
    "camelcase_modelname": "TemplateBi",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "bi-brand-new-bert-base-cased",
    "tokenizer_type": "Standalone",
    "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplateTF",
    "uppercase_modelname": "TEMPLATE_TF",
    "lowercase_modelname": "template_tf",
    "camelcase_modelname": "TemplateTf",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "TensorFlow",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "NewTFENCDEC",
    "uppercase_modelname": "NEW_TF_ENC_DEC",
    "lowercase_modelname": "new_tf_enc_dec_template",
    "camelcase_modelname": "NewTFEncDec",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "new-tf-enc-dec-base_template",
    "tokenizer_type": "Based on BART",
    "generate_tensorflow_pytorch_and_flax": "TensorFlow",
    "is_encoder_decoder_model": "True"
}
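Each of the small fixtures above pins one combination of framework, tokenizer base, and encoder/decoder setting, which is exactly what the guards in the doc template earlier in this diff test for (`"TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax`, `cookiecutter.is_encoder_decoder_model == "True"`, and so on). A hedged sketch of evaluating those guards for one fixture, with a stand-in path rather than the deleted file's real location:

```python
import json

# Stand-in path for one of the fixture files above, e.g. the NewTFENCDEC one.
with open("path/to/fixture.json") as f:
    cfg = json.load(f)

# Mirror the template's framework guards.
frameworks = [
    name
    for name in ("PyTorch", "TensorFlow", "Flax")
    if name in cfg["generate_tensorflow_pytorch_and_flax"]
]
# Mirror the encoder/decoder guard.
is_encoder_decoder = cfg["is_encoder_decoder_model"] == "True"

# For a fixture like NewTFENCDEC this yields ['TensorFlow'] and True,
# so only the TF...ForConditionalGeneration sections would be rendered.
print(frameworks, is_encoder_decoder)
```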
@ -335,7 +335,6 @@ src/transformers/benchmark/benchmark_args_tf.py
src/transformers/benchmark/benchmark_args_utils.py
src/transformers/benchmark/benchmark_tf.py
src/transformers/benchmark/benchmark_utils.py
src/transformers/commands/add_new_model.py
src/transformers/commands/add_new_model_like.py
src/transformers/commands/convert.py
src/transformers/commands/download.py