Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-03 12:50:06 +06:00

Remove add-new-model in favor of add-new-model-like (#30424)

* Remove add-new-model in favor of add-new-model-like
* nits

This commit is contained in:
parent 0eb8fbcdac
commit d4e92f1a21

.github/workflows/model-templates.yml (vendored): 81 lines removed
@@ -1,81 +0,0 @@
```yaml
name: Model templates runner

on:
  repository_dispatch:
  schedule:
    - cron: "0 2 * * *"

jobs:
  run_tests_templates:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          sudo apt -y update && sudo apt install -y libsndfile1-dev

      - name: Load cached virtual environment
        uses: actions/cache@v2
        id: cache
        with:
          path: ~/venv/
          key: v4-tests_templates-${{ hashFiles('setup.py') }}

      - name: Create virtual environment on cache miss
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          python -m venv ~/venv && . ~/venv/bin/activate
          pip install --upgrade pip!=21.3
          pip install -e .[dev]

      - name: Check transformers location
        # make `transformers` available as package (required since we use `-e` flag) and check it's indeed from the repo.
        run: |
          . ~/venv/bin/activate
          python setup.py develop
          transformer_loc=$(pip show transformers | grep "Location: " | cut -c11-)
          transformer_repo_loc=$(pwd .)
          if [ "$transformer_loc" != "$transformer_repo_loc/src" ]; then
            echo "transformers is from $transformer_loc but it shoud be from $transformer_repo_loc/src."
            echo "A fix is required. Stop testing."
            exit 1
          fi

      - name: Create model files
        run: |
          . ~/venv/bin/activate
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/flax-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
          transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/flax-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
          make style
          python utils/check_table.py --fix_and_overwrite
          python utils/check_dummies.py --fix_and_overwrite
          python utils/check_copies.py --fix_and_overwrite

      - name: Run all non-slow tests
        run: |
          . ~/venv/bin/activate
          python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_templates tests/*template*

      - name: Run style changes
        run: |
          . ~/venv/bin/activate
          make style && make quality && make repo-consistency

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_templates/failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: run_all_tests_templates_test_reports
          path: reports/tests_templates
```
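Before this commit, the same checks could be reproduced locally from a source checkout; the sequence below is lifted directly from the job steps above, with one representative `--testing_file` shown, and is only meant as an illustration of what the job exercised:

```bash
pip install -e ".[dev]"
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
make style
python utils/check_table.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite
python utils/check_copies.py --fix_and_overwrite
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_templates tests/*template*
```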
```diff
@@ -110,7 +110,7 @@ New models are constantly released and if you want to implement a new model, ple
 
 If you are willing to contribute the model yourself, let us know so we can help you add it to 🤗 Transformers!
 
-We have added a [detailed guide and templates](https://github.com/huggingface/transformers/tree/main/templates) to help you get started with adding a new model, and we also have a more technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model).
+We have a technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model).
 
 ## Do you want to add documentation?
```
````diff
@@ -398,12 +398,14 @@ In dem speziellen Fall, dass Sie ein Modell hinzufügen, dessen Architektur gena
 Modells übereinstimmt, müssen Sie nur ein Konvertierungsskript hinzufügen, wie in [diesem Abschnitt](#write-a-conversion-script) beschrieben.
 In diesem Fall können Sie einfach die gesamte Modellarchitektur des bereits vorhandenen Modells wiederverwenden.
 
-Andernfalls beginnen wir mit der Erstellung eines neuen Modells. Sie haben hier zwei Möglichkeiten:
+Andernfalls beginnen wir mit der Erstellung eines neuen Modells. Wir empfehlen die Verwendung des folgenden Skripts, um ein Modell hinzuzufügen
+ein bestehendes Modell:
 
-- `transformers-cli add-new-model-like`, um ein neues Modell wie ein bestehendes hinzuzufügen
-- `transformers-cli add-new-model`, um ein neues Modell aus unserer Vorlage hinzuzufügen (sieht dann aus wie BERT oder Bart, je nachdem, welche Art von Modell Sie wählen)
+```bash
+transformers-cli add-new-model-like
+```
 
-In beiden Fällen werden Sie mit einem Fragebogen aufgefordert, die grundlegenden Informationen zu Ihrem Modell auszufüllen. Für den zweiten Befehl müssen Sie `cookiecutter` installieren, weitere Informationen dazu finden Sie [hier](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model).
+Sie werden mit einem Fragebogen aufgefordert, die grundlegenden Informationen Ihres Modells einzugeben.
 
 **Eröffnen Sie einen Pull Request auf dem Haupt-Repositorium huggingface/transformers**
````
```diff
@@ -98,7 +98,7 @@ Es werden ständig neue Modelle veröffentlicht. Wenn Sie ein neues Modell imple
 
 Lassen Sie es uns wissen, wenn Sie bereit sind, das Modell selbst beizutragen. Dann können wir Ihnen helfen, es zu 🤗 Transformers hinzuzufügen!
 
-Wir haben eine [detaillierte Anleitung und Vorlagen](https://github.com/huggingface/transformers/tree/main/templates) hinzugefügt, um Ihnen das Hinzufügen eines neuen Modells zu erleichtern, und wir haben auch einen technischen Leitfaden dazu, [wie man ein Modell zu 🤗 Transformers hinzufügt](https://huggingface.co/docs/transformers/add_new_model).
+Wir haben auch einen technischen Leitfaden dazu, [wie man ein Modell zu 🤗 Transformers hinzufügt](https://huggingface.co/docs/transformers/add_new_model).
 
 ## Möchten Sie die Dokumentation erweitern?
```
````diff
@@ -398,12 +398,14 @@ In the special case that you are adding a model whose architecture exactly match
 existing model you only have to add a conversion script as described in [this section](#write-a-conversion-script).
 In this case, you can just re-use the whole model architecture of the already existing model.
 
-Otherwise, let's start generating a new model. You have two choices here:
+Otherwise, let's start generating a new model. We recommend using the following script to add a model starting from
+an existing model:
 
-- `transformers-cli add-new-model-like` to add a new model like an existing one
-- `transformers-cli add-new-model` to add a new model from our template (will look like BERT or Bart depending on the type of model you select)
+```bash
+transformers-cli add-new-model-like
+```
 
-In both cases, you will be prompted with a questionnaire to fill in the basic information of your model. The second command requires to install `cookiecutter`, you can find more information on it [here](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model).
+You will be prompted with a questionnaire to fill in the basic information of your model.
 
 **Open a Pull Request on the main huggingface/transformers repo**
````
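For readers following this documentation change: the surviving command is run from a source checkout with the dev extras installed (the templates README further down gives the full walkthrough). A minimal, illustrative session, using only commands that appear elsewhere in this diff:

```bash
git clone https://github.com/huggingface/transformers
cd transformers
pip install -e ".[dev]"
transformers-cli add-new-model-like
```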
````diff
@@ -351,13 +351,14 @@ Nel caso speciale in cui stiate aggiungendo un modello, la cui architettura sia
 dovrete solo aggiugnere uno script di conversione, come descritto [qui](#write-a-conversion-script).
 In questo caso, potete riutilizzare l'intera architettura del modello gia esistente.
 
-Se questo non é il caso, cominciamo con il generare un nuovo modello. Avrete due opzioni:
+Se questo non é il caso, cominciamo con il generare un nuovo modello. Ti consigliamo di utilizzare il seguente script per aggiungere un modello a partire da
+un modello esistente:
 
-- `transformers-cli add-new-model-like` per aggiungere un nuovo modello come uno che gia esiste
-- `transformers-cli add-new-model` per aggiungere un nuovo modello da un nostro template (questo assomigliera a BERT o Bart, in base al modello che selezionerete)
+```bash
+transformers-cli add-new-model-like
+```
 
-In entrambi i casi, l'output vi darà un questionario da riempire con informazioni basi sul modello. Il secondo comando richiede di installare
-un `cookiecutter` - maggiori informazioni [qui](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model).
+Ti verrà richiesto con un questionario di compilare le informazioni di base del tuo modello.
 
 **Aprire una Pull Request in main huggingface/transformers repo**
````
````diff
@@ -307,14 +307,15 @@ cd transformers
 [このセクション](#write-a-conversion-script)で説明されているように、変換スクリプトを追加するだけで済みます。
 この場合、既存のモデルの完全なモデルアーキテクチャを再利用できます。
 
-それ以外の場合、新しいモデルの生成を開始します。ここで2つの選択肢があります:
-
-- `transformers-cli add-new-model-like`を使用して既存のモデルのような新しいモデルを追加します
-- `transformers-cli add-new-model`を使用して、テンプレートから新しいモデルを追加します(モデルのタイプに応じてBERTまたはBartのように見えます)
+それ以外の場合は、新しいモデルの生成を開始しましょう。 次のスクリプトを使用して、以下から始まるモデルを追加することをお勧めします。
+既存のモデル:
 
-どちらの場合でも、モデルの基本情報を入力するための質問事項が表示されます。
-2番目のコマンドを実行するには、`cookiecutter`をインストールする必要があります。
-詳細については[こちら](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model)をご覧ください。
+```bash
+transformers-cli add-new-model-like
+```
+
+モデルの基本情報を入力するためのアンケートが表示されます。
 
 **主要な huggingface/transformers リポジトリでプルリクエストを開く**
````
````diff
@@ -268,12 +268,14 @@ cd transformers
 
 다음과 같이 이미 존재하는 모델의 모델 아키텍처와 정확히 일치하는 모델을 추가하는 특별한 경우에는 [이 섹션](#write-a-conversion-script)에 설명된대로 변환 스크립트만 추가하면 됩니다. 이 경우에는 이미 존재하는 모델의 전체 모델 아키텍처를 그대로 재사용할 수 있습니다.
 
-그렇지 않으면 새로운 모델 생성을 시작합시다. 여기에서 두 가지 선택지가 있습니다:
+그렇지 않으면 새 모델 생성을 시작하겠습니다. 다음 스크립트를 사용하여 다음에서 시작하는 모델을 추가하는 것이 좋습니다.
+기존 모델:
 
-- `transformers-cli add-new-model-like`를 사용하여 기존 모델과 유사한 새로운 모델 추가하기
-- `transformers-cli add-new-model`을 사용하여 템플릿을 기반으로 한 새로운 모델 추가하기 (선택한 모델 유형에 따라 BERT 또는 Bart와 유사한 모습일 것입니다)
+```bash
+transformers-cli add-new-model-like
+```
 
-두 경우 모두, 모델의 기본 정보를 입력하는 설문조사가 제시됩니다. 두 번째 명령어는 `cookiecutter`를 설치해야 합니다. 자세한 정보는 [여기](https://github.com/huggingface/transformers/tree/main/templates/adding_a_new_model)에서 확인할 수 있습니다.
+모델의 기본 정보를 입력하는 설문지가 표시됩니다.
 
 **huggingface/transformers 메인 저장소에 Pull Request 열기**
````
```diff
@@ -99,7 +99,7 @@ python src/transformers/commands/transformers_cli.py env
 
 만약 모델을 직접 기여하고 싶으시다면, 알려주세요. 🤗 Transformers에 추가할 수 있도록 도와드리겠습니다!
 
-새로운 모델을 추가하는 방법에 대한 [상세 안내서와 템플릿](https://github.com/huggingface/transformers/tree/main/templates)을 제공하고 있으며, [🤗 Transformers에 새로운 모델을 추가하는 방법](https://huggingface.co/docs/transformers/add_new_model)에 대한 기술적인 안내서도 있습니다.
+[🤗 Transformers에 새로운 모델을 추가하는 방법](https://huggingface.co/docs/transformers/add_new_model)에 대한 기술적인 안내서도 있습니다.
 
 ## 문서를 추가하고 싶으신가요? [[do-you-want-to-add-documentation]]
```
```diff
@@ -98,7 +98,7 @@ python src/transformers/commands/transformers_cli.py env
 
 如果你想亲自贡献模型,请告诉我们。让我们帮你把它添加到 🤗 Transformers!
 
-我们已经添加了[详细的指南和模板](https://github.com/huggingface/transformers/tree/main/templates)来帮助你添加新模型。我们还有一个更技术性的指南,告诉你[如何将模型添加到 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model)。
+我们还有一个更技术性的指南,告诉你[如何将模型添加到 🤗 Transformers](https://huggingface.co/docs/transformers/add_new_model)。
 
 ## 你想要添加文档吗?
```
@@ -1,259 +0,0 @@

Deleted (259 lines): the implementation of the deprecated `add-new-model` CLI command.

```python
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import shutil
import warnings
from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import List

from ..utils import logging
from . import BaseTransformersCLICommand


try:
    from cookiecutter.main import cookiecutter

    _has_cookiecutter = True
except ImportError:
    _has_cookiecutter = False

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


def add_new_model_command_factory(args: Namespace):
    return AddNewModelCommand(args.testing, args.testing_file, path=args.path)


class AddNewModelCommand(BaseTransformersCLICommand):
    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        add_new_model_parser = parser.add_parser("add-new-model")
        add_new_model_parser.add_argument("--testing", action="store_true", help="If in testing mode.")
        add_new_model_parser.add_argument("--testing_file", type=str, help="Configuration file on which to run.")
        add_new_model_parser.add_argument(
            "--path", type=str, help="Path to cookiecutter. Should only be used for testing purposes."
        )
        add_new_model_parser.set_defaults(func=add_new_model_command_factory)

    def __init__(self, testing: bool, testing_file: str, path=None, *args):
        self._testing = testing
        self._testing_file = testing_file
        self._path = path

    def run(self):
        warnings.warn(
            "The command `transformers-cli add-new-model` is deprecated and will be removed in v5 of Transformers. "
            "It is not actively maintained anymore, so might give a result that won't pass all tests and quality "
            "checks, you should use `transformers-cli add-new-model-like` instead."
        )
        if not _has_cookiecutter:
            raise ImportError(
                "Model creation dependencies are required to use the `add_new_model` command. Install them by running "
                "the following at the root of your `transformers` clone:\n\n\t$ pip install -e .[modelcreation]\n"
            )
        # Ensure that there is no other `cookiecutter-template-xxx` directory in the current working directory
        directories = [directory for directory in os.listdir() if "cookiecutter-template-" == directory[:22]]
        if len(directories) > 0:
            raise ValueError(
                "Several directories starting with `cookiecutter-template-` in current working directory. "
                "Please clean your directory by removing all folders starting with `cookiecutter-template-` or "
                "change your working directory."
            )

        path_to_transformer_root = (
            Path(__file__).parent.parent.parent.parent if self._path is None else Path(self._path).parent.parent
        )
        path_to_cookiecutter = path_to_transformer_root / "templates" / "adding_a_new_model"

        # Execute cookiecutter
        if not self._testing:
            cookiecutter(str(path_to_cookiecutter))
        else:
            with open(self._testing_file, "r") as configuration_file:
                testing_configuration = json.load(configuration_file)

            cookiecutter(
                str(path_to_cookiecutter if self._path is None else self._path),
                no_input=True,
                extra_context=testing_configuration,
            )

        directory = [directory for directory in os.listdir() if "cookiecutter-template-" in directory[:22]][0]

        # Retrieve configuration
        with open(directory + "/configuration.json", "r") as configuration_file:
            configuration = json.load(configuration_file)

        lowercase_model_name = configuration["lowercase_modelname"]
        generate_tensorflow_pytorch_and_flax = configuration["generate_tensorflow_pytorch_and_flax"]
        os.remove(f"{directory}/configuration.json")

        output_pytorch = "PyTorch" in generate_tensorflow_pytorch_and_flax
        output_tensorflow = "TensorFlow" in generate_tensorflow_pytorch_and_flax
        output_flax = "Flax" in generate_tensorflow_pytorch_and_flax

        model_dir = f"{path_to_transformer_root}/src/transformers/models/{lowercase_model_name}"
        os.makedirs(model_dir, exist_ok=True)
        os.makedirs(f"{path_to_transformer_root}/tests/models/{lowercase_model_name}", exist_ok=True)

        # Tests require submodules as they have parent imports
        with open(f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/__init__.py", "w"):
            pass

        shutil.move(
            f"{directory}/__init__.py",
            f"{model_dir}/__init__.py",
        )
        shutil.move(
            f"{directory}/configuration_{lowercase_model_name}.py",
            f"{model_dir}/configuration_{lowercase_model_name}.py",
        )

        def remove_copy_lines(path):
            with open(path, "r") as f:
                lines = f.readlines()
            with open(path, "w") as f:
                for line in lines:
                    if "# Copied from transformers." not in line:
                        f.write(line)

        if output_pytorch:
            if not self._testing:
                remove_copy_lines(f"{directory}/modeling_{lowercase_model_name}.py")

            shutil.move(
                f"{directory}/modeling_{lowercase_model_name}.py",
                f"{model_dir}/modeling_{lowercase_model_name}.py",
            )

            shutil.move(
                f"{directory}/test_modeling_{lowercase_model_name}.py",
                f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/test_modeling_{lowercase_model_name}.py",
            )
        else:
            os.remove(f"{directory}/modeling_{lowercase_model_name}.py")
            os.remove(f"{directory}/test_modeling_{lowercase_model_name}.py")

        if output_tensorflow:
            if not self._testing:
                remove_copy_lines(f"{directory}/modeling_tf_{lowercase_model_name}.py")

            shutil.move(
                f"{directory}/modeling_tf_{lowercase_model_name}.py",
                f"{model_dir}/modeling_tf_{lowercase_model_name}.py",
            )

            shutil.move(
                f"{directory}/test_modeling_tf_{lowercase_model_name}.py",
                f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/test_modeling_tf_{lowercase_model_name}.py",
            )
        else:
            os.remove(f"{directory}/modeling_tf_{lowercase_model_name}.py")
            os.remove(f"{directory}/test_modeling_tf_{lowercase_model_name}.py")

        if output_flax:
            if not self._testing:
                remove_copy_lines(f"{directory}/modeling_flax_{lowercase_model_name}.py")

            shutil.move(
                f"{directory}/modeling_flax_{lowercase_model_name}.py",
                f"{model_dir}/modeling_flax_{lowercase_model_name}.py",
            )

            shutil.move(
                f"{directory}/test_modeling_flax_{lowercase_model_name}.py",
                f"{path_to_transformer_root}/tests/models/{lowercase_model_name}/test_modeling_flax_{lowercase_model_name}.py",
            )
        else:
            os.remove(f"{directory}/modeling_flax_{lowercase_model_name}.py")
            os.remove(f"{directory}/test_modeling_flax_{lowercase_model_name}.py")

        shutil.move(
            f"{directory}/{lowercase_model_name}.md",
            f"{path_to_transformer_root}/docs/source/en/model_doc/{lowercase_model_name}.md",
        )

        shutil.move(
            f"{directory}/tokenization_{lowercase_model_name}.py",
            f"{model_dir}/tokenization_{lowercase_model_name}.py",
        )

        shutil.move(
            f"{directory}/tokenization_fast_{lowercase_model_name}.py",
            f"{model_dir}/tokenization_{lowercase_model_name}_fast.py",
        )

        from os import fdopen, remove
        from shutil import copymode, move
        from tempfile import mkstemp

        def replace(original_file: str, line_to_copy_below: str, lines_to_copy: List[str]):
            # Create temp file
            fh, abs_path = mkstemp()
            line_found = False
            with fdopen(fh, "w") as new_file:
                with open(original_file) as old_file:
                    for line in old_file:
                        new_file.write(line)
                        if line_to_copy_below in line:
                            line_found = True
                            for line_to_copy in lines_to_copy:
                                new_file.write(line_to_copy)

            if not line_found:
                raise ValueError(f"Line {line_to_copy_below} was not found in file.")

            # Copy the file permissions from the old file to the new file
            copymode(original_file, abs_path)
            # Remove original file
            remove(original_file)
            # Move new file
            move(abs_path, original_file)

        def skip_units(line):
            return (
                ("generating PyTorch" in line and not output_pytorch)
                or ("generating TensorFlow" in line and not output_tensorflow)
                or ("generating Flax" in line and not output_flax)
            )

        def replace_in_files(path_to_datafile):
            with open(path_to_datafile) as datafile:
                lines_to_copy = []
                skip_file = False
                skip_snippet = False
                for line in datafile:
                    if "# To replace in: " in line and "##" not in line:
                        file_to_replace_in = line.split('"')[1]
                        skip_file = skip_units(line)
                    elif "# Below: " in line and "##" not in line:
                        line_to_copy_below = line.split('"')[1]
                        skip_snippet = skip_units(line)
                    elif "# End." in line and "##" not in line:
                        if not skip_file and not skip_snippet:
                            replace(file_to_replace_in, line_to_copy_below, lines_to_copy)

                        lines_to_copy = []
                    elif "# Replace with" in line and "##" not in line:
                        lines_to_copy = []
                    elif "##" not in line:
                        lines_to_copy.append(line)

            remove(path_to_datafile)

        replace_in_files(f"{directory}/to_replace_{lowercase_model_name}.py")
        os.rmdir(directory)
```
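For context on `replace_in_files` above: it consumes a generated `to_replace_<model_name>.py` data file made of marker comments, copying the lines between `# Below:` and `# End.` into the file named by `# To replace in:`, and skipping snippets whose marker mentions a framework that was not generated. The fragment below is a hypothetical illustration consistent with that parsing logic; the target path, anchor line, and model name are invented:

```python
# To replace in: "src/transformers/models/auto/configuration_auto.py"
# Below: "# Add configs here"
        ("brand_new_bert", "BrandNewBertConfig"),
# End.
```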
```diff
@@ -15,7 +15,6 @@
 
 from argparse import ArgumentParser
 
-from .add_new_model import AddNewModelCommand
 from .add_new_model_like import AddNewModelLikeCommand
 from .convert import ConvertCommand
 from .download import DownloadCommand
@@ -38,7 +37,6 @@ def main():
     RunCommand.register_subcommand(commands_parser)
     ServeCommand.register_subcommand(commands_parser)
     UserCommands.register_subcommand(commands_parser)
-    AddNewModelCommand.register_subcommand(commands_parser)
     AddNewModelLikeCommand.register_subcommand(commands_parser)
     LfsCommands.register_subcommand(commands_parser)
     PTtoTFCommand.register_subcommand(commands_parser)
```
```diff
@@ -16,257 +16,8 @@ limitations under the License.
 
 # Adding a new model
 
-This folder contains templates to generate new models that fit the current API and pass all tests. It generates
-models in both PyTorch, TensorFlow, and Flax and completes the `__init__.py` and auto-modeling files, and creates the
-documentation. Their use is described in the [next section](#cookiecutter-templates).
+This page has been updated in light of the removal of the `add_new_model` script in favor of the more complete
+`add_new_model_like` script.
 
-There is also a CLI tool to generate a new model like an existing one called `transformers-cli add-new-model-like`.
-Jump to the [Add new model like section](#add-new-model-like-command) to learn how to use it.
+We recommend you checkout the documentation of [How to add a model](https://huggingface.co/docs/transformers/main/en/add_new_model)
+in the Hugging Face Transformers documentation for complete and up-to-date instructions.
```

The rest of this hunk deletes the remainder of the old templates README. For reference, the removed content read as follows.

## Cookiecutter Templates

Using the `cookiecutter` utility requires to have all the `dev` dependencies installed. Let's first clone the
repository and install it in our environment:

```shell script
git clone https://github.com/huggingface/transformers
cd transformers
pip install -e ".[dev]"
```

Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
(PyTorch, TensorFlow and/or Flax) then do:

```bash
pip install -e ".[quality]"
```

Once the installation is done, you can use the CLI command `add-new-model` to generate your models:

```shell script
transformers-cli add-new-model
```

This should launch the `cookiecutter` package which should prompt you to fill in the configuration.

The `modelname` should be cased according to the plain text casing, i.e., BERT, RoBERTa, DeBERTa.
```
modelname [<ModelNAME>]:
uppercase_modelname [<MODEL_NAME>]:
lowercase_modelname [<model_name>]:
camelcase_modelname [<ModelName>]:
```

Fill in the `authors` with your team members:
```
authors [The HuggingFace Team]:
```

The checkpoint identifier is the checkpoint that will be used in the examples across the files. Put the name you wish,
as it will appear on the modelhub. Do not forget to include the organisation.
```
checkpoint_identifier [organisation/<model_name>-base-cased]:
```

The tokenizer should either be based on BERT if it behaves exactly like the BERT tokenizer, or a standalone otherwise.
```
Select tokenizer_type:
1 - Based on BERT
2 - Standalone
Choose from 1, 2 [1]:
```
<!---
Choose if your model is an encoder-decoder, or an encoder-only architecture.

If your model is an encoder-only architecture, the generated architecture will be based on the BERT model.
If your model is an encoder-decoder architecture, the generated architecture will be based on the BART model. You can,
of course, edit the files once the generation is complete.
```
Select is_encoder_decoder_model:
1 - True
2 - False
Choose from 1, 2 [1]:
```
-->

Once the command has finished, you should have a total of 7 new files spread across the repository:
```
docs/source/model_doc/<model_name>.md
src/transformers/models/<model_name>/configuration_<model_name>.py
src/transformers/models/<model_name>/modeling_<model_name>.py
src/transformers/models/<model_name>/modeling_tf_<model_name>.py
src/transformers/models/<model_name>/tokenization_<model_name>.py
tests/models/<model_name>/test_modeling_<model_name>.py
tests/models/<model_name>/test_modeling_tf_<model_name>.py
```

You can run the tests to ensure that they all pass:

```bash
python -m pytest ./tests/test_*<model_name>*.py
```

Feel free to modify each file to mimic the behavior of your model.

⚠ You should be careful about the classes preceded by the following line:

```python
# Copied from transformers.[...]
```

This line ensures that the copy does not diverge from the source. If it *should* diverge, because the implementation
is different, this line needs to be deleted. If you don't delete this line and run `make fix-copies`,
your changes will be overwritten.

Once you have edited the files to fit your architecture, simply re-run the tests (and edit them if a change
is needed!) afterwards to make sure everything works as expected.

Once the files are generated and you are happy with your changes, here's a checklist to ensure that your contribution
will be merged quickly:

- You should run the `make fixup` utility to fix the style of the files and to ensure the code quality meets the
  library's standards.
- You should complete the documentation file (`docs/source/model_doc/<model_name>.rst`) so that your model may be
  usable.

## Add new model like command

Using the `transformers-cli add-new-model-like` command requires to have all the `dev` dependencies installed. Let's
first clone the repository and install it in our environment:

```shell script
git clone https://github.com/huggingface/transformers
cd transformers
pip install -e ".[dev]"
```

Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
(PyTorch, TensorFlow and/or Flax) then do:

```bash
pip install -e ".[quality]"
```

Once the installation is done, you can use the CLI command `add-new-model-like` to generate your models:

```shell script
transformers-cli add-new-model-like
```

This will start a small questionnaire you have to fill.

```
What identifier would you like to use for the model type of this model?
```

You will have to input the model type of the model you want to clone. The model type can be found in several places:
- inside the configuration of any checkpoint of that model
- the name of the documentation page of that model

For instance the doc page of `BigBirdPegasus` is `https://huggingface.co/docs/transformers/model_doc/bigbird_pegasus`
so its model type is `"bigbird_pegasus"`.

If you make a typo, the command will suggest you the closest model types it can find.
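As an aside to the removed walkthrough above, the model type can also be read programmatically from any checkpoint configuration. A minimal sketch using the public `AutoConfig` API; the checkpoint name is just an example:

```python
from transformers import AutoConfig

# `model_type` is the identifier the questionnaire expects.
config = AutoConfig.from_pretrained("google/bigbird-pegasus-large-arxiv")
print(config.model_type)  # bigbird_pegasus
```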
Once this is done, the questionnaire will ask you for the new model name and its various casings:

```
What is the name for your new model?
What identifier would you like to use for the model type of this model?
What name would you like to use for the module of this model?
What prefix (camel-cased) would you like to use for the model classes of this model?
What prefix (upper-cased) would you like to use for the constants relative to this model?
```

From your answer to the first question, defaults will be determined for all others. The first name should be written
as you want your model be named in the doc, with no special casing (like RoBERTa) and from there, you can either stick
with the defaults or change the cased versions.

Next will be the name of the config class to use for this model:

```
What will be the name of the config class for this model?
```

Then, you will be asked for a checkpoint identifier:

```
Please give a checkpoint identifier (on the model Hub) for this new model.
```

This is the checkpoint that will be used in the examples across the files and the integration tests. Put the name you
wish, as it will appear on the Model Hub. Do not forget to include the organisation.

Then you will have to say whether your model re-uses the same processing classes as the model you're cloning:

```
Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor/XxxImageProcessor)
```

Answer yes if you have no intentions to make any change to the class used for preprocessing. It can use different
files (for instance you can reuse the `BertTokenizer` with a new vocab file).

If you answer no, you will have to give the name of the classes
for the new tokenizer/image processor/feature extractor/processor (depending on the model you're cloning).

Next the questionnaire will ask

```
Should we add # Copied from statements when creating the new modeling file?
```

This is the internal mechanism used in the library to make sure code copied from various modeling files stay consistent.
If you plan to completely rewrite the modeling file, you should answer no, whereas if you just want to tweak one part
of the model, you should answer yes.

Lastly, the questionnaire will inquire about frameworks:

```
Should we add a version of your new model in all the frameworks implemented by Old Model (xxx)?
```

If you answer yes, the new model will have files for all the frameworks implemented by the model you're cloning.
Otherwise, you will get a new question to select the frameworks you want.

Once the command has finished, you will see a new subfolder in the `src/transformers/models/` folder, with the
necessary files (configuration and modeling files for all frameworks requested, and maybe the processing files,
depending on your choices).

You will also see a doc file and tests for your new models. First you should run

```bash
make style
make fix-copies
```

and then you can start tweaking your model. You should:
- fill the doc file at `docs/source/model_doc/model_name.md`
- tweak the configuration and modeling files to your need

Once you're done, you can run the tests to ensure that they all pass:

```bash
python -m pytest ./tests/test_*<model_name>*.py
```

⚠ You should be careful about the classes preceded by the following line:

```python
# Copied from transformers.[...]
```

This line ensures that the copy does not diverge from the source. If it *should* diverge, because the implementation
is different, this line needs to be deleted. If you don't delete this line and run `make fix-copies`,
your changes will be overwritten.
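To make the `# Copied from` mechanism concrete, here is a hypothetical class from a generated modeling file. The model name is invented for the example, but the body matches the BERT source the marker points at, which is what keeps `make fix-copies` from rewriting it:

```python
import torch.nn as nn


# Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->BrandNewBert
class BrandNewBertSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        # Dense projection, dropout, then residual add followed by LayerNorm, as in BERT.
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states
```

Deleting the marker line is what lets the body diverge from BERT without being overwritten.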
Once you have edited the files to fit your architecture, simply re-run the tests (and edit them if a change
is needed!) afterwards to make sure everything works as expected.

Once the files are generated and you are happy with your changes, here's a checklist to ensure that your contribution
will be merged quickly:

- You should run the `make fixup` utility to fix the style of the files and to ensure the code quality meets the
  library's standards.
- You should add your model to the main README then run `make fix-copies`.
@@ -1,286 +0,0 @@

Deleted (286 lines): the cookiecutter template for the generated model package's `__init__.py`.

```python
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule, OptionalDependencyNotAvailable, is_tokenizers_available


{%- if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
from ...utils import is_tf_available


{% endif %}
{%- if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
from ...utils import is_torch_available


{% endif %}
{%- if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
from ...utils import is_flax_available


{% endif %}

_import_structure = {
    "configuration_{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP", "{{cookiecutter.camelcase_modelname}}Config"],
    "tokenization_{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.camelcase_modelname}}Tokenizer"],
}

try:
    if not is_tokenizers_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["tokenization_{{cookiecutter.lowercase_modelname}}_fast"] = ["{{cookiecutter.camelcase_modelname}}TokenizerFast"]

{%- if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
try:
    if not is_torch_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["modeling_{{cookiecutter.lowercase_modelname}}"] = [
        "{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
        "{{cookiecutter.camelcase_modelname}}ForMaskedLM",
        "{{cookiecutter.camelcase_modelname}}ForCausalLM",
        "{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
        "{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
        "{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
        "{{cookiecutter.camelcase_modelname}}ForTokenClassification",
        "{{cookiecutter.camelcase_modelname}}Layer",
        "{{cookiecutter.camelcase_modelname}}Model",
        "{{cookiecutter.camelcase_modelname}}PreTrainedModel",
        "load_tf_weights_in_{{cookiecutter.lowercase_modelname}}",
    ]
{% else %}
try:
    if not is_torch_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["modeling_{{cookiecutter.lowercase_modelname}}"] = [
        "{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
        "{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
        "{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
        "{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
        "{{cookiecutter.camelcase_modelname}}ForCausalLM",
        "{{cookiecutter.camelcase_modelname}}Model",
        "{{cookiecutter.camelcase_modelname}}PreTrainedModel",
    ]
{% endif %}
{% endif %}


{%- if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
try:
    if not is_tf_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["modeling_tf_{{cookiecutter.lowercase_modelname}}"] = [
        "TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
        "TF{{cookiecutter.camelcase_modelname}}ForMaskedLM",
        "TF{{cookiecutter.camelcase_modelname}}ForCausalLM",
        "TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
        "TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
        "TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
        "TF{{cookiecutter.camelcase_modelname}}ForTokenClassification",
        "TF{{cookiecutter.camelcase_modelname}}Layer",
        "TF{{cookiecutter.camelcase_modelname}}Model",
        "TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
    ]
{% else %}
try:
    if not is_tf_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["modeling_tf_{{cookiecutter.lowercase_modelname}}"] = [
        "TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
        "TF{{cookiecutter.camelcase_modelname}}Model",
        "TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
    ]
{% endif %}
{% endif %}


{%- if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
try:
    if not is_flax_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["modeling_flax_{{cookiecutter.lowercase_modelname}}"] = [
        "Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM",
        "Flax{{cookiecutter.camelcase_modelname}}ForCausalLM",
        "Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
        "Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
        "Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
        "Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification",
        "Flax{{cookiecutter.camelcase_modelname}}Layer",
        "Flax{{cookiecutter.camelcase_modelname}}Model",
        "Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
    ]
{% else %}
try:
    if not is_flax_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["modeling_flax_{{cookiecutter.lowercase_modelname}}"] = [
        "Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
        "Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
        "Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
        "Flax{{cookiecutter.camelcase_modelname}}Model",
        "Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
    ]
{% endif %}
{% endif %}


if TYPE_CHECKING:
    from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP, {{cookiecutter.camelcase_modelname}}Config
    from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer

    try:
        if not is_tokenizers_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .tokenization_{{cookiecutter.lowercase_modelname}}_fast import {{cookiecutter.camelcase_modelname}}TokenizerFast

{%- if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
    try:
        if not is_torch_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_{{cookiecutter.lowercase_modelname}} import (
            {{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
            {{cookiecutter.camelcase_modelname}}ForMaskedLM,
            {{cookiecutter.camelcase_modelname}}ForCausalLM,
            {{cookiecutter.camelcase_modelname}}ForMultipleChoice,
            {{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
            {{cookiecutter.camelcase_modelname}}ForSequenceClassification,
            {{cookiecutter.camelcase_modelname}}ForTokenClassification,
            {{cookiecutter.camelcase_modelname}}Layer,
            {{cookiecutter.camelcase_modelname}}Model,
            {{cookiecutter.camelcase_modelname}}PreTrainedModel,
            load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
        )
{% else %}
    try:
        if not is_torch_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_{{cookiecutter.lowercase_modelname}} import (
            {{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
            {{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
            {{cookiecutter.camelcase_modelname}}ForCausalLM,
            {{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
            {{cookiecutter.camelcase_modelname}}ForSequenceClassification,
            {{cookiecutter.camelcase_modelname}}Model,
            {{cookiecutter.camelcase_modelname}}PreTrainedModel,
        )
{% endif %}
{% endif %}
{%- if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
    try:
        if not is_tf_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
            TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
            TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
            TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
            TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
            TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
            TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
            TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
            TF{{cookiecutter.camelcase_modelname}}Layer,
            TF{{cookiecutter.camelcase_modelname}}Model,
            TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
        )
{% else %}
    try:
        if not is_tf_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
            TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
            TF{{cookiecutter.camelcase_modelname}}Model,
            TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
        )
{% endif %}
{% endif %}
{%- if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
    try:
        if not is_flax_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_{{cookiecutter.lowercase_modelname}} import (
            Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
            Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
            Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
            Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
            Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
            Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
            Flax{{cookiecutter.camelcase_modelname}}Layer,
            Flax{{cookiecutter.camelcase_modelname}}Model,
            Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
        )
{% else %}
    try:
        if not is_flax_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_{{cookiecutter.lowercase_modelname}} import (
            Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
            Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
            Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
            Flax{{cookiecutter.camelcase_modelname}}Model,
            Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
        )
{% endif %}
{% endif %}

else:
    import sys

    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
```
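Rendered for a hypothetical PyTorch-only, encoder-only model called `brand_new_bert`, the Jinja blocks above collapse to roughly the following abbreviated module. This sketch is illustrative only and is not part of the diff:

```python
from typing import TYPE_CHECKING

from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available


_import_structure = {
    "configuration_brand_new_bert": ["BRAND_NEW_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "BrandNewBertConfig"],
    "tokenization_brand_new_bert": ["BrandNewBertTokenizer"],
}

try:
    if not is_torch_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["modeling_brand_new_bert"] = ["BrandNewBertModel", "BrandNewBertPreTrainedModel"]

if TYPE_CHECKING:
    from .configuration_brand_new_bert import BRAND_NEW_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BrandNewBertConfig
    from .tokenization_brand_new_bert import BrandNewBertTokenizer
else:
    import sys

    # Lazily expose the symbols listed in _import_structure without importing heavy backends up front.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
```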
@@ -1,11 +0,0 @@

Deleted (11 lines): a `configuration.json` template that records the chosen cookiecutter context; the command above reads these same keys back.

```json
{
  "modelname": "{{cookiecutter.modelname}}",
  "uppercase_modelname": "{{cookiecutter.uppercase_modelname}}",
  "lowercase_modelname": "{{cookiecutter.lowercase_modelname}}",
  "camelcase_modelname": "{{cookiecutter.camelcase_modelname}}",
  "authors": "{{cookiecutter.authors}}",
  "checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}",
  "tokenizer_type": "{{cookiecutter.tokenizer_type}}",
  "generate_tensorflow_pytorch_and_flax": "{{cookiecutter.generate_tensorflow_pytorch_and_flax}}",
  "is_encoder_decoder_model": "{{cookiecutter.is_encoder_decoder_model}}"
}
```
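Filled in with concrete answers, the recorded context would look roughly like this; every value below is illustrative only, and the command above merely checks that framework names appear as substrings of `generate_tensorflow_pytorch_and_flax`:

```json
{
  "modelname": "BrandNewBERT",
  "uppercase_modelname": "BRAND_NEW_BERT",
  "lowercase_modelname": "brand_new_bert",
  "camelcase_modelname": "BrandNewBert",
  "authors": "The HuggingFace Team",
  "checkpoint_identifier": "organisation/brand-new-bert-base-cased",
  "tokenizer_type": "Based on BERT",
  "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax",
  "is_encoder_decoder_model": "False"
}
```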
@ -1,235 +0,0 @@
# coding=utf-8
# Copyright 2022 {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" {{cookiecutter.modelname}} model configuration """

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`~{{cookiecutter.camelcase_modelname}}Model`].
    It is used to instantiate an {{cookiecutter.modelname}} model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the {{cookiecutter.modelname}} [{{cookiecutter.checkpoint_identifier}}](https://huggingface.co/{{cookiecutter.checkpoint_identifier}}) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used
    to control the model outputs. Read the documentation from [`PretrainedConfig`]
    for more information.


    Args:
    {% if cookiecutter.is_encoder_decoder_model == "False" -%}
        vocab_size (`int`, *optional*, defaults to 30522):
            Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`~{{cookiecutter.camelcase_modelname}}Model`] or
            [`~TF{{cookiecutter.camelcase_modelname}}Model`].
        hidden_size (`int`, *optional*, defaults to 768):
            Dimension of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler.
            If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        max_position_embeddings (`int`, *optional*, defaults to 512):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (`int`, *optional*, defaults to 2):
            The vocabulary size of the `token_type_ids` passed when calling [`~{{cookiecutter.camelcase_modelname}}Model`] or
            [`~TF{{cookiecutter.camelcase_modelname}}Model`].
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
    {% else -%}
        vocab_size (`int`, *optional*, defaults to 50265):
            Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`~{{cookiecutter.camelcase_modelname}}Model`] or
            [`~TF{{cookiecutter.camelcase_modelname}}Model`].
        d_model (`int`, *optional*, defaults to 1024):
            Dimension of the layers and the pooler layer.
        encoder_layers (`int`, *optional*, defaults to 12):
            Number of encoder layers.
        decoder_layers (`int`, *optional*, defaults to 12):
            Number of decoder layers.
        encoder_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer encoder.
        decoder_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer decoder.
        decoder_ffn_dim (`int`, *optional*, defaults to 4096):
            Dimension of the "intermediate" (often named feed-forward) layer in the decoder.
        encoder_ffn_dim (`int`, *optional*, defaults to 4096):
            Dimension of the "intermediate" (often named feed-forward) layer in the encoder.
        activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string,
            `"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
        dropout (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        activation_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for activations inside the fully connected layer.
        classifier_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the classifier.
        max_position_embeddings (`int`, *optional*, defaults to 1024):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        init_std (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        encoder_layerdrop (`float`, *optional*, defaults to 0.0):
            The LayerDrop probability for the encoder. See the [LayerDrop paper](
            https://arxiv.org/abs/1909.11556) for more details.
        decoder_layerdrop (`float`, *optional*, defaults to 0.0):
            The LayerDrop probability for the decoder. See the [LayerDrop paper](
            https://arxiv.org/abs/1909.11556) for more details.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
    {% endif -%}

    Example:

    ```python
    >>> from transformers import {{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}Config

    >>> # Initializing a {{cookiecutter.modelname}} {{cookiecutter.checkpoint_identifier}} style configuration
    >>> configuration = {{cookiecutter.camelcase_modelname}}Config()

    >>> # Initializing a model from the {{cookiecutter.checkpoint_identifier}} style configuration
    >>> model = {{cookiecutter.camelcase_modelname}}Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """
    model_type = "{{cookiecutter.lowercase_modelname}}"
    {% if cookiecutter.is_encoder_decoder_model == "False" -%}
    {% else -%}
    keys_to_ignore_at_inference = ["past_key_values"]
    {% endif -%}

    {% if cookiecutter.is_encoder_decoder_model == "False" %}
    {%- else %}
    attribute_map = {
        "num_attention_heads": "encoder_attention_heads",
        "hidden_size": "d_model"
    }

    {%- endif %}

    def __init__(
        self,
        {% if cookiecutter.is_encoder_decoder_model == "False" -%}
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        use_cache=True,
        {% else -%}
        vocab_size=50265,
        max_position_embeddings=1024,
        encoder_layers=12,
        encoder_ffn_dim=4096,
        encoder_attention_heads=16,
        decoder_layers=12,
        decoder_ffn_dim=4096,
        decoder_attention_heads=16,
        encoder_layerdrop=0.0,
        decoder_layerdrop=0.0,
        use_cache=True,
        is_encoder_decoder=True,
        activation_function="gelu",
        d_model=1024,
        dropout=0.1,
        attention_dropout=0.0,
        activation_dropout=0.0,
        init_std=0.02,
        decoder_start_token_id=2,
        classifier_dropout=0.0,
        scale_embedding=False,
        {% endif -%}
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        **kwargs
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        {% if cookiecutter.is_encoder_decoder_model == "False" -%}
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.initializer_range = initializer_range
        self.type_vocab_size = type_vocab_size
        self.layer_norm_eps = layer_norm_eps
        self.use_cache = use_cache
        {% else -%}
        self.d_model = d_model
        self.encoder_ffn_dim = encoder_ffn_dim
        self.encoder_layers = encoder_layers
        self.encoder_attention_heads = encoder_attention_heads
        self.decoder_ffn_dim = decoder_ffn_dim
        self.decoder_layers = decoder_layers
        self.decoder_attention_heads = decoder_attention_heads
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.activation_dropout = activation_dropout
        self.activation_function = activation_function
        self.init_std = init_std
        self.encoder_layerdrop = encoder_layerdrop
        self.decoder_layerdrop = decoder_layerdrop
        self.classifier_dropout = classifier_dropout
        self.use_cache = use_cache
        self.num_hidden_layers = encoder_layers
        self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True

        {% endif -%}
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            {% if cookiecutter.is_encoder_decoder_model == "False" -%}
            {% else -%}
            is_encoder_decoder=is_encoder_decoder,
            decoder_start_token_id=decoder_start_token_id,
            {% endif -%}
            **kwargs
        )
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,669 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from transformers import is_flax_available, {{cookiecutter.camelcase_modelname}}Config
|
|
||||||
from transformers.testing_utils import require_flax, slow
|
|
||||||
|
|
||||||
from ...test_configuration_common import ConfigTester
|
|
||||||
from ...test_modeling_flax_common import FlaxModelTesterMixin, ids_tensor
|
|
||||||
|
|
||||||
if is_flax_available():
|
|
||||||
import numpy as np
|
|
||||||
from transformers import (
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTester:
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
|
||||||
self.parent = parent
|
|
||||||
self.batch_size = 13
|
|
||||||
self.seq_length = 7
|
|
||||||
self.is_training = True
|
|
||||||
self.use_input_mask = True
|
|
||||||
self.use_token_type_ids = True
|
|
||||||
self.use_labels = True
|
|
||||||
self.vocab_size = 99
|
|
||||||
self.hidden_size = 32
|
|
||||||
self.num_hidden_layers = 5
|
|
||||||
self.num_attention_heads = 4
|
|
||||||
self.intermediate_size = 37
|
|
||||||
self.hidden_act = "gelu"
|
|
||||||
self.hidden_dropout_prob = 0.1
|
|
||||||
self.attention_probs_dropout_prob = 0.1
|
|
||||||
self.max_position_embeddings = 512
|
|
||||||
self.type_vocab_size = 16
|
|
||||||
self.type_sequence_label_size = 2
|
|
||||||
self.initializer_range = 0.02
|
|
||||||
self.num_labels = 3
|
|
||||||
self.num_choices = 4
|
|
||||||
self.scope = None
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
|
||||||
|
|
||||||
input_mask = None
|
|
||||||
if self.use_input_mask:
|
|
||||||
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
|
|
||||||
|
|
||||||
token_type_ids = None
|
|
||||||
if self.use_token_type_ids:
|
|
||||||
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
|
|
||||||
|
|
||||||
sequence_labels = None
|
|
||||||
token_labels = None
|
|
||||||
choice_labels = None
|
|
||||||
if self.use_labels:
|
|
||||||
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
|
||||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
|
||||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
|
||||||
|
|
||||||
config = {{cookiecutter.camelcase_modelname}}Config(
|
|
||||||
vocab_size=self.vocab_size,
|
|
||||||
hidden_size=self.hidden_size,
|
|
||||||
num_hidden_layers=self.num_hidden_layers,
|
|
||||||
num_attention_heads=self.num_attention_heads,
|
|
||||||
intermediate_size=self.intermediate_size,
|
|
||||||
hidden_act=self.hidden_act,
|
|
||||||
hidden_dropout_prob=self.hidden_dropout_prob,
|
|
||||||
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
|
||||||
max_position_embeddings=self.max_position_embeddings,
|
|
||||||
type_vocab_size=self.type_vocab_size,
|
|
||||||
initializer_range=self.initializer_range,
|
|
||||||
return_dict=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
|
|
||||||
def create_and_check_model(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}Model(config=config)
|
|
||||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
|
||||||
|
|
||||||
inputs = [input_ids, input_mask]
|
|
||||||
|
|
||||||
result = model(*inputs)
|
|
||||||
|
|
||||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
|
||||||
|
|
||||||
def create_and_check_lm_head(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.is_decoder = True
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
prediction_scores = model(**inputs)["logits"]
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(prediction_scores.shape), [self.batch_size, self.seq_length, self.vocab_size]
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_for_masked_lm(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
result = model(**inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
|
||||||
|
|
||||||
def create_and_check_for_sequence_classification(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.num_labels = self.num_labels
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
|
|
||||||
result = model(**inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
|
||||||
|
|
||||||
def create_and_check_for_multiple_choice(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.num_choices = self.num_choices
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice(config=config)
|
|
||||||
multiple_choice_inputs_ids = np.tile(np.expand_dims(input_ids, 1), (1, self.num_choices, 1))
|
|
||||||
multiple_choice_input_mask = np.tile(np.expand_dims(input_mask, 1), (1, self.num_choices, 1))
|
|
||||||
multiple_choice_token_type_ids = np.tile(np.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
|
|
||||||
inputs = {
|
|
||||||
"input_ids": multiple_choice_inputs_ids,
|
|
||||||
"attention_mask": multiple_choice_input_mask,
|
|
||||||
"token_type_ids": multiple_choice_token_type_ids,
|
|
||||||
}
|
|
||||||
result = model(**inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
|
|
||||||
|
|
||||||
def create_and_check_for_token_classification(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.num_labels = self.num_labels
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
result = model(**inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
|
|
||||||
|
|
||||||
def create_and_check_for_question_answering(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
|
|
||||||
result = model(**inputs)
|
|
||||||
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
|
|
||||||
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
|
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
|
||||||
(
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
) = config_and_inputs
|
|
||||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
|
||||||
return config, inputs_dict
|
|
||||||
|
|
||||||
|
|
||||||
@require_flax
|
|
||||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
|
||||||
)
|
|
||||||
if is_flax_available()
|
|
||||||
else ()
|
|
||||||
)
|
|
||||||
|
|
||||||
test_head_masking = False
|
|
||||||
test_onnx = False
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
self.model_tester = Flax{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
|
||||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config, hidden_size=37)
|
|
||||||
|
|
||||||
def test_config(self):
|
|
||||||
self.config_tester.run_common_tests()
|
|
||||||
|
|
||||||
def test_model(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_masked_lm(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_causal_lm(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_lm_head(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_multiple_choice(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_question_answering(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_sequence_classification(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_token_classification(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
@slow
|
|
||||||
def test_model_from_pretrained(self):
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}Model.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
|
||||||
self.assertIsNotNone(model)
|
|
||||||
|
|
||||||
|
|
||||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
|
||||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
|
||||||
if a is None and b is None:
|
|
||||||
return True
|
|
||||||
try:
|
|
||||||
if _assert_tensors_equal(a, b, atol=atol):
|
|
||||||
return True
|
|
||||||
raise
|
|
||||||
except Exception:
|
|
||||||
if len(prefix) > 0:
|
|
||||||
prefix = f"{prefix}: "
|
|
||||||
raise AssertionError(f"{prefix}{a} != {b}")
|
|
||||||
|
|
||||||
|
|
||||||
@require_flax
|
|
||||||
class Flax{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
|
||||||
@slow
|
|
||||||
def test_inference_masked_lm(self):
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
|
||||||
input_ids = np.array([[0, 1, 2, 3, 4, 5]])
|
|
||||||
output = model(input_ids)[0]
|
|
||||||
|
|
||||||
# TODO Replace vocab size
|
|
||||||
vocab_size = 32000
|
|
||||||
|
|
||||||
expected_shape = [1, 6, vocab_size]
|
|
||||||
self.assertEqual(output.shape, expected_shape)
|
|
||||||
|
|
||||||
print(output[:, :3, :3])
|
|
||||||
|
|
||||||
# TODO Replace values below with what was printed above.
|
|
||||||
expected_slice = np.array(
|
|
||||||
[
|
|
||||||
[
|
|
||||||
[-0.05243197, -0.04498899, 0.05512108],
|
|
||||||
[-0.07444685, -0.01064632, 0.04352357],
|
|
||||||
[-0.05020351, 0.05530146, 0.00700043],
|
|
||||||
]
|
|
||||||
]
|
|
||||||
)
|
|
||||||
_assert_tensors_equal(output[:, :3, :3], expected_slice, atol=1e-4)
|
|
||||||
|
|
||||||
{% else %}
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from transformers import (
|
|
||||||
is_flax_available,
|
|
||||||
{{cookiecutter.camelcase_modelname}}Config,
|
|
||||||
{{cookiecutter.camelcase_modelname}}Tokenizer,
|
|
||||||
)
|
|
||||||
from transformers.testing_utils import require_sentencepiece, require_flax, require_tokenizers, slow
|
|
||||||
|
|
||||||
from ...test_configuration_common import ConfigTester
|
|
||||||
from ...test_modeling_flax_common import FlaxModelTesterMixin, ids_tensor
|
|
||||||
|
|
||||||
|
|
||||||
if is_flax_available():
|
|
||||||
import numpy as np
|
|
||||||
import jax.numpy as jnp
|
|
||||||
from transformers import (
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@require_flax
|
|
||||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTester:
|
|
||||||
config_cls = {{cookiecutter.camelcase_modelname}}Config
|
|
||||||
config_updates = {}
|
|
||||||
hidden_act = "gelu"
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_labels=False,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=20,
|
|
||||||
eos_token_id=2,
|
|
||||||
pad_token_id=1,
|
|
||||||
bos_token_id=0,
|
|
||||||
):
|
|
||||||
self.parent = parent
|
|
||||||
self.batch_size = batch_size
|
|
||||||
self.seq_length = seq_length
|
|
||||||
self.is_training = is_training
|
|
||||||
self.use_labels = use_labels
|
|
||||||
self.vocab_size = vocab_size
|
|
||||||
self.hidden_size = hidden_size
|
|
||||||
self.num_hidden_layers = num_hidden_layers
|
|
||||||
self.num_attention_heads = num_attention_heads
|
|
||||||
self.intermediate_size = intermediate_size
|
|
||||||
|
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
|
||||||
self.max_position_embeddings = max_position_embeddings
|
|
||||||
self.eos_token_id = eos_token_id
|
|
||||||
self.pad_token_id = pad_token_id
|
|
||||||
self.bos_token_id = bos_token_id
|
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size).clip(3, self.vocab_size)
|
|
||||||
eos_tensor = np.expand_dims(np.array([self.eos_token_id] * self.batch_size), 1)
|
|
||||||
input_ids = np.concatenate([input_ids, eos_tensor], axis=1)
|
|
||||||
|
|
||||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
|
||||||
|
|
||||||
config = self.config_cls(
|
|
||||||
vocab_size=self.vocab_size,
|
|
||||||
d_model=self.hidden_size,
|
|
||||||
encoder_layers=self.num_hidden_layers,
|
|
||||||
decoder_layers=self.num_hidden_layers,
|
|
||||||
encoder_attention_heads=self.num_attention_heads,
|
|
||||||
decoder_attention_heads=self.num_attention_heads,
|
|
||||||
encoder_ffn_dim=self.intermediate_size,
|
|
||||||
decoder_ffn_dim=self.intermediate_size,
|
|
||||||
dropout=self.hidden_dropout_prob,
|
|
||||||
attention_dropout=self.attention_probs_dropout_prob,
|
|
||||||
max_position_embeddings=self.max_position_embeddings,
|
|
||||||
eos_token_ids=[2],
|
|
||||||
bos_token_id=self.bos_token_id,
|
|
||||||
pad_token_id=self.pad_token_id,
|
|
||||||
decoder_start_token_id=self.pad_token_id,
|
|
||||||
**self.config_updates,
|
|
||||||
)
|
|
||||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
|
||||||
return config, inputs_dict
|
|
||||||
|
|
||||||
def check_use_cache_forward(self, model_class_name, config, inputs_dict):
|
|
||||||
max_decoder_length = 20
|
|
||||||
model = model_class_name(config)
|
|
||||||
|
|
||||||
encoder_outputs = model.encode(inputs_dict["input_ids"])
|
|
||||||
|
|
||||||
decoder_input_ids, decoder_attention_mask = (
|
|
||||||
inputs_dict["decoder_input_ids"],
|
|
||||||
inputs_dict["decoder_attention_mask"],
|
|
||||||
)
|
|
||||||
|
|
||||||
past_key_values = model.init_cache(decoder_input_ids.shape[0], max_decoder_length, encoder_outputs)
|
|
||||||
decoder_attention_mask = jnp.ones((decoder_input_ids.shape[0], max_decoder_length), dtype="i4")
|
|
||||||
|
|
||||||
decoder_position_ids = jnp.broadcast_to(
|
|
||||||
jnp.arange(decoder_input_ids.shape[-1] - 1)[None, :],
|
|
||||||
(decoder_input_ids.shape[0], decoder_input_ids.shape[-1] - 1),
|
|
||||||
)
|
|
||||||
outputs_cache = model.decode(
|
|
||||||
decoder_input_ids[:, :-1],
|
|
||||||
encoder_outputs,
|
|
||||||
decoder_attention_mask=decoder_attention_mask,
|
|
||||||
past_key_values=past_key_values,
|
|
||||||
decoder_position_ids=decoder_position_ids,
|
|
||||||
)
|
|
||||||
|
|
||||||
decoder_position_ids = jnp.array(decoder_input_ids.shape[0] * [[decoder_input_ids.shape[-1] - 1]], dtype="i4")
|
|
||||||
outputs_cache_next = model.decode(
|
|
||||||
decoder_input_ids[:, -1:],
|
|
||||||
encoder_outputs,
|
|
||||||
decoder_attention_mask=decoder_attention_mask,
|
|
||||||
past_key_values=outputs_cache.past_key_values,
|
|
||||||
decoder_position_ids=decoder_position_ids,
|
|
||||||
)
|
|
||||||
|
|
||||||
outputs = model.decode(decoder_input_ids, encoder_outputs)
|
|
||||||
|
|
||||||
diff = np.max(np.abs((outputs_cache_next[0][:, -1, :5] - outputs[0][:, -1, :5])))
|
|
||||||
self.parent.assertTrue(diff < 1e-3, msg=f"Max diff is {diff}")
|
|
||||||
|
|
||||||
def check_use_cache_forward_with_attn_mask(self, model_class_name, config, inputs_dict):
|
|
||||||
max_decoder_length = 20
|
|
||||||
model = model_class_name(config)
|
|
||||||
|
|
||||||
encoder_outputs = model.encode(inputs_dict["input_ids"])
|
|
||||||
|
|
||||||
decoder_input_ids, decoder_attention_mask = (
|
|
||||||
inputs_dict["decoder_input_ids"],
|
|
||||||
inputs_dict["decoder_attention_mask"],
|
|
||||||
)
|
|
||||||
|
|
||||||
decoder_attention_mask_cache = jnp.concatenate(
|
|
||||||
[
|
|
||||||
decoder_attention_mask,
|
|
||||||
jnp.zeros((decoder_attention_mask.shape[0], max_decoder_length - decoder_attention_mask.shape[1])),
|
|
||||||
],
|
|
||||||
axis=-1,
|
|
||||||
)
|
|
||||||
|
|
||||||
past_key_values = model.init_cache(decoder_input_ids.shape[0], max_decoder_length, encoder_outputs)
|
|
||||||
decoder_position_ids = jnp.broadcast_to(
|
|
||||||
jnp.arange(decoder_input_ids.shape[-1] - 1)[None, :],
|
|
||||||
(decoder_input_ids.shape[0], decoder_input_ids.shape[-1] - 1),
|
|
||||||
)
|
|
||||||
|
|
||||||
outputs_cache = model.decode(
|
|
||||||
decoder_input_ids[:, :-1],
|
|
||||||
encoder_outputs,
|
|
||||||
decoder_attention_mask=decoder_attention_mask_cache,
|
|
||||||
past_key_values=past_key_values,
|
|
||||||
decoder_position_ids=decoder_position_ids,
|
|
||||||
)
|
|
||||||
decoder_position_ids = jnp.array(decoder_input_ids.shape[0] * [[decoder_input_ids.shape[-1] - 1]], dtype="i4")
|
|
||||||
outputs_cache_next = model.decode(
|
|
||||||
decoder_input_ids[:, -1:],
|
|
||||||
encoder_outputs,
|
|
||||||
past_key_values=outputs_cache.past_key_values,
|
|
||||||
decoder_attention_mask=decoder_attention_mask_cache,
|
|
||||||
decoder_position_ids=decoder_position_ids,
|
|
||||||
)
|
|
||||||
|
|
||||||
outputs = model.decode(decoder_input_ids, encoder_outputs, decoder_attention_mask=decoder_attention_mask)
|
|
||||||
|
|
||||||
diff = np.max(np.abs((outputs_cache_next[0][:, -1, :5] - outputs[0][:, -1, :5])))
|
|
||||||
self.parent.assertTrue(diff < 1e-3, msg=f"Max diff is {diff}")
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
decoder_input_ids,
|
|
||||||
attention_mask=None,
|
|
||||||
decoder_attention_mask=None,
|
|
||||||
):
|
|
||||||
if attention_mask is None:
|
|
||||||
attention_mask = np.not_equal(input_ids, config.pad_token_id).astype(np.int8)
|
|
||||||
if decoder_attention_mask is None:
|
|
||||||
decoder_attention_mask = np.concatenate([np.ones(decoder_input_ids[:, :1].shape, dtype=np.int8), np.not_equal(decoder_input_ids[:, 1:], config.pad_token_id).astype(np.int8)], axis=-1)
|
|
||||||
return {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"decoder_input_ids": decoder_input_ids,
|
|
||||||
"attention_mask": attention_mask,
|
|
||||||
"decoder_attention_mask": decoder_attention_mask,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@require_flax
|
|
||||||
class Flax{{cookiecutter.camelcase_modelname}}ModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|
||||||
all_model_classes = (
|
|
||||||
(
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
) if is_flax_available()
|
|
||||||
else ()
|
|
||||||
)
|
|
||||||
all_generative_model_classes = (Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_flax_available() else ()
|
|
||||||
is_encoder_decoder = True
|
|
||||||
test_pruning = False
|
|
||||||
test_head_masking = False
|
|
||||||
test_onnx = False
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
self.model_tester = Flax{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
|
||||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
|
||||||
|
|
||||||
def test_config(self):
|
|
||||||
self.config_tester.run_common_tests()
|
|
||||||
|
|
||||||
def test_use_cache_forward(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
self.model_tester.check_use_cache_forward(model_class, config, inputs_dict)
|
|
||||||
|
|
||||||
def test_use_cache_forward_with_attn_mask(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
self.model_tester.check_use_cache_forward_with_attn_mask(model_class, config, inputs_dict)
|
|
||||||
|
|
||||||
|
|
||||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
|
||||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
|
||||||
if a is None and b is None:
|
|
||||||
return True
|
|
||||||
try:
|
|
||||||
if _assert_tensors_equal(a, b, atol=atol):
|
|
||||||
return True
|
|
||||||
raise
|
|
||||||
except Exception:
|
|
||||||
if len(prefix) > 0:
|
|
||||||
prefix = f"{prefix}: "
|
|
||||||
raise AssertionError(f"{prefix}{a} != {b}")
|
|
||||||
|
|
||||||
|
|
||||||
def _long_tensor(tok_lst):
|
|
||||||
return np.array(tok_lst, dtype=np.int32)
|
|
||||||
|
|
||||||
|
|
||||||
TOLERANCE = 1e-4
|
|
||||||
|
|
||||||
|
|
||||||
@slow
|
|
||||||
@require_sentencepiece
|
|
||||||
@require_tokenizers
|
|
||||||
@require_flax
|
|
||||||
class Flax{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
|
||||||
def test_inference_no_head(self):
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
# change to intended input here
|
|
||||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
|
||||||
output = model(**inputs_dict)[0]
|
|
||||||
expected_shape = (1, 11, 1024)
|
|
||||||
self.assertEqual(output.shape, expected_shape)
|
|
||||||
# change to expected output here
|
|
||||||
expected_slice = np.array(
|
|
||||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
|
||||||
)
|
|
||||||
_assert_tensors_equal(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
|
||||||
|
|
||||||
def test_inference_with_head(self):
|
|
||||||
model = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
# change to intended input here
|
|
||||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
|
||||||
output = model(**inputs_dict)[0]
|
|
||||||
expected_shape = (1, 11, 1024)
|
|
||||||
self.assertEqual(output.shape, expected_shape)
|
|
||||||
# change to expected output here
|
|
||||||
expected_slice = np.array(
|
|
||||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
|
||||||
)
|
|
||||||
_assert_tensors_equal(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
|
||||||
|
|
||||||
def test_seq_to_seq_generation(self):
|
|
||||||
hf = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
|
|
||||||
batch_input = [
|
|
||||||
# string 1,
|
|
||||||
# string 2,
|
|
||||||
# string 3,
|
|
||||||
# string 4,
|
|
||||||
]
|
|
||||||
|
|
||||||
# The below article tests that we don't add any hypotheses outside of the top n_beams
|
|
||||||
dct = tok.batch_encode_plus(
|
|
||||||
batch_input,
|
|
||||||
max_length=512,
|
|
||||||
padding="max_length",
|
|
||||||
truncation_strategy="only_first",
|
|
||||||
truncation=True,
|
|
||||||
return_tensors="np",
|
|
||||||
)
|
|
||||||
|
|
||||||
hypotheses_batch = hf.generate(
|
|
||||||
input_ids=dct["input_ids"],
|
|
||||||
attention_mask=dct["attention_mask"],
|
|
||||||
num_beams=2,
|
|
||||||
)
|
|
||||||
|
|
||||||
EXPECTED = [
|
|
||||||
# here expected 1,
|
|
||||||
# here expected 2,
|
|
||||||
# here expected 3,
|
|
||||||
# here expected 4,
|
|
||||||
]
|
|
||||||
|
|
||||||
generated = tok.batch_decode(
|
|
||||||
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
|
||||||
)
|
|
||||||
assert generated == EXPECTED
|
|
||||||
{%- endif %}
|
|
@ -1,971 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from transformers import is_tf_available, {{cookiecutter.camelcase_modelname}}Config
|
|
||||||
from transformers.testing_utils import require_tf, slow
|
|
||||||
|
|
||||||
from ...test_configuration_common import ConfigTester
|
|
||||||
from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
|
|
||||||
|
|
||||||
|
|
||||||
if is_tf_available():
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
from transformers import (
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
|
||||||
self.parent = parent
|
|
||||||
self.batch_size = 13
|
|
||||||
self.seq_length = 7
|
|
||||||
self.is_training = True
|
|
||||||
self.use_input_mask = True
|
|
||||||
self.use_token_type_ids = True
|
|
||||||
self.use_labels = True
|
|
||||||
self.vocab_size = 99
|
|
||||||
self.hidden_size = 32
|
|
||||||
self.num_hidden_layers = 5
|
|
||||||
self.num_attention_heads = 4
|
|
||||||
self.intermediate_size = 37
|
|
||||||
self.hidden_act = "gelu"
|
|
||||||
self.hidden_dropout_prob = 0.1
|
|
||||||
self.attention_probs_dropout_prob = 0.1
|
|
||||||
self.max_position_embeddings = 512
|
|
||||||
self.type_vocab_size = 16
|
|
||||||
self.type_sequence_label_size = 2
|
|
||||||
self.initializer_range = 0.02
|
|
||||||
self.num_labels = 3
|
|
||||||
self.num_choices = 4
|
|
||||||
self.scope = None
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
|
||||||
|
|
||||||
input_mask = None
|
|
||||||
if self.use_input_mask:
|
|
||||||
input_mask = random_attention_mask([self.batch_size, self.seq_length])
|
|
||||||
|
|
||||||
token_type_ids = None
|
|
||||||
if self.use_token_type_ids:
|
|
||||||
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
|
|
||||||
|
|
||||||
sequence_labels = None
|
|
||||||
token_labels = None
|
|
||||||
choice_labels = None
|
|
||||||
if self.use_labels:
|
|
||||||
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
|
||||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
|
||||||
choice_labels = ids_tensor([self.batch_size], self.num_choices)
|
|
||||||
|
|
||||||
config = {{cookiecutter.camelcase_modelname}}Config(
|
|
||||||
vocab_size=self.vocab_size,
|
|
||||||
hidden_size=self.hidden_size,
|
|
||||||
num_hidden_layers=self.num_hidden_layers,
|
|
||||||
num_attention_heads=self.num_attention_heads,
|
|
||||||
intermediate_size=self.intermediate_size,
|
|
||||||
hidden_act=self.hidden_act,
|
|
||||||
hidden_dropout_prob=self.hidden_dropout_prob,
|
|
||||||
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
|
||||||
max_position_embeddings=self.max_position_embeddings,
|
|
||||||
type_vocab_size=self.type_vocab_size,
|
|
||||||
initializer_range=self.initializer_range,
|
|
||||||
return_dict=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_decoder(self):
|
|
||||||
(
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
) = self.prepare_config_and_inputs()
|
|
||||||
|
|
||||||
config.is_decoder = True
|
|
||||||
encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
|
|
||||||
encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
|
|
||||||
|
|
||||||
return (
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
encoder_hidden_states,
|
|
||||||
encoder_attention_mask,
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_model(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config)
|
|
||||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
|
||||||
|
|
||||||
inputs = [input_ids, input_mask]
|
|
||||||
result = model(inputs)
|
|
||||||
|
|
||||||
result = model(input_ids)
|
|
||||||
|
|
||||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
|
||||||
|
|
||||||
def create_and_check_causal_lm_base_model(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.is_decoder = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config)
|
|
||||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
|
||||||
result = model(inputs)
|
|
||||||
|
|
||||||
inputs = [input_ids, input_mask]
|
|
||||||
result = model(inputs)
|
|
||||||
|
|
||||||
result = model(input_ids)
|
|
||||||
|
|
||||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
|
||||||
|
|
||||||
def create_and_check_model_as_decoder(
|
|
||||||
self,
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
encoder_hidden_states,
|
|
||||||
encoder_attention_mask,
|
|
||||||
):
|
|
||||||
config.add_cross_attention = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
"encoder_hidden_states": encoder_hidden_states,
|
|
||||||
"encoder_attention_mask": encoder_attention_mask,
|
|
||||||
}
|
|
||||||
result = model(inputs)
|
|
||||||
|
|
||||||
inputs = [input_ids, input_mask]
|
|
||||||
result = model(inputs, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states)
|
|
||||||
|
|
||||||
# Also check the case where encoder outputs are not passed
|
|
||||||
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
|
|
||||||
|
|
||||||
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
|
||||||
|
|
||||||
def create_and_check_causal_lm_model(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.is_decoder = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
prediction_scores = model(inputs)["logits"]
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_causal_lm_model_as_decoder(
|
|
||||||
self,
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
encoder_hidden_states,
|
|
||||||
encoder_attention_mask,
|
|
||||||
):
|
|
||||||
config.add_cross_attention = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
"encoder_hidden_states": encoder_hidden_states,
|
|
||||||
"encoder_attention_mask": encoder_attention_mask,
|
|
||||||
}
|
|
||||||
result = model(inputs)
|
|
||||||
|
|
||||||
inputs = [input_ids, input_mask]
|
|
||||||
result = model(inputs, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states)
|
|
||||||
|
|
||||||
prediction_scores = result["logits"]
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_causal_lm_model_past(
|
|
||||||
self,
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
):
|
|
||||||
config.is_decoder = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
|
||||||
|
|
||||||
# first forward pass
|
|
||||||
outputs = model(input_ids, use_cache=True)
|
|
||||||
outputs_use_cache_conf = model(input_ids)
|
|
||||||
outputs_no_past = model(input_ids, use_cache=False)
|
|
||||||
|
|
||||||
self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
|
|
||||||
self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)
|
|
||||||
|
|
||||||
past_key_values = outputs.past_key_values
|
|
||||||
|
|
||||||
# create hypothetical next token and extent to next_input_ids
|
|
||||||
next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
|
|
||||||
|
|
||||||
# append to next input_ids and attn_mask
|
|
||||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
|
||||||
|
|
||||||
output_from_no_past = model(next_input_ids, output_hidden_states=True).hidden_states[0]
|
|
||||||
output_from_past = model(
|
|
||||||
next_tokens, past_key_values=past_key_values, output_hidden_states=True
|
|
||||||
).hidden_states[0]
|
|
||||||
|
|
||||||
# select random slice
|
|
||||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
|
||||||
output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
|
|
||||||
output_from_past_slice = output_from_past[:, 0, random_slice_idx]
|
|
||||||
|
|
||||||
# test that outputs are equal for slice
|
|
||||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)
|
|
||||||
|
|
||||||
def create_and_check_causal_lm_model_past_with_attn_mask(
|
|
||||||
self,
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
):
|
|
||||||
config.is_decoder = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
|
||||||
|
|
||||||
# create attention mask
|
|
||||||
half_seq_length = self.seq_length // 2
|
|
||||||
attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
|
|
||||||
attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
|
|
||||||
attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)
|
|
||||||
|
|
||||||
# first forward pass
|
|
||||||
outputs = model(input_ids, attention_mask=attn_mask, use_cache=True)
|
|
||||||
|
|
||||||
# create hypothetical next token and extent to next_input_ids
|
|
||||||
next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
|
|
||||||
|
|
||||||
past_key_values = outputs.past_key_values
|
|
||||||
|
|
||||||
# change a random masked slice from input_ids
|
|
||||||
random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
|
|
||||||
random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
|
|
||||||
vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
|
|
||||||
condition = tf.transpose(
|
|
||||||
tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
|
|
||||||
)
|
|
||||||
input_ids = tf.where(condition, random_other_next_tokens, input_ids)
|
|
||||||
|
|
||||||
# append to next input_ids and
|
|
||||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
|
||||||
attn_mask = tf.concat(
|
|
||||||
[attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)],
|
|
||||||
axis=1,
|
|
||||||
)
|
|
||||||
|
|
||||||
output_from_no_past = model(
|
|
||||||
next_input_ids,
|
|
||||||
attention_mask=attn_mask,
|
|
||||||
output_hidden_states=True,
|
|
||||||
).hidden_states[0]
|
|
||||||
output_from_past = model(
|
|
||||||
next_tokens, past_key_values=past_key_values, attention_mask=attn_mask, output_hidden_states=True
|
|
||||||
).hidden_states[0]
|
|
||||||
|
|
||||||
# select random slice
|
|
||||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
|
||||||
output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
|
|
||||||
output_from_past_slice = output_from_past[:, 0, random_slice_idx]
|
|
||||||
|
|
||||||
# test that outputs are equal for slice
|
|
||||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)
|
|
||||||
|
|
||||||
def create_and_check_causal_lm_model_past_large_inputs(
|
|
||||||
self,
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
):
|
|
||||||
config.is_decoder = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
|
||||||
|
|
||||||
input_ids = input_ids[:1, :]
|
|
||||||
input_mask = input_mask[:1, :]
|
|
||||||
self.batch_size = 1
|
|
||||||
|
|
||||||
# first forward pass
|
|
||||||
outputs = model(input_ids, attention_mask=input_mask, use_cache=True)
|
|
||||||
past_key_values = outputs.past_key_values
|
|
||||||
|
|
||||||
# create hypothetical next token and extend to next_input_ids
|
|
||||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
|
||||||
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
|
||||||
|
|
||||||
# append to next input_ids and attention mask
|
|
||||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
|
||||||
next_attention_mask = tf.concat([input_mask, next_attn_mask], axis=-1)
|
|
||||||
|
|
||||||
output_from_no_past = model(
|
|
||||||
next_input_ids,
|
|
||||||
attention_mask=next_attention_mask,
|
|
||||||
output_hidden_states=True,
|
|
||||||
).hidden_states[0]
|
|
||||||
output_from_past = model(
|
|
||||||
next_tokens,
|
|
||||||
attention_mask=next_attention_mask,
|
|
||||||
past_key_values=past_key_values,
|
|
||||||
output_hidden_states=True,
|
|
||||||
).hidden_states[0]
|
|
||||||
|
|
||||||
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
|
||||||
|
|
||||||
# select random slice
|
|
||||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
|
||||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
|
||||||
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
|
||||||
|
|
||||||
# test that outputs are equal for slice
|
|
||||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
|
||||||
|
|
||||||
def create_and_check_decoder_model_past_large_inputs(
|
|
||||||
self,
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
encoder_hidden_states,
|
|
||||||
encoder_attention_mask,
|
|
||||||
):
|
|
||||||
config.add_cross_attention = True
|
|
||||||
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForCausalLM(config=config)
|
|
||||||
|
|
||||||
input_ids = input_ids[:1, :]
|
|
||||||
input_mask = input_mask[:1, :]
|
|
||||||
encoder_hidden_states = encoder_hidden_states[:1, :, :]
|
|
||||||
encoder_attention_mask = encoder_attention_mask[:1, :]
|
|
||||||
self.batch_size = 1
|
|
||||||
|
|
||||||
# first forward pass
|
|
||||||
outputs = model(
|
|
||||||
input_ids,
|
|
||||||
attention_mask=input_mask,
|
|
||||||
encoder_hidden_states=encoder_hidden_states,
|
|
||||||
encoder_attention_mask=encoder_attention_mask,
|
|
||||||
use_cache=True,
|
|
||||||
)
|
|
||||||
past_key_values = outputs.past_key_values
|
|
||||||
|
|
||||||
# create hypothetical next token and extend to next_input_ids
|
|
||||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
|
||||||
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
|
||||||
|
|
||||||
# append to next input_ids and attention mask
|
|
||||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
|
||||||
next_attention_mask = tf.concat([input_mask, next_attn_mask], axis=-1)
|
|
||||||
|
|
||||||
output_from_no_past = model(
|
|
||||||
next_input_ids,
|
|
||||||
attention_mask=next_attention_mask,
|
|
||||||
encoder_hidden_states=encoder_hidden_states,
|
|
||||||
encoder_attention_mask=encoder_attention_mask,
|
|
||||||
output_hidden_states=True,
|
|
||||||
).hidden_states[0]
|
|
||||||
output_from_past = model(
|
|
||||||
next_tokens,
|
|
||||||
attention_mask=next_attention_mask,
|
|
||||||
encoder_hidden_states=encoder_hidden_states,
|
|
||||||
encoder_attention_mask=encoder_attention_mask,
|
|
||||||
past_key_values=past_key_values,
|
|
||||||
output_hidden_states=True,
|
|
||||||
).hidden_states[0]
|
|
||||||
|
|
||||||
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
|
||||||
|
|
||||||
# select random slice
|
|
||||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
|
||||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
|
||||||
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
|
||||||
|
|
||||||
# test that outputs are equal for slice
|
|
||||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
|
||||||
|
|
||||||
def create_and_check_for_masked_lm(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
result = model(inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
|
||||||
|
|
||||||
def create_and_check_for_sequence_classification(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.num_labels = self.num_labels
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
|
|
||||||
result = model(inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
|
||||||
|
|
||||||
def create_and_check_for_multiple_choice(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.num_choices = self.num_choices
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(config=config)
|
|
||||||
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
|
|
||||||
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
|
|
||||||
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
|
|
||||||
inputs = {
|
|
||||||
"input_ids": multiple_choice_inputs_ids,
|
|
||||||
"attention_mask": multiple_choice_input_mask,
|
|
||||||
"token_type_ids": multiple_choice_token_type_ids,
|
|
||||||
}
|
|
||||||
result = model(inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
|
|
||||||
|
|
||||||
def create_and_check_for_token_classification(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
config.num_labels = self.num_labels
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
result = model(inputs)
|
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
|
|
||||||
|
|
||||||
def create_and_check_for_question_answering(
|
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
|
||||||
):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(config=config)
|
|
||||||
inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"attention_mask": input_mask,
|
|
||||||
"token_type_ids": token_type_ids,
|
|
||||||
}
|
|
||||||
|
|
||||||
result = model(inputs)
|
|
||||||
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
|
|
||||||
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
|
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
|
||||||
(
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
token_type_ids,
|
|
||||||
input_mask,
|
|
||||||
sequence_labels,
|
|
||||||
token_labels,
|
|
||||||
choice_labels,
|
|
||||||
) = config_and_inputs
|
|
||||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
|
||||||
return config, inputs_dict
|
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
|
||||||
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
|
||||||
)
|
|
||||||
if is_tf_available()
|
|
||||||
else ()
|
|
||||||
)
|
|
||||||
|
|
||||||
test_head_masking = False
|
|
||||||
test_onnx = False
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
|
||||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config, hidden_size=37)
|
|
||||||
|
|
||||||
def test_config(self):
|
|
||||||
self.config_tester.run_common_tests()
|
|
||||||
|
|
||||||
def test_model(self):
|
|
||||||
"""Test the base model"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
|
||||||
|
|
||||||
@unittest.skip(reason="Template classes interact badly with this test.")
|
|
||||||
def test_keras_fit(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def test_causal_lm_base_model(self):
|
|
||||||
"""Test the base model of the causal LM model
|
|
||||||
|
|
||||||
is_decoder=True, no cross_attention, no encoder outputs
|
|
||||||
"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_causal_lm_base_model(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_model_as_decoder(self):
|
|
||||||
"""Test the base model as a decoder (of an encoder-decoder architecture)
|
|
||||||
|
|
||||||
is_decoder=True + cross_attention + pass encoder outputs
|
|
||||||
"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
|
|
||||||
self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_masked_lm(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_causal_lm(self):
|
|
||||||
"""Test the causal LM model"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_causal_lm_model(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_causal_lm_model_as_decoder(self):
|
|
||||||
"""Test the causal LM model as a decoder"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
|
|
||||||
self.model_tester.create_and_check_causal_lm_model_as_decoder(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_causal_lm_model_past(self):
|
|
||||||
"""Test causal LM model with `past_key_values`"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_causal_lm_model_past(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_causal_lm_model_past_with_attn_mask(self):
|
|
||||||
"""Test the causal LM model with `past_key_values` and `attention_mask`"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_causal_lm_model_past_with_attn_mask(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_causal_lm_model_past_with_large_inputs(self):
|
|
||||||
"""Test the causal LM model with `past_key_values` and a longer decoder sequence length"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_causal_lm_model_past_large_inputs(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_decoder_model_past_with_large_inputs(self):
|
|
||||||
"""Similar to `test_causal_lm_model_past_with_large_inputs` but with cross-attention"""
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
|
|
||||||
self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_multiple_choice(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_question_answering(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_sequence_classification(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_token_classification(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
@slow
|
|
||||||
def test_model_from_pretrained(self):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
|
||||||
self.assertIsNotNone(model)
|
|
||||||
|
|
||||||
@require_tf
|
|
||||||
class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
|
||||||
@slow
|
|
||||||
def test_inference_masked_lm(self):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForMaskedLM.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
|
|
||||||
input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
|
|
||||||
output = model(input_ids)[0]
|
|
||||||
|
|
||||||
# TODO Replace vocab size
|
|
||||||
vocab_size = 32000
|
|
||||||
|
|
||||||
expected_shape = [1, 6, vocab_size]
|
|
||||||
self.assertEqual(output.shape, expected_shape)
|
|
||||||
|
|
||||||
print(output[:, :3, :3])
|
|
||||||
|
|
||||||
# TODO Replace values below with what was printed above.
|
|
||||||
expected_slice = tf.constant(
|
|
||||||
[
|
|
||||||
[
|
|
||||||
[-0.05243197, -0.04498899, 0.05512108],
|
|
||||||
[-0.07444685, -0.01064632, 0.04352357],
|
|
||||||
[-0.05020351, 0.05530146, 0.00700043],
|
|
||||||
]
|
|
||||||
]
|
|
||||||
)
|
|
||||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
|
|
||||||
|
|
||||||
{% else %}
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from transformers import (
|
|
||||||
is_tf_available,
|
|
||||||
{{cookiecutter.camelcase_modelname}}Config,
|
|
||||||
{{cookiecutter.camelcase_modelname}}Tokenizer,
|
|
||||||
)
|
|
||||||
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
|
|
||||||
|
|
||||||
from ...test_configuration_common import ConfigTester
|
|
||||||
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
|
||||||
|
|
||||||
|
|
||||||
if is_tf_available():
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
from transformers import (
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
|
||||||
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
|
|
||||||
config_cls = {{cookiecutter.camelcase_modelname}}Config
|
|
||||||
config_updates = {}
|
|
||||||
hidden_act = "gelu"
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_labels=False,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=20,
|
|
||||||
eos_token_id=2,
|
|
||||||
pad_token_id=1,
|
|
||||||
bos_token_id=0,
|
|
||||||
):
|
|
||||||
self.parent = parent
|
|
||||||
self.batch_size = batch_size
|
|
||||||
self.seq_length = seq_length
|
|
||||||
self.is_training = is_training
|
|
||||||
self.use_labels = use_labels
|
|
||||||
self.vocab_size = vocab_size
|
|
||||||
self.hidden_size = hidden_size
|
|
||||||
self.num_hidden_layers = num_hidden_layers
|
|
||||||
self.num_attention_heads = num_attention_heads
|
|
||||||
self.intermediate_size = intermediate_size
|
|
||||||
|
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
|
||||||
self.max_position_embeddings = max_position_embeddings
|
|
||||||
self.eos_token_id = eos_token_id
|
|
||||||
self.pad_token_id = pad_token_id
|
|
||||||
self.bos_token_id = bos_token_id
|
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size)
|
|
||||||
eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1)
|
|
||||||
input_ids = tf.concat([input_ids, eos_tensor], axis=1)
|
|
||||||
|
|
||||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
|
||||||
|
|
||||||
config = self.config_cls(
|
|
||||||
vocab_size=self.vocab_size,
|
|
||||||
d_model=self.hidden_size,
|
|
||||||
encoder_layers=self.num_hidden_layers,
|
|
||||||
decoder_layers=self.num_hidden_layers,
|
|
||||||
encoder_attention_heads=self.num_attention_heads,
|
|
||||||
decoder_attention_heads=self.num_attention_heads,
|
|
||||||
encoder_ffn_dim=self.intermediate_size,
|
|
||||||
decoder_ffn_dim=self.intermediate_size,
|
|
||||||
dropout=self.hidden_dropout_prob,
|
|
||||||
attention_dropout=self.attention_probs_dropout_prob,
|
|
||||||
max_position_embeddings=self.max_position_embeddings,
|
|
||||||
eos_token_ids=[2],
|
|
||||||
bos_token_id=self.bos_token_id,
|
|
||||||
pad_token_id=self.pad_token_id,
|
|
||||||
decoder_start_token_id=self.pad_token_id,
|
|
||||||
**self.config_updates,
|
|
||||||
)
|
|
||||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
|
||||||
return config, inputs_dict
|
|
||||||
|
|
||||||
def check_decoder_model_past_large_inputs(self, config, inputs_dict):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder()
|
|
||||||
input_ids = inputs_dict["input_ids"]
|
|
||||||
|
|
||||||
input_ids = input_ids[:1, :]
|
|
||||||
attention_mask = inputs_dict["attention_mask"][:1, :]
|
|
||||||
self.batch_size = 1
|
|
||||||
|
|
||||||
# first forward pass
|
|
||||||
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
|
||||||
|
|
||||||
output, past_key_values = outputs.to_tuple()
|
|
||||||
|
|
||||||
# create hypothetical next token and extend to next_input_ids
|
|
||||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
|
||||||
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
|
||||||
|
|
||||||
# append to next input_ids and attention mask
|
|
||||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
|
||||||
next_attention_mask = tf.concat([attention_mask, next_attn_mask], axis=-1)
|
|
||||||
|
|
||||||
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)[0]
|
|
||||||
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)[0]
|
|
||||||
|
|
||||||
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
|
||||||
|
|
||||||
# select random slice
|
|
||||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
|
||||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
|
||||||
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
|
||||||
|
|
||||||
# test that outputs are equal for slice
|
|
||||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
|
||||||
config,
|
|
||||||
input_ids,
|
|
||||||
decoder_input_ids,
|
|
||||||
attention_mask=None,
|
|
||||||
decoder_attention_mask=None,
|
|
||||||
):
|
|
||||||
if attention_mask is None:
|
|
||||||
attention_mask = tf.cast(tf.math.not_equal(input_ids, config.pad_token_id), tf.int32)
|
|
||||||
if decoder_attention_mask is None:
|
|
||||||
decoder_attention_mask = tf.concat(
    [
        tf.ones(decoder_input_ids[:, :1].shape, dtype=tf.int32),
        tf.cast(tf.math.not_equal(decoder_input_ids[:, 1:], config.pad_token_id), tf.int32),
    ],
    axis=-1,
)
|
|
||||||
return {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"decoder_input_ids": decoder_input_ids,
|
|
||||||
"attention_mask": attention_mask,
|
|
||||||
"decoder_attention_mask": decoder_attention_mask,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
|
||||||
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
all_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model) if is_tf_available() else ()
|
|
||||||
all_generative_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_tf_available() else ()
|
|
||||||
is_encoder_decoder = True
|
|
||||||
test_pruning = False
|
|
||||||
test_head_masking = False
|
|
||||||
test_onnx = False
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
|
||||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
|
||||||
|
|
||||||
def test_config(self):
|
|
||||||
self.config_tester.run_common_tests()
|
|
||||||
|
|
||||||
def test_decoder_model_past_large_inputs(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)
|
|
||||||
|
|
||||||
@unittest.skip(reason="Template classes interact badly with this test.")
|
|
||||||
def test_keras_fit(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
|
||||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
|
||||||
if a is None and b is None:
|
|
||||||
return True
|
|
||||||
try:
|
|
||||||
# `assert_near` raises if the tensors are not close; it returns nothing on success.
tf.debugging.assert_near(a, b, atol=atol)
return True
|
|
||||||
except Exception:
|
|
||||||
if len(prefix) > 0:
|
|
||||||
prefix = f"{prefix}: "
|
|
||||||
raise AssertionError(f"{prefix}{a} != {b}")
|
|
||||||
|
|
||||||
|
|
||||||
def _long_tensor(tok_lst):
|
|
||||||
return tf.constant(tok_lst, dtype=tf.int32)
|
|
||||||
|
|
||||||
|
|
||||||
TOLERANCE = 1e-4
|
|
||||||
|
|
||||||
|
|
||||||
@slow
|
|
||||||
@require_sentencepiece
|
|
||||||
@require_tokenizers
|
|
||||||
@require_tf
|
|
||||||
class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
|
||||||
def test_inference_no_head(self):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
# change to intended input here
|
|
||||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
|
||||||
output = model(**inputs_dict)[0]
|
|
||||||
expected_shape = (1, 11, 1024)
|
|
||||||
self.assertEqual(output.shape, expected_shape)
|
|
||||||
# change to expected output here
|
|
||||||
expected_slice = tf.constant(
|
|
||||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
|
||||||
)
|
|
||||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
|
||||||
|
|
||||||
def test_inference_with_head(self):
|
|
||||||
model = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
# change to intended input here
|
|
||||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
|
||||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
|
||||||
output = model(**inputs_dict)[0]
|
|
||||||
expected_shape = (1, 11, 1024)
|
|
||||||
self.assertEqual(output.shape, expected_shape)
|
|
||||||
# change to expected output here
|
|
||||||
expected_slice = tf.constant(
|
|
||||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
|
||||||
)
|
|
||||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
|
||||||
|
|
||||||
def test_seq_to_seq_generation(self):
|
|
||||||
hf = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
|
||||||
|
|
||||||
batch_input = [
|
|
||||||
# string 1,
|
|
||||||
# string 2,
|
|
||||||
# string 3,
|
|
||||||
# string 4,
|
|
||||||
]
|
|
||||||
|
|
||||||
# The test below checks that we don't add any hypotheses outside of the top n_beams
|
|
||||||
dct = tok.batch_encode_plus(
|
|
||||||
batch_input,
|
|
||||||
max_length=512,
|
|
||||||
padding="max_length",
|
|
||||||
truncation_strategy="only_first",
|
|
||||||
truncation=True,
|
|
||||||
return_tensors="tf",
|
|
||||||
)
|
|
||||||
|
|
||||||
hypotheses_batch = hf.generate(
|
|
||||||
input_ids=dct["input_ids"],
|
|
||||||
attention_mask=dct["attention_mask"],
|
|
||||||
num_beams=2,
|
|
||||||
)
|
|
||||||
|
|
||||||
EXPECTED = [
|
|
||||||
# here expected 1,
|
|
||||||
# here expected 2,
|
|
||||||
# here expected 3,
|
|
||||||
# here expected 4,
|
|
||||||
]
|
|
||||||
|
|
||||||
generated = tok.batch_decode(
|
|
||||||
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
|
||||||
)
|
|
||||||
assert generated == EXPECTED
|
|
||||||
{%- endif %}
|
|
File diff suppressed because it is too large
@ -1,461 +0,0 @@
|
|||||||
## Copyright 2022 The HuggingFace Team. All rights reserved.
|
|
||||||
##
|
|
||||||
## Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
## you may not use this file except in compliance with the License.
|
|
||||||
## You may obtain a copy of the License at
|
|
||||||
##
|
|
||||||
## http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
##
|
|
||||||
## Unless required by applicable law or agreed to in writing, software
|
|
||||||
## distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
## See the License for the specific language governing permissions and
|
|
||||||
## limitations under the License.
|
|
||||||
|
|
||||||
## This file is made so that specific statements may be copied inside existing files. This is useful to copy
|
|
||||||
## import statements in __init__.py, or to complete model lists in the AUTO files.
|
|
||||||
##
|
|
||||||
## It is to be used as such:
|
|
||||||
## Put '# To replace in: "FILE_PATH"' in order to indicate the contents will be copied in the file at path FILE_PATH
|
|
||||||
## Put '# Below: "STATEMENT"' in order to copy the contents below **the first occurrence** of that line in the file at FILE_PATH
|
|
||||||
## Put '# Replace with:' followed by the lines containing the content to define the content
|
|
||||||
## End a statement with '# End.'. If starting a new statement without redefining the FILE_PATH, it will continue pasting
|
|
||||||
## content in that file.
|
|
||||||
##
|
|
||||||
## Put '## COMMENT' to comment on the file.
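##
## Illustration only (hypothetical file path and class name, shown here as a comment rather than a
## real statement): a complete statement following this protocol would look like
##
## # To replace in: "src/transformers/models/auto/some_auto_file.py"
## # Below: "# Add entries here"
## # Replace with:
## ("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}SomeClass"),
## # End.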
|
|
||||||
|
|
||||||
# To replace in: "src/transformers/__init__.py"
|
|
||||||
# Below: " # PyTorch models structure" if generating PyTorch
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
|
||||||
[
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}Layer",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}Model",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
|
||||||
"load_tf_weights_in_{{cookiecutter.lowercase_modelname}}",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
{% else %}
|
|
||||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
|
||||||
[
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}Model",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " # TensorFlow models structure" if generating TensorFlow
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
|
||||||
[
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}Layer",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}Model",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
{% else %}
|
|
||||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
|
||||||
[
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}Model",
|
|
||||||
"TF{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " # Flax models structure" if generating Flax
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
|
||||||
[
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}Layer",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}Model",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
{% else %}
|
|
||||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
|
||||||
[
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}Model",
|
|
||||||
"Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " # Fast tokenizers structure"
|
|
||||||
# Replace with:
|
|
||||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].append("{{cookiecutter.camelcase_modelname}}TokenizerFast")
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " # Models"
|
|
||||||
# Replace with:
|
|
||||||
"models.{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.camelcase_modelname}}Config", "{{cookiecutter.camelcase_modelname}}Tokenizer"],
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# To replace in: "src/transformers/__init__.py"
|
|
||||||
# Below: " # PyTorch model imports" if generating PyTorch
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
|
||||||
{{cookiecutter.camelcase_modelname}}Layer,
|
|
||||||
{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
|
||||||
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
|
||||||
)
|
|
||||||
{% else %}
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
|
||||||
)
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " # TensorFlow model imports" if generating TensorFlow
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}Layer,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
|
||||||
)
|
|
||||||
{% else %}
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
|
||||||
)
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " # Flax model imports" if generating Flax
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}Layer,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
|
||||||
)
|
|
||||||
{% else %}
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}Model,
|
|
||||||
Flax{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
|
||||||
)
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " # Fast tokenizers imports"
|
|
||||||
# Replace with:
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: " from .models.albert import AlbertConfig"
|
|
||||||
# Replace with:
|
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer
|
|
||||||
# End.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# To replace in: "src/transformers/models/__init__.py"
|
|
||||||
# Below: "from . import ("
|
|
||||||
# Replace with:
|
|
||||||
{{cookiecutter.lowercase_modelname}},
|
|
||||||
# End.
|
|
||||||
|
|
||||||
|
|
||||||
# To replace in: "src/transformers/models/auto/configuration_auto.py"
|
|
||||||
# Below: "# Add configs here"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}Config"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Add full (and cased) model names here"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# To replace in: "src/transformers/models/auto/modeling_auto.py" if generating PyTorch
|
|
||||||
# Below: "# Base model mapping"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}Model"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model with LM heads mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Causal LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForCausalLM"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Masked LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Sequence Classification mapping"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Question Answering mapping"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Token Classification mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForTokenClassification"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Multiple Choice mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForMultipleChoice"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# To replace in: "src/transformers/models/auto/modeling_tf_auto.py" if generating TensorFlow
|
|
||||||
# Below: "# Base model mapping"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}Model"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model with LM heads mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Causal LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForCausalLM"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Masked LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Sequence Classification mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Question Answering mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Token Classification mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForTokenClassification"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Multiple Choice mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# To replace in: "src/transformers/models/auto/modeling_flax_auto.py" if generating Flax
|
|
||||||
# Below: "# Base model mapping"
|
|
||||||
# Replace with:
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}Model"),
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Masked LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Causal LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForCausalLM"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Masked LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Sequence Classification mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Question Answering mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Token Classification mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Multiple Choice mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice"),
|
|
||||||
{% else -%}
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
{% else %}
|
|
||||||
("{{cookiecutter.lowercase_modelname}}", "Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration"),
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# To replace in: "utils/check_repo.py" if generating PyTorch
|
|
||||||
|
|
||||||
# Below: "models to ignore for model xxx mapping"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
{% else -%}
|
|
||||||
"{{cookiecutter.camelcase_modelname}}Encoder",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}Decoder",
|
|
||||||
"{{cookiecutter.camelcase_modelname}}DecoderWrapper",
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
||||||
|
|
||||||
# Below: "models to ignore for not tested"
|
|
||||||
# Replace with:
|
|
||||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
|
||||||
{% else -%}
|
|
||||||
"{{cookiecutter.camelcase_modelname}}Encoder", # Building part of bigger (tested) model.
|
|
||||||
"{{cookiecutter.camelcase_modelname}}Decoder", # Building part of bigger (tested) model.
|
|
||||||
"{{cookiecutter.camelcase_modelname}}DecoderWrapper", # Building part of bigger (tested) model.
|
|
||||||
{% endif -%}
|
|
||||||
# End.
|
|
@ -1,157 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
# Copyright 2022 {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
"""Tokenization classes for {{cookiecutter.modelname}}."""
|
|
||||||
|
|
||||||
{%- if cookiecutter.tokenizer_type == "Based on BERT" %}
|
|
||||||
from ...utils import logging
|
|
||||||
from ..bert.tokenization_bert_fast import BertTokenizerFast
|
|
||||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
|
||||||
|
|
||||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
|
|
||||||
|
|
||||||
PRETRAINED_VOCAB_FILES_MAP = {
|
|
||||||
"vocab_file": {
|
|
||||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.txt",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
|
||||||
r"""
|
|
||||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
|
||||||
|
|
||||||
[`~{{cookiecutter.camelcase_modelname}}TokenizerFast`] is identical to [`BertTokenizerFast`] and runs
|
|
||||||
end-to-end tokenization: punctuation splitting and wordpiece.
|
|
||||||
|
|
||||||
Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning
|
|
||||||
parameters.
|
|
||||||
"""
|
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
|
||||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
|
||||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
|
||||||
|
|
||||||
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
|
||||||
from ...utils import logging
|
|
||||||
from ..bart.tokenization_bart_fast import BartTokenizerFast
|
|
||||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
|
||||||
|
|
||||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"}
|
|
||||||
|
|
||||||
|
|
||||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BartTokenizerFast):
|
|
||||||
r"""
|
|
||||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
|
||||||
|
|
||||||
[`~{{cookiecutter.camelcase_modelname}}TokenizerFast`] is identical to [`BartTokenizerFast`] and runs
|
|
||||||
end-to-end tokenization using byte-level Byte-Pair-Encoding.
|
|
||||||
|
|
||||||
Refer to superclass [`BartTokenizerFast`] for usage examples and documentation concerning
|
|
||||||
parameters.
|
|
||||||
"""
|
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
|
||||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
|
||||||
|
|
||||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
|
||||||
from typing import List, Optional
|
|
||||||
|
|
||||||
from tokenizers import ByteLevelBPETokenizer
|
|
||||||
|
|
||||||
from ...tokenization_utils_fast import PreTrainedTokenizerFast
|
|
||||||
from ...utils import logging
|
|
||||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
|
||||||
|
|
||||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.json"}
|
|
||||||
|
|
||||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast):
|
|
||||||
"""
|
|
||||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
vocab_file (`str`):
|
|
||||||
Path to the vocabulary file.
|
|
||||||
"""
|
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
|
||||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
vocab_file,
|
|
||||||
merges_file,
|
|
||||||
unk_token="<|endoftext|>",
|
|
||||||
bos_token="<|endoftext|>",
|
|
||||||
eos_token="<|endoftext|>",
|
|
||||||
add_prefix_space=False,
|
|
||||||
trim_offsets=True,
|
|
||||||
**kwargs
|
|
||||||
):
|
|
||||||
super().__init__(
|
|
||||||
ByteLevelBPETokenizer(
|
|
||||||
vocab_file=vocab_file,
|
|
||||||
merges_file=merges_file,
|
|
||||||
add_prefix_space=add_prefix_space,
|
|
||||||
trim_offsets=trim_offsets,
|
|
||||||
),
|
|
||||||
bos_token=bos_token,
|
|
||||||
eos_token=eos_token,
|
|
||||||
unk_token=unk_token,
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
self.add_prefix_space = add_prefix_space
|
|
||||||
|
|
||||||
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
|
||||||
output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
|
|
||||||
if token_ids_1 is None:
|
|
||||||
return output
|
|
||||||
|
|
||||||
return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]
|
|
||||||
|
|
||||||
|
|
||||||
def create_token_type_ids_from_sequences(
|
|
||||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
|
||||||
) -> List[int]:
|
|
||||||
"""
|
|
||||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task.
|
|
||||||
{{cookiecutter.modelname}} does not make use of token type ids, therefore a list of zeros is returned.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
token_ids_0 (`List[int]`):
|
|
||||||
List of IDs.
|
|
||||||
token_ids_1 (`List[int]`, *optional*):
|
|
||||||
Optional second list of IDs for sequence pairs.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
`List[int]`: List of zeros.
|
|
||||||
"""
|
|
||||||
sep = [self.sep_token_id]
|
|
||||||
cls = [self.cls_token_id]
|
|
||||||
|
|
||||||
if token_ids_1 is None:
|
|
||||||
return len(cls + token_ids_0 + sep) * [0]
|
|
||||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
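# Worked example (illustrative only, assuming cls_token_id and sep_token_id are single ids):
# create_token_type_ids_from_sequences([5, 6], [7, 8]) counts cls + ids_0 + sep + sep + ids_1 + sep,
# i.e. 8 positions, and therefore returns [0, 0, 0, 0, 0, 0, 0, 0].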
|
|
||||||
|
|
||||||
|
|
||||||
{% endif %}
|
|
@ -1,293 +0,0 @@
# coding=utf-8
# Copyright 2022 {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for {{cookiecutter.modelname}}."""

{%- if cookiecutter.tokenizer_type == "Based on BERT" %}
from ...utils import logging
from ..bert.tokenization_bert import BertTokenizer


logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {
        "{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.txt",
    }
}


class {{cookiecutter.camelcase_modelname}}Tokenizer(BertTokenizer):
    r"""
    Construct a {{cookiecutter.modelname}} tokenizer.

    [`~{{cookiecutter.camelcase_modelname}}Tokenizer`] is identical to [`BertTokenizer`] and runs end-to-end
    tokenization: punctuation splitting and wordpiece.

    Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning
    parameters.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP

{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
from ...utils import logging
from ..bart.tokenization_bart import BartTokenizer


logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}


class {{cookiecutter.camelcase_modelname}}Tokenizer(BartTokenizer):
    """
    Construct a {{cookiecutter.modelname}} tokenizer.

    [`~{{cookiecutter.camelcase_modelname}}Tokenizer`] is identical to [`BartTokenizer`] and runs end-to-end
    tokenization: punctuation splitting and wordpiece.

    Refer to superclass [`BartTokenizer`] for usage examples and documentation concerning
    parameters.
    """

    vocab_files_names = VOCAB_FILES_NAMES

{%- elif cookiecutter.tokenizer_type == "Standalone" %}
from typing import List, Optional

from tokenizers import ByteLevelBPETokenizer

from ...tokenization_utils import AddedToken, PreTrainedTokenizer
from ...tokenization_utils_fast import PreTrainedTokenizerFast
from ...utils import logging


logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}


class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer):
    """
    Construct a {{cookiecutter.modelname}} tokenizer. Based on byte-level Byte-Pair-Encoding.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<|endoftext|>",
        bos_token="<|endoftext|>",
        eos_token="<|endoftext|>",
        **kwargs
    ):
        bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)

        """ Initialisation """

    @property
    def vocab_size(self):
        """ Returns vocab size """

    def get_vocab(self):
        """ Returns vocab as a dict """

    def _tokenize(self, text):
        """ Returns a tokenized string. """

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""

    def convert_tokens_to_string(self, tokens):
        """ Converts a sequence of tokens (string) in a single string. """

    def save_vocabulary(self, save_directory):
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence for sequence classification tasks
        by concatenating and adding special tokens.
        A {{cookiecutter.modelname}} sequence has the following format:

        - single sequence: `<s> X </s>`
        - pair of sequences: `<s> A </s></s> B </s>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + sep + token_ids_1 + sep

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        if token_ids_1 is None:
            return [1] + ([0] * len(token_ids_0)) + [1]
        return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create a mask from the two sequences passed to be used in a sequence-pair classification task.
        {{cookiecutter.modelname}} does not make use of token type ids, therefore a list of zeros is returned.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]

        if token_ids_1 is None:
            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]

    def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
        add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
        if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
            text = " " + text
        return (text, kwargs)

class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast):
    """
    Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        merges_file,
        unk_token="<|endoftext|>",
        bos_token="<|endoftext|>",
        eos_token="<|endoftext|>",
        add_prefix_space=False,
        trim_offsets=True,
        **kwargs
    ):
        super().__init__(
            ByteLevelBPETokenizer(
                vocab_file=vocab_file,
                merges_file=merges_file,
                add_prefix_space=add_prefix_space,
                trim_offsets=trim_offsets,
            ),
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            **kwargs,
        )
        self.add_prefix_space = add_prefix_space

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
        if token_ids_1 is None:
            return output

        return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]


    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create a mask from the two sequences passed to be used in a sequence-pair classification task.
        {{cookiecutter.modelname}} does not make use of token type ids, therefore a list of zeros is returned.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]

        if token_ids_1 is None:
            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]

{% endif %}
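The standalone slow tokenizer above uses the RoBERTa-style `<s> A </s></s> B </s>` layout. A small sketch (with invented ids, not tied to any checkpoint) of what `build_inputs_with_special_tokens` and `get_special_tokens_mask` would compute for a sequence pair:

```python
# Illustrative only: cls/sep ids are made up for the example.
cls_token_id, sep_token_id = 0, 2

token_ids_0 = [10, 11, 12]
token_ids_1 = [20, 21]

# <s> A </s></s> B </s>
pair = [cls_token_id] + token_ids_0 + [sep_token_id, sep_token_id] + token_ids_1 + [sep_token_id]
# 1 marks a special token, 0 marks a sequence token
mask = [1] + [0] * len(token_ids_0) + [1, 1] + [0] * len(token_ids_1) + [1]

print(pair)  # [0, 10, 11, 12, 2, 2, 20, 21, 2]
print(mask)  # [1, 0, 0, 0, 1, 1, 0, 0, 1]
```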
@ -1,234 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->

# {{cookiecutter.modelname}}

## Overview

The {{cookiecutter.modelname}} model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>. <INSERT SHORT SUMMARY HERE>

The abstract from the paper is the following:

*<INSERT PAPER ABSTRACT HERE>*

Tips:

<INSERT TIPS ABOUT MODEL HERE>

This model was contributed by [INSERT YOUR HF USERNAME HERE](<https://huggingface.co/<INSERT YOUR HF USERNAME HERE>). The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).

## {{cookiecutter.camelcase_modelname}}Config

[[autodoc]] {{cookiecutter.camelcase_modelname}}Config


## {{cookiecutter.camelcase_modelname}}Tokenizer

[[autodoc]] {{cookiecutter.camelcase_modelname}}Tokenizer
    - build_inputs_with_special_tokens
    - get_special_tokens_mask
    - create_token_type_ids_from_sequences
    - save_vocabulary


## {{cookiecutter.camelcase_modelname}}TokenizerFast

[[autodoc]] {{cookiecutter.camelcase_modelname}}TokenizerFast


{% if "PyTorch" in cookiecutter.generate_tensorflow_pytorch_and_flax -%}
## {{cookiecutter.camelcase_modelname}}Model

[[autodoc]] {{cookiecutter.camelcase_modelname}}Model
    - forward

{% if cookiecutter.is_encoder_decoder_model == "False" %}
## {{cookiecutter.camelcase_modelname}}ForCausalLM

[[autodoc]] {{cookiecutter.camelcase_modelname}}ForCausalLM
    - forward


## {{cookiecutter.camelcase_modelname}}ForMaskedLM

[[autodoc]] {{cookiecutter.camelcase_modelname}}ForMaskedLM
    - forward


## {{cookiecutter.camelcase_modelname}}ForSequenceClassification

[[autodoc]] transformers.{{cookiecutter.camelcase_modelname}}ForSequenceClassification
    - forward

## {{cookiecutter.camelcase_modelname}}ForMultipleChoice

[[autodoc]] transformers.{{cookiecutter.camelcase_modelname}}ForMultipleChoice
    - forward


## {{cookiecutter.camelcase_modelname}}ForTokenClassification

[[autodoc]] transformers.{{cookiecutter.camelcase_modelname}}ForTokenClassification
    - forward


## {{cookiecutter.camelcase_modelname}}ForQuestionAnswering

[[autodoc]] {{cookiecutter.camelcase_modelname}}ForQuestionAnswering
    - forward

{%- else %}
## {{cookiecutter.camelcase_modelname}}ForConditionalGeneration

[[autodoc]] {{cookiecutter.camelcase_modelname}}ForConditionalGeneration
    - forward


## {{cookiecutter.camelcase_modelname}}ForSequenceClassification

[[autodoc]] {{cookiecutter.camelcase_modelname}}ForSequenceClassification
    - forward


## {{cookiecutter.camelcase_modelname}}ForQuestionAnswering

[[autodoc]] {{cookiecutter.camelcase_modelname}}ForQuestionAnswering
    - forward


## {{cookiecutter.camelcase_modelname}}ForCausalLM

[[autodoc]] {{cookiecutter.camelcase_modelname}}ForCausalLM
    - forward


{% endif -%}
{% endif -%}
{% if "TensorFlow" in cookiecutter.generate_tensorflow_pytorch_and_flax -%}

## TF{{cookiecutter.camelcase_modelname}}Model

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}Model
    - call

{% if cookiecutter.is_encoder_decoder_model == "False" %}
## TF{{cookiecutter.camelcase_modelname}}ForMaskedLM

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
    - call


## TF{{cookiecutter.camelcase_modelname}}ForCausalLM

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForCausalLM
    - call


## TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
    - call


## TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice
    - call


## TF{{cookiecutter.camelcase_modelname}}ForTokenClassification

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForTokenClassification
    - call


## TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
    - call


{%- else %}
## TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration

[[autodoc]] TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
    - call


{% endif -%}
{% endif -%}

{% if "Flax" in cookiecutter.generate_tensorflow_pytorch_and_flax -%}

## Flax{{cookiecutter.camelcase_modelname}}Model

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}Model
    - call

{% if cookiecutter.is_encoder_decoder_model == "False" %}
## Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForMaskedLM
    - call


## Flax{{cookiecutter.camelcase_modelname}}ForCausalLM

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForCausalLM
    - call


## Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification
    - call


## Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForMultipleChoice
    - call


## Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForTokenClassification
    - call


## Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
    - call


{%- else %}
## Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForSequenceClassification
    - call


## Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
    - call


## Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration

[[autodoc]] Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
    - call


{% endif -%}
{% endif -%}
@ -1,19 +0,0 @@
{
    "modelname": "BrandNewBERT",
    "uppercase_modelname": "BRAND_NEW_BERT",
    "lowercase_modelname": "brand_new_bert",
    "camelcase_modelname": "BrandNewBert",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"],
    "generate_tensorflow_pytorch_and_flax": [
        "PyTorch, TensorFlow and Flax",
        "PyTorch & TensorFlow",
        "PyTorch & Flax",
        "TensorFlow & Flax",
        "PyTorch",
        "TensorFlow",
        "Flax"
    ],
    "is_encoder_decoder_model": ["True", "False"]
}
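The cookiecutter.json above defines the prompts (and allowed choices) the removed template consumed. As a rough, hypothetical sketch of how such a context can be rendered non-interactively with the `cookiecutter` Python package — the template path, output directory, and chosen values here are illustrative, not taken from the removed command:

```python
# Sketch only: assumes the `cookiecutter` package is installed and that the
# directory passed in contains a cookiecutter.json like the one above.
from cookiecutter.main import cookiecutter

cookiecutter(
    "templates/adding_a_new_model",  # illustrative template directory
    no_input=True,                   # skip interactive prompts
    extra_context={
        "modelname": "BrandNewBERT",
        "tokenizer_type": "Standalone",
        "generate_tensorflow_pytorch_and_flax": "PyTorch",
        "is_encoder_decoder_model": "False",
    },
    output_dir="/tmp/brand_new_bert",  # illustrative output location
)
```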
@ -1,11 +0,0 @@
{
    "modelname": "Template",
    "uppercase_modelname": "TEMPLATE",
    "lowercase_modelname": "template",
    "camelcase_modelname": "Template",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplateFLAX",
    "uppercase_modelname": "TEMPLATE_FLAX",
    "lowercase_modelname": "template_flax",
    "camelcase_modelname": "TemplateFlax",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "Flax",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "FlaxNewENCDEC",
    "uppercase_modelname": "FLAX_NEW_ENC_DEC",
    "lowercase_modelname": "flax_new_enc_dec_template",
    "camelcase_modelname": "FlaxNewEncDec",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "new-flax-enc-dec-base",
    "tokenizer_type": "Based on BART",
    "generate_tensorflow_pytorch_and_flax": "Flax",
    "is_encoder_decoder_model": "True"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplatePT",
    "uppercase_modelname": "TEMPLATE_PT",
    "lowercase_modelname": "template_pt",
    "camelcase_modelname": "TemplatePt",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "PyTorch",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "PTNewENCDEC",
    "uppercase_modelname": "PT_NEW_ENC_DEC",
    "lowercase_modelname": "pt_new_enc_dec_template",
    "camelcase_modelname": "PtNewEncDec",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "pt-new-enc-dec-base",
    "tokenizer_type": "Based on BART",
    "generate_tensorflow_pytorch_and_flax": "PyTorch",
    "is_encoder_decoder_model": "True"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplateBI",
    "uppercase_modelname": "TEMPLATE_BI",
    "lowercase_modelname": "template_bi",
    "camelcase_modelname": "TemplateBi",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "bi-brand-new-bert-base-cased",
    "tokenizer_type": "Standalone",
    "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "TemplateTF",
    "uppercase_modelname": "TEMPLATE_TF",
    "lowercase_modelname": "template_tf",
    "camelcase_modelname": "TemplateTf",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_pytorch_and_flax": "TensorFlow",
    "is_encoder_decoder_model": "False"
}
@ -1,11 +0,0 @@
{
    "modelname": "NewTFENCDEC",
    "uppercase_modelname": "NEW_TF_ENC_DEC",
    "lowercase_modelname": "new_tf_enc_dec_template",
    "camelcase_modelname": "NewTFEncDec",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "new-tf-enc-dec-base_template",
    "tokenizer_type": "Based on BART",
    "generate_tensorflow_pytorch_and_flax": "TensorFlow",
    "is_encoder_decoder_model": "True"
}
@ -335,7 +335,6 @@ src/transformers/benchmark/benchmark_args_tf.py
 src/transformers/benchmark/benchmark_args_utils.py
 src/transformers/benchmark/benchmark_tf.py
 src/transformers/benchmark/benchmark_utils.py
-src/transformers/commands/add_new_model.py
 src/transformers/commands/add_new_model_like.py
 src/transformers/commands/convert.py
 src/transformers/commands/download.py