[Styling] stylify using ruff (#27144)

* try to stylify using ruff

* might need to remove these changes?

* use ruff format and ruff check

* use isinstance instead of type comparison

* use # fmt: skip

* use # fmt: skip

* nits

* some styling changes

* update ci job

* nits isinstance

* more files update

* nits

* more nits

* small nits

* check and format

* revert wrong changes

* actually use formatter instead of checker

* nits

* well docbuilder is overwriting this commit

* revert notebook changes

* try to nuke docbuilder

* style

* fix feature extraction test

* remove `indent-width = 4`

* fixup

* more nits

* update the ruff version that we use

* style

* nuke docbuilder styling

* leave the print for detected changes

* nits

* Remove file I/O

Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>

* style

* nits

* revert notebook changes

* Add # fmt skip when possible

* Add # fmt skip when possible

* Fix

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* Nits

* more fixes

* fix tapas

* Another way to skip

* Recommended way

* Fix two more files

* Remove asynch

---------

Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
Arthur 2023-11-16 17:43:19 +01:00 committed by GitHub
parent acb5b4aff5
commit 651408a077
480 changed files with 867 additions and 1059 deletions

View File

@ -157,11 +157,10 @@ jobs:
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: black --check examples tests src utils
- run: ruff examples tests src utils
- run: ruff check examples tests src utils
- run: ruff format tests src utils --check
- run: python utils/custom_init_isort.py --check_only
- run: python utils/sort_auto_mappings.py --check_only
- run: doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
- run: python utils/check_doc_toc.py
check_repository_consistency:

View File

@ -15,7 +15,6 @@
import argparse
import copy
import glob
import os
import random
from dataclasses import dataclass
@ -239,7 +238,7 @@ class CircleCIJob:
py_command = f'import os; fp = open("reports/{self.job_name}/summary_short.txt"); failed = os.linesep.join([x for x in fp.read().split(os.linesep) if x.startswith("ERROR ")]); fp.close(); fp = open("summary_short.txt", "w"); fp.write(failed); fp.close()'
check_test_command += f"$(python3 -c '{py_command}'); "
check_test_command += f'cat summary_short.txt; echo ""; exit -1; '
check_test_command += 'cat summary_short.txt; echo ""; exit -1; '
# Deeal with failed tests
check_test_command += f'elif [ -s reports/{self.job_name}/failures_short.txt ]; '
@ -249,7 +248,7 @@ class CircleCIJob:
py_command = f'import os; fp = open("reports/{self.job_name}/summary_short.txt"); failed = os.linesep.join([x for x in fp.read().split(os.linesep) if x.startswith("FAILED ")]); fp.close(); fp = open("summary_short.txt", "w"); fp.write(failed); fp.close()'
check_test_command += f"$(python3 -c '{py_command}'); "
check_test_command += f'cat summary_short.txt; echo ""; exit -1; '
check_test_command += 'cat summary_short.txt; echo ""; exit -1; '
check_test_command += f'elif [ -s reports/{self.job_name}/stats.txt ]; then echo "All tests pass!"; '
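The two hunks above drop the `f` prefix from strings that contain no placeholders, which ruff reports as F541 under the selected "F" rules; a minimal illustration, reusing the variable name from the file above:

# Flagged by ruff (F541): an f-string without any placeholders.
check_test_command = f'cat summary_short.txt; echo ""; exit -1; '

# Equivalent plain string, which this commit switches to.
check_test_command = 'cat summary_short.txt; echo ""; exit -1; '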

View File

@ -9,8 +9,8 @@ modified_only_fixup:
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@if test -n "$(modified_py_files)"; then \
echo "Checking/fixing $(modified_py_files)"; \
black $(modified_py_files); \
ruff $(modified_py_files) --fix; \
ruff check $(modified_py_files) --fix; \
ruff format $(modified_py_files);\
else \
echo "No library .py files were modified"; \
fi
@ -48,11 +48,10 @@ repo-consistency:
# this target runs checks on all files
quality:
black --check $(check_dirs) setup.py conftest.py
ruff check $(check_dirs) setup.py conftest.py
ruff format --check $(check_dirs) setup.py conftest.py
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
ruff $(check_dirs) setup.py conftest.py
doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
python utils/check_doc_toc.py
# Format source code automatically and check is there are any problems left that need manual fixing
@ -60,14 +59,13 @@ quality:
extra_style_checks:
python utils/custom_init_isort.py
python utils/sort_auto_mappings.py
doc-builder style src/transformers docs/source --max_len 119 --path_to_docs docs/source
python utils/check_doc_toc.py --fix_and_overwrite
# this target runs checks on all files and potentially modifies some of them
style:
black $(check_dirs) setup.py conftest.py
ruff $(check_dirs) setup.py conftest.py --fix
ruff check $(check_dirs) setup.py conftest.py --fix
ruff format $(check_dirs) setup.py conftest.py
${MAKE} autogenerate_code
${MAKE} extra_style_checks
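After this change the `quality` and `style` targets run ruff in two passes: a lint pass with `--fix` and a separate formatter pass. A minimal Python sketch of what `make style` now does, via subprocess; the directory list is taken from the CI job above and the helper name is illustrative:

import subprocess

def run_style(paths=("examples", "tests", "src", "utils")):
    # Lint with autofix first, then apply the formatter (mirrors the `style` target).
    subprocess.run(["ruff", "check", *paths, "--fix"], check=True)
    subprocess.run(["ruff", "format", *paths], check=True)

run_style()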

View File

@ -245,7 +245,7 @@ logits first, and then reshaped to match the size of the labels before you can c
... reduce_labels=False,
... )
... for key, value in metrics.items():
... if type(value) is np.ndarray:
... if isinstance(value, np.ndarray):
... metrics[key] = value.tolist()
... return metrics
```
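The change from `type(value) is np.ndarray` to `isinstance(value, np.ndarray)` shown here is the pattern applied across the diff; a minimal sketch of the behavioural difference, with a hypothetical subclass used only for illustration:

import numpy as np

class LoggedArray(np.ndarray):
    # Hypothetical subclass, only here to show the difference.
    pass

value = np.zeros(3).view(LoggedArray)

print(type(value) is np.ndarray)              # False: exact type comparison rejects subclasses
print(isinstance(value, np.ndarray))          # True: isinstance accepts subclasses
print(isinstance(value, (int, np.ndarray)))   # True: a tuple replaces chained `or` checks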

View File

@ -242,7 +242,7 @@ pip install -q datasets transformers evaluate
... reduce_labels=False,
... )
... for key, value in metrics.items():
... if type(value) is np.ndarray:
... if isinstance(value, np.ndarray):
... metrics[key] = value.tolist()
... return metrics
```

View File

@ -212,7 +212,7 @@ class DataTrainingArguments:
if self.validation_file is not None:
extension = self.validation_file.split(".")[-1]
assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
self.task_name = self.task_name.lower() if type(self.task_name) == str else self.task_name
self.task_name = self.task_name.lower() if isinstance(self.task_name, str) else self.task_name
def create_train_state(

View File

@ -23,7 +23,7 @@ class GLUETransformer(BaseTransformer):
mode = "sequence-classification"
def __init__(self, hparams):
if type(hparams) == dict:
if isinstance(hparams, dict):
hparams = Namespace(**hparams)
hparams.glue_output_mode = glue_output_modes[hparams.task]
num_labels = glue_tasks_num_labels[hparams.task]

View File

@ -25,7 +25,7 @@ class NERTransformer(BaseTransformer):
mode = "token-classification"
def __init__(self, hparams):
if type(hparams) == dict:
if isinstance(hparams, dict):
hparams = Namespace(**hparams)
module = import_module("tasks")
try:

View File

@ -32,7 +32,7 @@ class DeeBertEncoder(nn.Module):
self.early_exit_entropy = [-1 for _ in range(config.num_hidden_layers)]
def set_early_exit_entropy(self, x):
if (type(x) is float) or (type(x) is int):
if isinstance(x, (float, int)):
for i in range(len(self.early_exit_entropy)):
self.early_exit_entropy[i] = x
else:
@ -232,9 +232,7 @@ class DeeBertModel(BertPreTrainedModel):
outputs = (
sequence_output,
pooled_output,
) + encoder_outputs[
1:
] # add hidden_states and attentions if they are here
) + encoder_outputs[1:] # add hidden_states and attentions if they are here
return outputs # sequence_output, pooled_output, (hidden_states), (attentions), highway exits

View File

@ -158,9 +158,7 @@ header_full = """
</span>
</body>
</html>
""" % (
header_html,
)
""" % (header_html,)
st.sidebar.markdown(
header_full,
unsafe_allow_html=True,

View File

@ -1706,9 +1706,7 @@ class GeneralizedRCNN(nn.Module):
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
archive_file = pretrained_model_name_or_path
elif os.path.isfile(pretrained_model_name_or_path + ".index"):
assert (
from_tf
), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
assert from_tf, "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
pretrained_model_name_or_path + ".index"
)
archive_file = pretrained_model_name_or_path + ".index"

View File

@ -652,9 +652,7 @@ class MaskedBertModel(MaskedBertPreTrainedModel):
outputs = (
sequence_output,
pooled_output,
) + encoder_outputs[
1:
] # add hidden_states and attentions if they are here
) + encoder_outputs[1:] # add hidden_states and attentions if they are here
return outputs # sequence_output, pooled_output, (hidden_states), (attentions)

View File

@ -311,8 +311,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
tr_loss += loss.item()
if (step + 1) % args.gradient_accumulation_steps == 0 or (
# last step in epoch but step is always smaller than gradient_accumulation_steps
len(epoch_iterator) <= args.gradient_accumulation_steps
and (step + 1) == len(epoch_iterator)
len(epoch_iterator) <= args.gradient_accumulation_steps and (step + 1) == len(epoch_iterator)
):
if args.fp16:
nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)

View File

@ -239,7 +239,7 @@ def print_model_summary(model, name_width=25, line_width=180, ignore=None):
continue
if type(mod) in ignore:
continue
if [True for s in ignore if type(s) is str and s in name]:
if [True for s in ignore if isinstance(s, str) and s in name]:
continue
act_str = f"Act:{input_q.extra_repr()}"
wgt_str = f"Wgt:{weight_q.extra_repr()}"

View File

@ -1706,9 +1706,7 @@ class GeneralizedRCNN(nn.Module):
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
archive_file = pretrained_model_name_or_path
elif os.path.isfile(pretrained_model_name_or_path + ".index"):
assert (
from_tf
), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
assert from_tf, "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
pretrained_model_name_or_path + ".index"
)
archive_file = pretrained_model_name_or_path + ".index"

View File

@ -15,6 +15,7 @@
import os
import sys
SRC_DIR = os.path.join(os.path.dirname(__file__), "src")
sys.path.append(SRC_DIR)

View File

@ -1,10 +1,6 @@
[tool.black]
line-length = 119
target-version = ['py37']
[tool.ruff]
# Never enforce `E501` (line length violations).
ignore = ["C901", "E501", "E741"]
ignore = ["C901", "E501", "E741", "F402", "F823" ]
select = ["C", "E", "F", "I", "W"]
line-length = 119
@ -18,6 +14,19 @@ line-length = 119
lines-after-imports = 2
known-first-party = ["transformers"]
[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Like Black, indent with spaces, rather than tabs.
indent-style = "space"
# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false
# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"
[tool.pytest.ini_options]
doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
doctest_glob="**/*.md"

View File

@ -1,10 +1,12 @@
from collections import Counter
import datasets
import transformers
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS
from transformers.utils import logging
logging.set_verbosity_info()
TOKENIZER_CLASSES = {
@ -101,8 +103,8 @@ def check_details(line, spm_ids, tok_ids, slow, fast):
except Exception:
pass
ok_start = fast.decode(spm_ids[:first])
ok_end = fast.decode(spm_ids[last:])
fast.decode(spm_ids[:first])
fast.decode(spm_ids[last:])
wrong = fast.decode(spm_ids[first:last])
print()
print(wrong)

View File

@ -24,13 +24,14 @@
#
# It will be used then as "stas/tiny-wmt19-en-ru"
from pathlib import Path
import json
import tempfile
from pathlib import Path
from transformers import FSMTTokenizer, FSMTConfig, FSMTForConditionalGeneration
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
from transformers.models.fsmt.tokenization_fsmt import VOCAB_FILES_NAMES
mname_tiny = "tiny-wmt19-en-ru"
# Build

View File

@ -27,16 +27,18 @@
# It will be used then as "stas/tiny-wmt19-en-de"
# Build
from transformers import FSMTTokenizer, FSMTConfig, FSMTForConditionalGeneration
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
mname = "facebook/wmt19-en-de"
tokenizer = FSMTTokenizer.from_pretrained(mname)
# get the correct vocab sizes, etc. from the master model
config = FSMTConfig.from_pretrained(mname)
config.update(dict(
d_model=4,
encoder_layers=1, decoder_layers=1,
encoder_ffn_dim=4, decoder_ffn_dim=4,
encoder_attention_heads=1, decoder_attention_heads=1))
config.update({
"d_model": 4,
"encoder_layers": 1, "decoder_layers": 1,
"encoder_ffn_dim": 4, "decoder_ffn_dim": 4,
"encoder_attention_heads": 1, "decoder_attention_heads": 1})
tiny_model = FSMTForConditionalGeneration(config)
print(f"num of params {tiny_model.num_parameters()}")

View File

@ -19,6 +19,7 @@
import os
from pathlib import Path
def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
texts = {

View File

@ -19,6 +19,7 @@
import os
from pathlib import Path
def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
texts = {

View File

@ -19,6 +19,7 @@
import os
from pathlib import Path
def write_model_card(model_card_dir, src_lang, tgt_lang):
texts = {

View File

@ -22,6 +22,7 @@
# 3. build
import sentencepiece as spm
# pegasus:
# 1. no bos
# 2. eos_id is 1

View File

@ -15,8 +15,8 @@
Script to close stale issue. Taken in part from the AllenNLP repository.
https://github.com/allenai/allennlp.
"""
from datetime import datetime as dt
import os
from datetime import datetime as dt
import github.GithubException
from github import Github
@ -39,7 +39,7 @@ def main():
for i, issue in enumerate(open_issues):
print(i, issue)
comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True)
comments = sorted(list(issue.get_comments()), key=lambda i: i.created_at, reverse=True)
last_comment = comments[0] if len(comments) > 0 else None
if (
last_comment is not None and last_comment.user.login == "github-actions[bot]"

View File

@ -99,7 +99,6 @@ _deps = [
"accelerate>=0.20.3",
"av==9.2.0", # Latest version of PyAV (10.0.0) has issues with audio stream.
"beautifulsoup4",
"black~=23.1",
"codecarbon==1.2.0",
"cookiecutter==1.7.3",
"dataclasses",
@ -156,7 +155,7 @@ _deps = [
"rhoknp>=1.1.0,<1.3.1",
"rjieba",
"rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
"ruff>=0.0.241,<=0.0.259",
"ruff>=0.1.5,<=0.2",
"sacrebleu>=1.4.12,<2.0.0",
"sacremoses",
"safetensors>=0.3.1",
@ -310,7 +309,7 @@ extras["testing"] = (
"dill",
"evaluate",
"pytest-timeout",
"black",
"ruff",
"sacrebleu",
"rouge-score",
"nltk",
@ -329,7 +328,7 @@ extras["testing"] = (
extras["deepspeed-testing"] = extras["deepspeed"] + extras["testing"] + extras["optuna"] + extras["sentencepiece"]
extras["quality"] = deps_list("black", "datasets", "isort", "ruff", "GitPython", "hf-doc-builder", "urllib3")
extras["quality"] = deps_list("datasets", "isort", "ruff", "GitPython", "hf-doc-builder", "urllib3")
extras["all"] = (
extras["tf"]

View File

@ -246,6 +246,7 @@ class PretrainedConfig(PushToHubMixin):
not be XLA-compatible. This option is here for backward compatibility and will be removed in Transformers
v5.
"""
model_type: str = ""
is_composition: bool = False
attribute_map: Dict[str, str] = {}

View File

@ -724,9 +724,7 @@ class MBart50Converter(SpmConverter):
("<unk>", 0.0),
]
vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
# fmt: off
vocab += [("ar_AR", 0.0), ("cs_CZ", 0.0), ("de_DE", 0.0), ("en_XX", 0.0), ("es_XX", 0.0), ("et_EE", 0.0), ("fi_FI", 0.0), ("fr_XX", 0.0), ("gu_IN", 0.0), ("hi_IN", 0.0), ("it_IT", 0.0), ("ja_XX", 0.0), ("kk_KZ", 0.0), ("ko_KR", 0.0), ("lt_LT", 0.0), ("lv_LV", 0.0), ("my_MM", 0.0), ("ne_NP", 0.0), ("nl_XX", 0.0), ("ro_RO", 0.0), ("ru_RU", 0.0), ("si_LK", 0.0), ("tr_TR", 0.0), ("vi_VN", 0.0), ("zh_CN", 0.0), ("af_ZA", 0.0), ("az_AZ", 0.0), ("bn_IN", 0.0), ("fa_IR", 0.0), ("he_IL", 0.0), ("hr_HR", 0.0), ("id_ID", 0.0), ("ka_GE", 0.0), ("km_KH", 0.0), ("mk_MK", 0.0), ("ml_IN", 0.0), ("mn_MN", 0.0), ("mr_IN", 0.0), ("pl_PL", 0.0), ("ps_AF", 0.0), ("pt_XX", 0.0), ("sv_SE", 0.0), ("sw_KE", 0.0), ("ta_IN", 0.0), ("te_IN", 0.0), ("th_TH", 0.0), ("tl_XX", 0.0), ("uk_UA", 0.0), ("ur_PK", 0.0), ("xh_ZA", 0.0), ("gl_ES", 0.0), ("sl_SI", 0.0)]
# fmt: on
vocab += [("ar_AR", 0.0), ("cs_CZ", 0.0), ("de_DE", 0.0), ("en_XX", 0.0), ("es_XX", 0.0), ("et_EE", 0.0), ("fi_FI", 0.0), ("fr_XX", 0.0), ("gu_IN", 0.0), ("hi_IN", 0.0), ("it_IT", 0.0), ("ja_XX", 0.0), ("kk_KZ", 0.0), ("ko_KR", 0.0), ("lt_LT", 0.0), ("lv_LV", 0.0), ("my_MM", 0.0), ("ne_NP", 0.0), ("nl_XX", 0.0), ("ro_RO", 0.0), ("ru_RU", 0.0), ("si_LK", 0.0), ("tr_TR", 0.0), ("vi_VN", 0.0), ("zh_CN", 0.0), ("af_ZA", 0.0), ("az_AZ", 0.0), ("bn_IN", 0.0), ("fa_IR", 0.0), ("he_IL", 0.0), ("hr_HR", 0.0), ("id_ID", 0.0), ("ka_GE", 0.0), ("km_KH", 0.0), ("mk_MK", 0.0), ("ml_IN", 0.0), ("mn_MN", 0.0), ("mr_IN", 0.0), ("pl_PL", 0.0), ("ps_AF", 0.0), ("pt_XX", 0.0), ("sv_SE", 0.0), ("sw_KE", 0.0), ("ta_IN", 0.0), ("te_IN", 0.0), ("th_TH", 0.0), ("tl_XX", 0.0), ("uk_UA", 0.0), ("ur_PK", 0.0), ("xh_ZA", 0.0), ("gl_ES", 0.0), ("sl_SI", 0.0)] # fmt: skip
vocab += [("<mask>", 0.0)]
return vocab
@ -753,11 +751,7 @@ class NllbConverter(SpmConverter):
("<unk>", 0.0),
]
vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
vocab += [
# fmt: off
('ace_Arab', 0.0), ('ace_Latn', 0.0), ('acm_Arab', 0.0), ('acq_Arab', 0.0), ('aeb_Arab', 0.0), ('afr_Latn', 0.0), ('ajp_Arab', 0.0), ('aka_Latn', 0.0), ('amh_Ethi', 0.0), ('apc_Arab', 0.0), ('arb_Arab', 0.0), ('ars_Arab', 0.0), ('ary_Arab', 0.0), ('arz_Arab', 0.0), ('asm_Beng', 0.0), ('ast_Latn', 0.0), ('awa_Deva', 0.0), ('ayr_Latn', 0.0), ('azb_Arab', 0.0), ('azj_Latn', 0.0), ('bak_Cyrl', 0.0), ('bam_Latn', 0.0), ('ban_Latn', 0.0), ('bel_Cyrl', 0.0), ('bem_Latn', 0.0), ('ben_Beng', 0.0), ('bho_Deva', 0.0), ('bjn_Arab', 0.0), ('bjn_Latn', 0.0), ('bod_Tibt', 0.0), ('bos_Latn', 0.0), ('bug_Latn', 0.0), ('bul_Cyrl', 0.0), ('cat_Latn', 0.0), ('ceb_Latn', 0.0), ('ces_Latn', 0.0), ('cjk_Latn', 0.0), ('ckb_Arab', 0.0), ('crh_Latn', 0.0), ('cym_Latn', 0.0), ('dan_Latn', 0.0), ('deu_Latn', 0.0), ('dik_Latn', 0.0), ('dyu_Latn', 0.0), ('dzo_Tibt', 0.0), ('ell_Grek', 0.0), ('eng_Latn', 0.0), ('epo_Latn', 0.0), ('est_Latn', 0.0), ('eus_Latn', 0.0), ('ewe_Latn', 0.0), ('fao_Latn', 0.0), ('pes_Arab', 0.0), ('fij_Latn', 0.0), ('fin_Latn', 0.0), ('fon_Latn', 0.0), ('fra_Latn', 0.0), ('fur_Latn', 0.0), ('fuv_Latn', 0.0), ('gla_Latn', 0.0), ('gle_Latn', 0.0), ('glg_Latn', 0.0), ('grn_Latn', 0.0), ('guj_Gujr', 0.0), ('hat_Latn', 0.0), ('hau_Latn', 0.0), ('heb_Hebr', 0.0), ('hin_Deva', 0.0), ('hne_Deva', 0.0), ('hrv_Latn', 0.0), ('hun_Latn', 0.0), ('hye_Armn', 0.0), ('ibo_Latn', 0.0), ('ilo_Latn', 0.0), ('ind_Latn', 0.0), ('isl_Latn', 0.0), ('ita_Latn', 0.0), ('jav_Latn', 0.0), ('jpn_Jpan', 0.0), ('kab_Latn', 0.0), ('kac_Latn', 0.0), ('kam_Latn', 0.0), ('kan_Knda', 0.0), ('kas_Arab', 0.0), ('kas_Deva', 0.0), ('kat_Geor', 0.0), ('knc_Arab', 0.0), ('knc_Latn', 0.0), ('kaz_Cyrl', 0.0), ('kbp_Latn', 0.0), ('kea_Latn', 0.0), ('khm_Khmr', 0.0), ('kik_Latn', 0.0), ('kin_Latn', 0.0), ('kir_Cyrl', 0.0), ('kmb_Latn', 0.0), ('kon_Latn', 0.0), ('kor_Hang', 0.0), ('kmr_Latn', 0.0), ('lao_Laoo', 0.0), ('lvs_Latn', 0.0), ('lij_Latn', 0.0), ('lim_Latn', 0.0), ('lin_Latn', 0.0), ('lit_Latn', 0.0), ('lmo_Latn', 0.0), ('ltg_Latn', 0.0), ('ltz_Latn', 0.0), ('lua_Latn', 0.0), ('lug_Latn', 0.0), ('luo_Latn', 0.0), ('lus_Latn', 0.0), ('mag_Deva', 0.0), ('mai_Deva', 0.0), ('mal_Mlym', 0.0), ('mar_Deva', 0.0), ('min_Latn', 0.0), ('mkd_Cyrl', 0.0), ('plt_Latn', 0.0), ('mlt_Latn', 0.0), ('mni_Beng', 0.0), ('khk_Cyrl', 0.0), ('mos_Latn', 0.0), ('mri_Latn', 0.0), ('zsm_Latn', 0.0), ('mya_Mymr', 0.0), ('nld_Latn', 0.0), ('nno_Latn', 0.0), ('nob_Latn', 0.0), ('npi_Deva', 0.0), ('nso_Latn', 0.0), ('nus_Latn', 0.0), ('nya_Latn', 0.0), ('oci_Latn', 0.0), ('gaz_Latn', 0.0), ('ory_Orya', 0.0), ('pag_Latn', 0.0), ('pan_Guru', 0.0), ('pap_Latn', 0.0), ('pol_Latn', 0.0), ('por_Latn', 0.0), ('prs_Arab', 0.0), ('pbt_Arab', 0.0), ('quy_Latn', 0.0), ('ron_Latn', 0.0), ('run_Latn', 0.0), ('rus_Cyrl', 0.0), ('sag_Latn', 0.0), ('san_Deva', 0.0), ('sat_Beng', 0.0), ('scn_Latn', 0.0), ('shn_Mymr', 0.0), ('sin_Sinh', 0.0), ('slk_Latn', 0.0), ('slv_Latn', 0.0), ('smo_Latn', 0.0), ('sna_Latn', 0.0), ('snd_Arab', 0.0), ('som_Latn', 0.0), ('sot_Latn', 0.0), ('spa_Latn', 0.0), ('als_Latn', 0.0), ('srd_Latn', 0.0), ('srp_Cyrl', 0.0), ('ssw_Latn', 0.0), ('sun_Latn', 0.0), ('swe_Latn', 0.0), ('swh_Latn', 0.0), ('szl_Latn', 0.0), ('tam_Taml', 0.0), ('tat_Cyrl', 0.0), ('tel_Telu', 0.0), ('tgk_Cyrl', 0.0), ('tgl_Latn', 0.0), ('tha_Thai', 0.0), ('tir_Ethi', 0.0), ('taq_Latn', 0.0), ('taq_Tfng', 0.0), ('tpi_Latn', 0.0), ('tsn_Latn', 0.0), ('tso_Latn', 0.0), ('tuk_Latn', 0.0), ('tum_Latn', 0.0), ('tur_Latn', 0.0), ('twi_Latn', 0.0), ('tzm_Tfng', 0.0), ('uig_Arab', 0.0), 
('ukr_Cyrl', 0.0), ('umb_Latn', 0.0), ('urd_Arab', 0.0), ('uzn_Latn', 0.0), ('vec_Latn', 0.0), ('vie_Latn', 0.0), ('war_Latn', 0.0), ('wol_Latn', 0.0), ('xho_Latn', 0.0), ('ydd_Hebr', 0.0), ('yor_Latn', 0.0), ('yue_Hant', 0.0), ('zho_Hans', 0.0), ('zho_Hant', 0.0), ('zul_Latn', 0.0)
# fmt: on
]
vocab += [('ace_Arab', 0.0), ('ace_Latn', 0.0), ('acm_Arab', 0.0), ('acq_Arab', 0.0), ('aeb_Arab', 0.0), ('afr_Latn', 0.0), ('ajp_Arab', 0.0), ('aka_Latn', 0.0), ('amh_Ethi', 0.0), ('apc_Arab', 0.0), ('arb_Arab', 0.0), ('ars_Arab', 0.0), ('ary_Arab', 0.0), ('arz_Arab', 0.0), ('asm_Beng', 0.0), ('ast_Latn', 0.0), ('awa_Deva', 0.0), ('ayr_Latn', 0.0), ('azb_Arab', 0.0), ('azj_Latn', 0.0), ('bak_Cyrl', 0.0), ('bam_Latn', 0.0), ('ban_Latn', 0.0), ('bel_Cyrl', 0.0), ('bem_Latn', 0.0), ('ben_Beng', 0.0), ('bho_Deva', 0.0), ('bjn_Arab', 0.0), ('bjn_Latn', 0.0), ('bod_Tibt', 0.0), ('bos_Latn', 0.0), ('bug_Latn', 0.0), ('bul_Cyrl', 0.0), ('cat_Latn', 0.0), ('ceb_Latn', 0.0), ('ces_Latn', 0.0), ('cjk_Latn', 0.0), ('ckb_Arab', 0.0), ('crh_Latn', 0.0), ('cym_Latn', 0.0), ('dan_Latn', 0.0), ('deu_Latn', 0.0), ('dik_Latn', 0.0), ('dyu_Latn', 0.0), ('dzo_Tibt', 0.0), ('ell_Grek', 0.0), ('eng_Latn', 0.0), ('epo_Latn', 0.0), ('est_Latn', 0.0), ('eus_Latn', 0.0), ('ewe_Latn', 0.0), ('fao_Latn', 0.0), ('pes_Arab', 0.0), ('fij_Latn', 0.0), ('fin_Latn', 0.0), ('fon_Latn', 0.0), ('fra_Latn', 0.0), ('fur_Latn', 0.0), ('fuv_Latn', 0.0), ('gla_Latn', 0.0), ('gle_Latn', 0.0), ('glg_Latn', 0.0), ('grn_Latn', 0.0), ('guj_Gujr', 0.0), ('hat_Latn', 0.0), ('hau_Latn', 0.0), ('heb_Hebr', 0.0), ('hin_Deva', 0.0), ('hne_Deva', 0.0), ('hrv_Latn', 0.0), ('hun_Latn', 0.0), ('hye_Armn', 0.0), ('ibo_Latn', 0.0), ('ilo_Latn', 0.0), ('ind_Latn', 0.0), ('isl_Latn', 0.0), ('ita_Latn', 0.0), ('jav_Latn', 0.0), ('jpn_Jpan', 0.0), ('kab_Latn', 0.0), ('kac_Latn', 0.0), ('kam_Latn', 0.0), ('kan_Knda', 0.0), ('kas_Arab', 0.0), ('kas_Deva', 0.0), ('kat_Geor', 0.0), ('knc_Arab', 0.0), ('knc_Latn', 0.0), ('kaz_Cyrl', 0.0), ('kbp_Latn', 0.0), ('kea_Latn', 0.0), ('khm_Khmr', 0.0), ('kik_Latn', 0.0), ('kin_Latn', 0.0), ('kir_Cyrl', 0.0), ('kmb_Latn', 0.0), ('kon_Latn', 0.0), ('kor_Hang', 0.0), ('kmr_Latn', 0.0), ('lao_Laoo', 0.0), ('lvs_Latn', 0.0), ('lij_Latn', 0.0), ('lim_Latn', 0.0), ('lin_Latn', 0.0), ('lit_Latn', 0.0), ('lmo_Latn', 0.0), ('ltg_Latn', 0.0), ('ltz_Latn', 0.0), ('lua_Latn', 0.0), ('lug_Latn', 0.0), ('luo_Latn', 0.0), ('lus_Latn', 0.0), ('mag_Deva', 0.0), ('mai_Deva', 0.0), ('mal_Mlym', 0.0), ('mar_Deva', 0.0), ('min_Latn', 0.0), ('mkd_Cyrl', 0.0), ('plt_Latn', 0.0), ('mlt_Latn', 0.0), ('mni_Beng', 0.0), ('khk_Cyrl', 0.0), ('mos_Latn', 0.0), ('mri_Latn', 0.0), ('zsm_Latn', 0.0), ('mya_Mymr', 0.0), ('nld_Latn', 0.0), ('nno_Latn', 0.0), ('nob_Latn', 0.0), ('npi_Deva', 0.0), ('nso_Latn', 0.0), ('nus_Latn', 0.0), ('nya_Latn', 0.0), ('oci_Latn', 0.0), ('gaz_Latn', 0.0), ('ory_Orya', 0.0), ('pag_Latn', 0.0), ('pan_Guru', 0.0), ('pap_Latn', 0.0), ('pol_Latn', 0.0), ('por_Latn', 0.0), ('prs_Arab', 0.0), ('pbt_Arab', 0.0), ('quy_Latn', 0.0), ('ron_Latn', 0.0), ('run_Latn', 0.0), ('rus_Cyrl', 0.0), ('sag_Latn', 0.0), ('san_Deva', 0.0), ('sat_Beng', 0.0), ('scn_Latn', 0.0), ('shn_Mymr', 0.0), ('sin_Sinh', 0.0), ('slk_Latn', 0.0), ('slv_Latn', 0.0), ('smo_Latn', 0.0), ('sna_Latn', 0.0), ('snd_Arab', 0.0), ('som_Latn', 0.0), ('sot_Latn', 0.0), ('spa_Latn', 0.0), ('als_Latn', 0.0), ('srd_Latn', 0.0), ('srp_Cyrl', 0.0), ('ssw_Latn', 0.0), ('sun_Latn', 0.0), ('swe_Latn', 0.0), ('swh_Latn', 0.0), ('szl_Latn', 0.0), ('tam_Taml', 0.0), ('tat_Cyrl', 0.0), ('tel_Telu', 0.0), ('tgk_Cyrl', 0.0), ('tgl_Latn', 0.0), ('tha_Thai', 0.0), ('tir_Ethi', 0.0), ('taq_Latn', 0.0), ('taq_Tfng', 0.0), ('tpi_Latn', 0.0), ('tsn_Latn', 0.0), ('tso_Latn', 0.0), ('tuk_Latn', 0.0), ('tum_Latn', 0.0), ('tur_Latn', 0.0), ('twi_Latn', 0.0), ('tzm_Tfng', 0.0), 
('uig_Arab', 0.0), ('ukr_Cyrl', 0.0), ('umb_Latn', 0.0), ('urd_Arab', 0.0), ('uzn_Latn', 0.0), ('vec_Latn', 0.0), ('vie_Latn', 0.0), ('war_Latn', 0.0), ('wol_Latn', 0.0), ('xho_Latn', 0.0), ('ydd_Hebr', 0.0), ('yor_Latn', 0.0), ('yue_Hant', 0.0), ('zho_Hans', 0.0), ('zho_Hant', 0.0), ('zul_Latn', 0.0)] # fmt: skip
vocab += [("<mask>", 0.0)]
return vocab
@ -1128,9 +1122,7 @@ class XGLMConverter(SpmConverter):
("<unk>", 0.0),
]
vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
# fmt: off
vocab += [("<madeupword0>", 0.0), ("<madeupword1>", 0.0), ("<madeupword2>", 0.0), ("<madeupword3>", 0.0), ("<madeupword4>", 0.0), ("<madeupword5>", 0.0), ("<madeupword6>", 0.0)]
# fmt: on
vocab += [("<madeupword0>", 0.0), ("<madeupword1>", 0.0), ("<madeupword2>", 0.0), ("<madeupword3>", 0.0), ("<madeupword4>", 0.0), ("<madeupword5>", 0.0), ("<madeupword6>", 0.0)] # fmt: skip
return vocab
def unk_id(self, proto):
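The hunks above collapse `# fmt: off` / `# fmt: on` fences around a single long assignment into one trailing `# fmt: skip` comment, which the ruff formatter honours per statement; a minimal before/after sketch with a shortened vocab list:

# Before: a pair of directives protecting the region from reformatting.
# fmt: off
vocab = [("ar_AR", 0.0), ("cs_CZ", 0.0), ("de_DE", 0.0), ("en_XX", 0.0)]
# fmt: on

# After: a single trailing comment on the statement itself.
vocab = [("ar_AR", 0.0), ("cs_CZ", 0.0), ("de_DE", 0.0), ("en_XX", 0.0)]  # fmt: skip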

View File

@ -121,7 +121,7 @@ def torch_default_data_collator(features: List[InputDataClass]) -> Dict[str, Any
if isinstance(first["label_ids"], torch.Tensor):
batch["labels"] = torch.stack([f["label_ids"] for f in features])
else:
dtype = torch.long if type(first["label_ids"][0]) is int else torch.float
dtype = torch.long if isinstance(first["label_ids"][0], int) else torch.float
batch["labels"] = torch.tensor([f["label_ids"] for f in features], dtype=dtype)
# Handling of all other possible keys.
@ -196,7 +196,7 @@ def numpy_default_data_collator(features: List[InputDataClass]) -> Dict[str, Any
if isinstance(first["label_ids"], np.ndarray):
batch["labels"] = np.stack([f["label_ids"] for f in features])
else:
dtype = np.int64 if type(first["label_ids"][0]) is int else np.float32
dtype = np.int64 if isinstance(first["label_ids"][0], int) else np.float32
batch["labels"] = np.array([f["label_ids"] for f in features], dtype=dtype)
# Handling of all other possible keys.

View File

@ -6,7 +6,6 @@ deps = {
"accelerate": "accelerate>=0.20.3",
"av": "av==9.2.0",
"beautifulsoup4": "beautifulsoup4",
"black": "black~=23.1",
"codecarbon": "codecarbon==1.2.0",
"cookiecutter": "cookiecutter==1.7.3",
"dataclasses": "dataclasses",
@ -62,7 +61,7 @@ deps = {
"rhoknp": "rhoknp>=1.1.0,<1.3.1",
"rjieba": "rjieba",
"rouge-score": "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
"ruff": "ruff>=0.0.241,<=0.0.259",
"ruff": "ruff>=0.1.5,<=0.2",
"sacrebleu": "sacrebleu>=1.4.12,<2.0.0",
"sacremoses": "sacremoses",
"safetensors": "safetensors>=0.3.1",

View File

@ -245,8 +245,7 @@ def is_valid_annotation_coco_detection(annotation: Dict[str, Union[List, Tuple]]
and isinstance(annotation["annotations"], (list, tuple))
and (
# an image can have no annotations
len(annotation["annotations"]) == 0
or isinstance(annotation["annotations"][0], dict)
len(annotation["annotations"]) == 0 or isinstance(annotation["annotations"][0], dict)
)
):
return True
@ -262,8 +261,7 @@ def is_valid_annotation_coco_panoptic(annotation: Dict[str, Union[List, Tuple]])
and isinstance(annotation["segments_info"], (list, tuple))
and (
# an image can have no segments
len(annotation["segments_info"]) == 0
or isinstance(annotation["segments_info"][0], dict)
len(annotation["segments_info"]) == 0 or isinstance(annotation["segments_info"][0], dict)
)
):
return True

View File

@ -179,6 +179,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
- **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
models, `pixel_values` for vision models and `input_values` for speech models).
"""
config_class = None
base_model_prefix = ""
main_input_name = "input_ids"

View File

@ -1075,6 +1075,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
- **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
models, `pixel_values` for vision models and `input_values` for speech models).
"""
config_class = None
base_model_prefix = ""
main_input_name = "input_ids"
@ -3242,6 +3243,7 @@ class TFSharedEmbeddings(tf.keras.layers.Layer):
kwargs (`Dict[str, Any]`, *optional*):
Additional keyword arguments passed along to the `__init__` of `tf.keras.layers.Layer`.
"""
# TODO (joao): flagged for delection due to embeddings refactor
def __init__(self, vocab_size: int, hidden_size: int, initializer_range: Optional[float] = None, **kwargs):

View File

@ -1095,6 +1095,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
- **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
models, `pixel_values` for vision models and `input_values` for speech models).
"""
config_class = None
base_model_prefix = ""
main_input_name = "input_ids"

View File

@ -97,6 +97,7 @@ class AlignTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "align_text_model"
def __init__(

View File

@ -100,6 +100,7 @@ class AltCLIPTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "altclip_text_model"
def __init__(

View File

@ -174,8 +174,7 @@ class AltCLIPOutput(ModelOutput):
text_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
The text embeddings obtained by applying the projection layer to the pooled output of [`AltCLIPTextModel`].
image_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
The image embeddings obtained by applying the projection layer to the pooled output of
[`AltCLIPVisionModel`].
The image embeddings obtained by applying the projection layer to the pooled output of [`AltCLIPVisionModel`].
text_model_output(`BaseModelOutputWithPooling`):
The output of the [`AltCLIPTextModel`].
vision_model_output(`BaseModelOutputWithPooling`):
@ -1049,9 +1048,7 @@ class AltCLIPPreTrainedModel(PreTrainedModel):
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
elif isinstance(module, AltCLIPMLP):
factor = self.config.initializer_factor
in_proj_std = (
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
)
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
nn.init.normal_(module.fc1.weight, std=fc_std)
nn.init.normal_(module.fc2.weight, std=in_proj_std)

View File

@ -35,6 +35,7 @@ class AltCLIPProcessor(ProcessorMixin):
tokenizer ([`XLMRobertaTokenizerFast`], *optional*):
The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "CLIPImageProcessor"
tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")

View File

@ -86,6 +86,7 @@ class ASTConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "audio-spectrogram-transformer"
def __init__(

View File

@ -131,6 +131,7 @@ class AutoformerConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "autoformer"
attribute_map = {
"hidden_size": "d_model",

View File

@ -46,6 +46,7 @@ class BarkProcessor(ProcessorMixin):
a list of `voice_preset_names`.
"""
tokenizer_class = "AutoTokenizer"
attributes = ["tokenizer"]

View File

@ -107,6 +107,7 @@ class BartConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bart"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}

View File

@ -147,6 +147,7 @@ class BartTokenizerFast(PreTrainedTokenizerFast):
trim_offsets (`bool`, *optional*, defaults to `True`):
Whether the post processing step should trim offsets to avoid including whitespaces.
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES

View File

@ -115,6 +115,7 @@ class BeitConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "beit"
def __init__(

View File

@ -136,6 +136,7 @@ class BertConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bert"
def __init__(

View File

@ -84,6 +84,7 @@ class BertGenerationConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bert-generation"
def __init__(

View File

@ -104,6 +104,7 @@ class BigBirdConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "big_bird"
def __init__(

View File

@ -896,15 +896,11 @@ class BigBirdBlockSparseAttention(nn.Module):
# global keys (corresponding to 1st key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, :to_block_size] = attn_weights[
:, :, :, :, :to_block_size
].view(
bsz, n_heads, -1, to_block_size
) # first_band_product
].view(bsz, n_heads, -1, to_block_size) # first_band_product
# global keys (corresponding to last key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, -to_block_size:] = attn_weights[
:, :, :, :, -to_block_size:
].view(
bsz, n_heads, -1, to_block_size
) # last_band_product
].view(bsz, n_heads, -1, to_block_size) # last_band_product
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch

View File

@ -120,6 +120,7 @@ class BigBirdPegasusConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bigbird_pegasus"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {

View File

@ -683,15 +683,11 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
# global keys (corresponding to 1st key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, :to_block_size] = attn_weights[
:, :, :, :, :to_block_size
].view(
bsz, n_heads, -1, to_block_size
) # first_band_product
].view(bsz, n_heads, -1, to_block_size) # first_band_product
# global keys (corresponding to last key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, -to_block_size:] = attn_weights[
:, :, :, :, -to_block_size:
].view(
bsz, n_heads, -1, to_block_size
) # last_band_product
].view(bsz, n_heads, -1, to_block_size) # last_band_product
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch

View File

@ -93,6 +93,7 @@ class BioGptConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "biogpt"
def __init__(

View File

@ -85,6 +85,7 @@ class BitConfig(BackboneConfigMixin, PretrainedConfig):
>>> configuration = model.config
```
"""
model_type = "bit"
layer_types = ["preactivation", "bottleneck"]
supported_padding = ["SAME", "VALID"]

View File

@ -104,6 +104,7 @@ class BlenderbotConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "blenderbot"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}

View File

@ -1511,9 +1511,7 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
>>> from transformers import AutoTokenizer, BlenderbotForCausalLM
>>> tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
>>> model = BlenderbotForCausalLM.from_pretrained(
... "facebook/blenderbot-400M-distill", add_cross_attention=False
... )
>>> model = BlenderbotForCausalLM.from_pretrained("facebook/blenderbot-400M-distill", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

View File

@ -376,8 +376,8 @@ class BlenderbotTokenizer(PreTrainedTokenizer):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does
not make use of token type ids, therefore a list of zeros is returned.
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does not
make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (`List[int]`):

View File

@ -212,8 +212,8 @@ class BlenderbotTokenizerFast(PreTrainedTokenizerFast):
`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while not
having been set.
Blenderbot tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will
greedily comprise the space before the *<mask>*.
Blenderbot tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will greedily
comprise the space before the *<mask>*.
"""
if self._mask_token is None:
if self.verbose:
@ -264,8 +264,8 @@ class BlenderbotTokenizerFast(PreTrainedTokenizerFast):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does
not make use of token type ids, therefore a list of zeros is returned.
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does not
make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (`List[int]`):

View File

@ -104,6 +104,7 @@ class BlenderbotSmallConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "blenderbot-small"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}

View File

@ -1478,9 +1478,7 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
>>> from transformers import AutoTokenizer, BlenderbotSmallForCausalLM
>>> tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot_small-90M")
>>> model = BlenderbotSmallForCausalLM.from_pretrained(
... "facebook/blenderbot_small-90M", add_cross_attention=False
... )
>>> model = BlenderbotSmallForCausalLM.from_pretrained("facebook/blenderbot_small-90M", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

View File

@ -109,6 +109,7 @@ class BlipTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "blip_text_model"
def __init__(

View File

@ -742,13 +742,13 @@ class BlipTextModel(BlipTextPreTrainedModel):
# If a 2D or 3D attention mask is provided for the cross-attention
# we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
if encoder_hidden_states is not None:
if type(encoder_hidden_states) == list:
if isinstance(encoder_hidden_states, list):
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[0].size()
else:
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
if type(encoder_attention_mask) == list:
if isinstance(encoder_attention_mask, list):
encoder_extended_attention_mask = [self.invert_attention_mask(mask) for mask in encoder_attention_mask]
elif encoder_attention_mask is None:
encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)

View File

@ -741,13 +741,13 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel):
# If a 2D or 3D attention mask is provided for the cross-attention
# we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
if encoder_hidden_states is not None:
if type(encoder_hidden_states) == list:
if isinstance(encoder_hidden_states, list):
encoder_batch_size, encoder_sequence_length, _ = shape_list(encoder_hidden_states[0])
else:
encoder_batch_size, encoder_sequence_length, _ = shape_list(encoder_hidden_states)
encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
if type(encoder_attention_mask) == list:
if isinstance(encoder_attention_mask, list):
encoder_extended_attention_mask = [invert_attention_mask(mask) for mask in encoder_attention_mask]
elif encoder_attention_mask is None:
encoder_attention_mask = tf.ones(encoder_hidden_shape)

View File

@ -37,6 +37,7 @@ class BlipProcessor(ProcessorMixin):
tokenizer (`BertTokenizerFast`):
An instance of ['BertTokenizerFast`]. The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "BlipImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

View File

@ -190,6 +190,7 @@ class Blip2QFormerConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "blip_2_qformer"
def __init__(

View File

@ -1123,13 +1123,13 @@ class Blip2QFormerModel(Blip2PreTrainedModel):
# If a 2D or 3D attention mask is provided for the cross-attention
# we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
if encoder_hidden_states is not None:
if type(encoder_hidden_states) == list:
if isinstance(encoder_hidden_states, list):
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[0].size()
else:
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
if type(encoder_attention_mask) == list:
if isinstance(encoder_attention_mask, list):
encoder_extended_attention_mask = [self.invert_attention_mask(mask) for mask in encoder_attention_mask]
elif encoder_attention_mask is None:
encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)

View File

@ -37,6 +37,7 @@ class Blip2Processor(ProcessorMixin):
tokenizer (`AutoTokenizer`):
An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "BlipImageProcessor"
tokenizer_class = "AutoTokenizer"
@ -141,8 +142,8 @@ class Blip2Processor(ProcessorMixin):
# Copied from transformers.models.blip.processing_blip.BlipProcessor.decode with BertTokenizerFast->PreTrainedTokenizer
def decode(self, *args, **kwargs):
"""
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer
to the docstring of this method for more information.
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
"""
return self.tokenizer.decode(*args, **kwargs)

View File

@ -73,6 +73,7 @@ class BridgeTowerVisionConfig(PretrainedConfig):
>>> # Accessing the configuration
>>> configuration
```"""
model_type = "bridgetower_vision_model"
def __init__(
@ -179,6 +180,7 @@ class BridgeTowerTextConfig(PretrainedConfig):
>>> # Accessing the configuration
>>> configuration
```"""
model_type = "bridgetower_text_model"
def __init__(
@ -291,6 +293,7 @@ class BridgeTowerConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bridgetower"
def __init__(

View File

@ -46,7 +46,7 @@ _TOKENIZER_FOR_DOC = "RobertaTokenizer"
BRIDGETOWER_PRETRAINED_MODEL_ARCHIVE_LIST = [
"BridgeTower/bridgetower-base",
"BridgeTower/bridgetower-base-itm-mlm"
"BridgeTower/bridgetower-base-itm-mlm",
# See all bridgetower models at https://huggingface.co/BridgeTower
]

View File

@ -38,6 +38,7 @@ class BridgeTowerProcessor(ProcessorMixin):
tokenizer (`RobertaTokenizerFast`):
An instance of ['RobertaTokenizerFast`]. The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "BridgeTowerImageProcessor"
tokenizer_class = ("RobertaTokenizer", "RobertaTokenizerFast")

View File

@ -90,6 +90,7 @@ class BrosConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bros"
def __init__(

View File

@ -34,6 +34,7 @@ class BrosProcessor(ProcessorMixin):
tokenizer (`BertTokenizerFast`, *optional*):
An instance of ['BertTokenizerFast`]. The tokenizer is a required input.
"""
attributes = ["tokenizer"]
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

View File

@ -95,6 +95,7 @@ class CanineConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "canine"
def __init__(

View File

@ -54,7 +54,7 @@ _CONFIG_FOR_DOC = "CanineConfig"
CANINE_PRETRAINED_MODEL_ARCHIVE_LIST = [
"google/canine-s",
"google/canine-r"
"google/canine-r",
# See all CANINE models at https://huggingface.co/models?filter=canine
]

View File

@ -106,6 +106,7 @@ class ChineseCLIPTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "chinese_clip_text_model"
def __init__(

View File

@ -718,9 +718,7 @@ class ChineseCLIPPreTrainedModel(PreTrainedModel):
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
elif isinstance(module, ChineseCLIPVisionMLP):
factor = self.config.initializer_factor
in_proj_std = (
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
)
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
nn.init.normal_(module.fc1.weight, std=fc_std)
nn.init.normal_(module.fc2.weight, std=in_proj_std)

View File

@ -36,6 +36,7 @@ class ChineseCLIPProcessor(ProcessorMixin):
tokenizer ([`BertTokenizerFast`], *optional*):
The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "ChineseCLIPImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

View File

@ -97,6 +97,7 @@ class ClapTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "clap_text_model"
def __init__(

View File

@ -33,6 +33,7 @@ class ClapProcessor(ProcessorMixin):
tokenizer ([`RobertaTokenizerFast`]):
The tokenizer is a required input.
"""
feature_extractor_class = "ClapFeatureExtractor"
tokenizer_class = ("RobertaTokenizer", "RobertaTokenizerFast")

View File

@ -96,6 +96,7 @@ class CLIPTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "clip_text_model"
def __init__(

View File

@ -421,9 +421,7 @@ class CLIPPreTrainedModel(PreTrainedModel):
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
elif isinstance(module, CLIPMLP):
factor = self.config.initializer_factor
in_proj_std = (
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
)
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
nn.init.normal_(module.fc1.weight, std=fc_std)
nn.init.normal_(module.fc2.weight, std=in_proj_std)

View File

@ -35,6 +35,7 @@ class CLIPProcessor(ProcessorMixin):
tokenizer ([`CLIPTokenizerFast`], *optional*):
The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "CLIPImageProcessor"
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")

View File

@ -86,6 +86,7 @@ class CLIPSegTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "clipseg_text_model"
def __init__(

View File

@ -77,8 +77,7 @@ class CLIPSegOutput(ModelOutput):
text_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
The text embeddings obtained by applying the projection layer to the pooled output of [`CLIPSegTextModel`].
image_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
The image embeddings obtained by applying the projection layer to the pooled output of
[`CLIPSegVisionModel`].
The image embeddings obtained by applying the projection layer to the pooled output of [`CLIPSegVisionModel`].
text_model_output(`BaseModelOutputWithPooling`):
The output of the [`CLIPSegTextModel`].
vision_model_output(`BaseModelOutputWithPooling`):
@ -443,9 +442,7 @@ class CLIPSegPreTrainedModel(PreTrainedModel):
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
elif isinstance(module, CLIPSegMLP):
factor = self.config.initializer_factor
in_proj_std = (
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
)
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
nn.init.normal_(module.fc1.weight, std=fc_std)
nn.init.normal_(module.fc2.weight, std=in_proj_std)

View File

@ -35,6 +35,7 @@ class CLIPSegProcessor(ProcessorMixin):
tokenizer ([`CLIPTokenizerFast`], *optional*):
The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "ViTImageProcessor"
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")

View File

@ -684,9 +684,7 @@ class ClvpPreTrainedModel(PreTrainedModel):
module.bias.data.zero_()
elif isinstance(module, ClvpEncoderMLP):
factor = self.config.initializer_factor
in_proj_std = (
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
)
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
nn.init.normal_(module.fc1.proj.weight if getattr(module.fc1, "proj") else module.fc1.weight, std=fc_std)
nn.init.normal_(module.fc2.weight, std=in_proj_std)

View File

@ -34,6 +34,7 @@ class ClvpProcessor(ProcessorMixin):
tokenizer (`ClvpTokenizer`):
An instance of [`ClvpTokenizer`]. The tokenizer is a required input.
"""
feature_extractor_class = "ClvpFeatureExtractor"
tokenizer_class = "ClvpTokenizer"
model_input_names = [
@ -76,15 +77,15 @@ class ClvpProcessor(ProcessorMixin):
# Copied from transformers.models.whisper.processing_whisper.WhisperProcessor.batch_decode with Whisper->Clvp
def batch_decode(self, *args, **kwargs):
"""
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please refer
to the docstring of this method for more information.
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
"""
return self.tokenizer.batch_decode(*args, **kwargs)
# Copied from transformers.models.whisper.processing_whisper.WhisperProcessor.decode with Whisper->Clvp
def decode(self, *args, **kwargs):
"""
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to the
docstring of this method for more information.
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
"""
return self.tokenizer.decode(*args, **kwargs)

View File

@ -105,6 +105,7 @@ class CodeGenConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "codegen"
attribute_map = {
"max_position_embeddings": "n_positions",

View File

@ -134,6 +134,7 @@ class ConditionalDetrConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "conditional_detr"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {

View File

@ -478,8 +478,7 @@ def post_process_panoptic_sample(
threshold=0.85,
) -> Dict:
"""
Converts the output of [`ConditionalDetrForSegmentation`] into panoptic segmentation predictions for a single
sample.
Converts the output of [`ConditionalDetrForSegmentation`] into panoptic segmentation predictions for a single sample.
Args:
out_logits (`torch.Tensor`):
@ -1454,8 +1453,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_semantic_segmentation with Detr->ConditionalDetr
def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None):
"""
Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports
PyTorch.
Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch.
Args:
outputs ([`ConditionalDetrForSegmentation`]):
@ -1511,8 +1509,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
return_coco_annotation: Optional[bool] = False,
) -> List[Dict]:
"""
Converts the output of [`ConditionalDetrForSegmentation`] into instance segmentation predictions. Only supports
PyTorch.
Converts the output of [`ConditionalDetrForSegmentation`] into instance segmentation predictions. Only supports PyTorch.
Args:
outputs ([`ConditionalDetrForSegmentation`]):
@ -1596,8 +1593,8 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
target_sizes: Optional[List[Tuple[int, int]]] = None,
) -> List[Dict]:
"""
Converts the output of [`ConditionalDetrForSegmentation`] into image panoptic segmentation predictions. Only
supports PyTorch.
Converts the output of [`ConditionalDetrForSegmentation`] into image panoptic segmentation predictions. Only supports
PyTorch.
Args:
outputs ([`ConditionalDetrForSegmentation`]):

View File

@ -153,8 +153,8 @@ class ConditionalDetrObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve
the unnormalized bounding boxes.
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxilary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
and labels are provided. It is a list of dictionaries containing the two above keys (`logits` and
@ -217,14 +217,14 @@ class ConditionalDetrSegmentationOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve
the unnormalized bounding boxes.
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
pred_masks (`torch.FloatTensor` of shape `(batch_size, num_queries, height/4, width/4)`):
Segmentation masks logits for all queries. See also
[`~ConditionalDetrImageProcessor.post_process_semantic_segmentation`] or
[`~ConditionalDetrImageProcessor.post_process_instance_segmentation`]
[`~ConditionalDetrImageProcessor.post_process_panoptic_segmentation`] to evaluate semantic, instance and
panoptic segmentation masks respectively.
[`~ConditionalDetrImageProcessor.post_process_panoptic_segmentation`] to evaluate semantic, instance and panoptic
segmentation masks respectively.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
and labels are provided. It is a list of dictionaries containing the two above keys (`logits` and

View File

@ -96,6 +96,7 @@ class ConvBertConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "convbert"
def __init__(

View File

@ -263,8 +263,8 @@ class ConvBertTokenizer(PreTrainedTokenizer):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT
sequence pair mask has the following format:
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT sequence
pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1

View File

@ -168,8 +168,8 @@ class ConvBertTokenizerFast(PreTrainedTokenizerFast):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT
sequence pair mask has the following format:
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT sequence
pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1

View File

@ -87,6 +87,7 @@ class ConvNextConfig(BackboneConfigMixin, PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "convnext"
def __init__(

View File

@ -79,6 +79,7 @@ class ConvNextV2Config(BackboneConfigMixin, PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "convnextv2"
def __init__(

View File

@ -84,6 +84,7 @@ class CpmAntConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "cpmant"
def __init__(

View File

@ -96,6 +96,7 @@ class CvtConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "cvt"
def __init__(

View File

@ -168,6 +168,7 @@ class Data2VecAudioConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "data2vec-audio"
def __init__(

View File

@ -95,6 +95,7 @@ class Data2VecTextConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "data2vec-text"
def __init__(

View File

@ -111,6 +111,7 @@ class Data2VecVisionConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "data2vec-vision"
def __init__(

View File

@ -289,8 +289,8 @@ class Data2VecVisionSelfAttention(nn.Module):
# Copied from transformers.models.beit.modeling_beit.BeitSelfOutput with Beit->Data2VecVision
class Data2VecVisionSelfOutput(nn.Module):
"""
The residual connection is defined in Data2VecVisionLayer instead of here (as is the case with other models), due
to the layernorm applied before each block.
The residual connection is defined in Data2VecVisionLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
"""
def __init__(self, config: Data2VecVisionConfig) -> None:

Some files were not shown because too many files have changed in this diff.