Add token argument in example scripts (#25172)

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar 2023-08-02 11:17:31 +02:00 committed by GitHub
parent c6a8768dab
commit 149cb0cce2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
43 changed files with 987 additions and 420 deletions

View File

@ -22,6 +22,7 @@ import logging
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from functools import partial from functools import partial
@ -182,15 +183,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -389,6 +396,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_image_captioning", model_args, data_args, framework="flax") send_example_telemetry("run_image_captioning", model_args, data_args, framework="flax")
@ -448,7 +461,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
data_dir=data_args.data_dir, data_dir=data_args.data_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -465,7 +478,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -475,18 +488,18 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer.pad_token = tokenizer.convert_ids_to_tokens(model.config.pad_token_id) tokenizer.pad_token = tokenizer.convert_ids_to_tokens(model.config.pad_token_id)

View File

@ -26,6 +26,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
@ -168,15 +169,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -463,6 +470,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_bart_dlm", model_args, data_args, framework="flax") send_example_telemetry("run_bart_dlm", model_args, data_args, framework="flax")
@ -517,7 +530,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
@ -526,14 +539,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -548,7 +561,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
@ -557,14 +570,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -576,14 +589,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
@ -596,13 +609,13 @@ def main():
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
vocab_size=len(tokenizer), vocab_size=len(tokenizer),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = BartConfig.from_pretrained( config = BartConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
@ -707,7 +720,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config.vocab_size = len(tokenizer) config.vocab_size = len(tokenizer)

View File

@ -27,6 +27,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
@ -169,15 +170,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -334,6 +341,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clm", model_args, data_args, framework="flax") send_example_telemetry("run_clm", model_args, data_args, framework="flax")
@ -397,7 +410,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in dataset.keys(): if "validation" not in dataset.keys():
@ -406,14 +419,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
dataset["train"] = load_dataset( dataset["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -431,7 +444,7 @@ def main():
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
**dataset_args, **dataset_args,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in dataset.keys(): if "validation" not in dataset.keys():
@ -441,7 +454,7 @@ def main():
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
**dataset_args, **dataset_args,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
dataset["train"] = load_dataset( dataset["train"] = load_dataset(
extension, extension,
@ -449,7 +462,7 @@ def main():
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
**dataset_args, **dataset_args,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -463,13 +476,13 @@ def main():
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
@ -480,14 +493,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
@ -501,7 +514,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForCausalLM.from_config( model = FlaxAutoModelForCausalLM.from_config(

View File

@ -26,6 +26,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
@ -174,15 +175,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -377,6 +384,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mlm", model_args, data_args, framework="flax") send_example_telemetry("run_mlm", model_args, data_args, framework="flax")
@ -434,7 +447,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
@ -443,14 +456,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -465,7 +478,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
@ -474,14 +487,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -495,13 +508,13 @@ def main():
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
@ -512,14 +525,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
@ -638,7 +651,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForMaskedLM.from_config( model = FlaxAutoModelForMaskedLM.from_config(

View File

@ -25,6 +25,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments. # You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
@ -168,15 +169,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -504,6 +511,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_t5_mlm", model_args, data_args, framework="flax") send_example_telemetry("run_t5_mlm", model_args, data_args, framework="flax")
@ -558,7 +571,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
@ -567,14 +580,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -589,7 +602,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
@ -598,14 +611,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -617,14 +630,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
@ -637,13 +650,13 @@ def main():
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
vocab_size=len(tokenizer), vocab_size=len(tokenizer),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = T5Config.from_pretrained( config = T5Config.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
@ -738,7 +751,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config.vocab_size = len(tokenizer) config.vocab_size = len(tokenizer)

View File

@ -25,6 +25,7 @@ import os
import random import random
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
@ -155,15 +156,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
dtype: Optional[str] = field( dtype: Optional[str] = field(
default="float32", default="float32",
metadata={ metadata={
@ -438,6 +445,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa", model_args, data_args, framework="flax") send_example_telemetry("run_qa", model_args, data_args, framework="flax")
@ -487,7 +500,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading the dataset from local csv or json file. # Loading the dataset from local csv or json file.
@ -507,7 +520,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -520,14 +533,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
@ -874,7 +887,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
) )

View File

@ -24,6 +24,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from functools import partial from functools import partial
@ -188,15 +189,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -417,6 +424,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_summarization", model_args, data_args, framework="flax") send_example_telemetry("run_summarization", model_args, data_args, framework="flax")
@ -475,7 +488,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -492,7 +505,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -503,13 +516,13 @@ def main():
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
@ -520,14 +533,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
@ -541,7 +554,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForSeq2SeqLM.from_config( model = FlaxAutoModelForSeq2SeqLM.from_config(

View File

@ -21,6 +21,7 @@ import os
import random import random
import sys import sys
import time import time
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple from typing import Any, Callable, Dict, Optional, Tuple
@ -101,15 +102,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -321,6 +328,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_glue", model_args, data_args, framework="flax") send_example_telemetry("run_glue", model_args, data_args, framework="flax")
@ -368,7 +381,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
"glue", "glue",
data_args.task_name, data_args.task_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading the dataset from local csv or json file. # Loading the dataset from local csv or json file.
@ -381,7 +394,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -411,17 +424,17 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
num_labels=num_labels, num_labels=num_labels,
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
use_fast=not model_args.use_slow_tokenizer, use_fast=not model_args.use_slow_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = FlaxAutoModelForSequenceClassification.from_pretrained( model = FlaxAutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
config=config, config=config,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Preprocessing the datasets # Preprocessing the datasets

View File

@ -21,6 +21,7 @@ import os
import random import random
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
@ -149,15 +150,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -377,6 +384,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_ner", model_args, data_args, framework="flax") send_example_telemetry("run_ner", model_args, data_args, framework="flax")
@ -422,7 +435,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading the dataset from local csv or json file. # Loading the dataset from local csv or json file.
@ -436,7 +449,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -490,7 +503,7 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
if config.model_type in {"gpt2", "roberta"}: if config.model_type in {"gpt2", "roberta"}:
@ -498,7 +511,7 @@ def main():
tokenizer_name_or_path, tokenizer_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
add_prefix_space=True, add_prefix_space=True,
) )
else: else:
@ -506,14 +519,14 @@ def main():
tokenizer_name_or_path, tokenizer_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = FlaxAutoModelForTokenClassification.from_pretrained( model = FlaxAutoModelForTokenClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Preprocessing the datasets # Preprocessing the datasets

View File

@ -24,6 +24,7 @@ import logging
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
@ -159,15 +160,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -257,6 +264,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_image_classification", model_args, data_args, framework="flax") send_example_telemetry("run_image_classification", model_args, data_args, framework="flax")
@ -338,7 +351,7 @@ def main():
num_labels=len(train_dataset.classes), num_labels=len(train_dataset.classes),
image_size=data_args.image_size, image_size=data_args.image_size,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
@ -346,7 +359,7 @@ def main():
num_labels=len(train_dataset.classes), num_labels=len(train_dataset.classes),
image_size=data_args.image_size, image_size=data_args.image_size,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
@ -358,7 +371,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForImageClassification.from_config( model = FlaxAutoModelForImageClassification.from_config(

View File

@ -152,15 +152,21 @@ class ModelArguments:
attention_mask: bool = field( attention_mask: bool = field(
default=True, metadata={"help": "Whether to generate an attention mask in the feature extractor."} default=True, metadata={"help": "Whether to generate an attention mask in the feature extractor."}
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_feature_extractor: Optional[bool] = field( freeze_feature_extractor: Optional[bool] = field(
default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."} default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."}
) )
@ -198,6 +204,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_audio_classification", model_args, data_args) send_example_telemetry("run_audio_classification", model_args, data_args)
@ -250,13 +262,13 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["eval"] = load_dataset( raw_datasets["eval"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if data_args.audio_column_name not in raw_datasets["train"].column_names: if data_args.audio_column_name not in raw_datasets["train"].column_names:
@ -280,7 +292,7 @@ def main():
return_attention_mask=model_args.attention_mask, return_attention_mask=model_args.attention_mask,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# `datasets` takes care of automatically loading and resampling the audio, # `datasets` takes care of automatically loading and resampling the audio,
@ -340,7 +352,7 @@ def main():
finetuning_task="audio-classification", finetuning_task="audio-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForAudioClassification.from_pretrained( model = AutoModelForAudioClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -348,7 +360,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )

View File

@ -26,6 +26,7 @@ Text models: BERT, ROBERTa (https://huggingface.co/models?filter=fill-mask)
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -86,15 +87,21 @@ class ModelArguments:
default=True, default=True,
metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_vision_model: bool = field( freeze_vision_model: bool = field(
default=False, metadata={"help": "Whether to freeze the vision model parameters or not."} default=False, metadata={"help": "Whether to freeze the vision model parameters or not."}
) )
@ -235,6 +242,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clip", model_args, data_args) send_example_telemetry("run_clip", model_args, data_args)
@ -294,7 +307,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
data_dir=data_args.data_dir, data_dir=data_args.data_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -311,7 +324,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -336,14 +349,14 @@ def main():
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModel.from_pretrained( model = AutoModel.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
config = model.config config = model.config

View File

@ -16,6 +16,7 @@
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -142,15 +143,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
@ -176,6 +183,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_image_classification", model_args, data_args) send_example_telemetry("run_image_classification", model_args, data_args)
@ -229,7 +242,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
task="image-classification", task="image-classification",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -276,7 +289,7 @@ def main():
finetuning_task="image-classification", finetuning_task="image-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForImageClassification.from_pretrained( model = AutoModelForImageClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -284,14 +297,14 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Define torchvision transforms to be applied to each image. # Define torchvision transforms to be applied to each image.

View File

@ -16,6 +16,7 @@
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -133,15 +134,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
mask_ratio: float = field( mask_ratio: float = field(
default=0.75, metadata={"help": "The ratio of the number of masked tokens in the input sequence."} default=0.75, metadata={"help": "The ratio of the number of masked tokens in the input sequence."}
) )
@ -175,6 +182,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mae", model_args, data_args) send_example_telemetry("run_mae", model_args, data_args)
@ -224,7 +237,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
data_files=data_args.data_files, data_files=data_args.data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If we don't have a validation split, split off a percentage of train as validation. # If we don't have a validation split, split off a percentage of train as validation.
@ -242,7 +255,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = ViTMAEConfig.from_pretrained(model_args.config_name, **config_kwargs) config = ViTMAEConfig.from_pretrained(model_args.config_name, **config_kwargs)
@ -280,7 +293,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
logger.info("Training new model from scratch") logger.info("Training new model from scratch")

View File

@ -16,6 +16,7 @@
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -153,15 +154,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
image_size: Optional[int] = field( image_size: Optional[int] = field(
default=None, default=None,
metadata={ metadata={
@ -239,6 +246,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mim", model_args, data_args) send_example_telemetry("run_mim", model_args, data_args)
@ -288,7 +301,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
data_files=data_args.data_files, data_files=data_args.data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If we don't have a validation split, split off a percentage of train as validation. # If we don't have a validation split, split off a percentage of train as validation.
@ -305,7 +318,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name_or_path: if model_args.config_name_or_path:
config = AutoConfig.from_pretrained(model_args.config_name_or_path, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name_or_path, **config_kwargs)
@ -357,7 +370,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
logger.info("Training new model from scratch") logger.info("Training new model from scratch")

View File

@ -25,6 +25,7 @@ import logging
import math import math
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional from typing import Optional
@ -111,15 +112,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
torch_dtype: Optional[str] = field( torch_dtype: Optional[str] = field(
default=None, default=None,
metadata={ metadata={
@ -238,6 +245,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clm", model_args, data_args) send_example_telemetry("run_clm", model_args, data_args)
@ -300,7 +313,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
@ -309,7 +322,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
@ -317,7 +330,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
else: else:
@ -339,7 +352,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
# If no validation data is there, validation_split_percentage will be used to divide the dataset. # If no validation data is there, validation_split_percentage will be used to divide the dataset.
@ -349,7 +362,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
@ -357,7 +370,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
@ -373,7 +386,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
@ -391,7 +404,7 @@ def main():
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer, "use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.tokenizer_name: if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
@ -415,7 +428,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
torch_dtype=torch_dtype, torch_dtype=torch_dtype,
low_cpu_mem_usage=model_args.low_cpu_mem_usage, low_cpu_mem_usage=model_args.low_cpu_mem_usage,
) )

View File

@ -25,6 +25,7 @@ import logging
import math import math
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional from typing import Optional
@ -107,15 +108,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
low_cpu_mem_usage: bool = field( low_cpu_mem_usage: bool = field(
default=False, default=False,
metadata={ metadata={
@ -238,6 +245,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mlm", model_args, data_args) send_example_telemetry("run_mlm", model_args, data_args)
@ -301,7 +314,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
@ -310,7 +323,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
@ -318,7 +331,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
else: else:
@ -335,7 +348,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If no validation data is there, validation_split_percentage will be used to divide the dataset. # If no validation data is there, validation_split_percentage will be used to divide the dataset.
@ -345,14 +358,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
@ -366,7 +379,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
@ -384,7 +397,7 @@ def main():
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer, "use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.tokenizer_name: if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
@ -403,7 +416,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
low_cpu_mem_usage=model_args.low_cpu_mem_usage, low_cpu_mem_usage=model_args.low_cpu_mem_usage,
) )
else: else:

View File

@ -22,6 +22,7 @@ import logging
import math import math
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional from typing import Optional
@ -95,15 +96,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
low_cpu_mem_usage: bool = field( low_cpu_mem_usage: bool = field(
default=False, default=False,
metadata={ metadata={
@ -229,6 +236,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_plm", model_args, data_args) send_example_telemetry("run_plm", model_args, data_args)
@ -291,7 +304,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset( raw_datasets["validation"] = load_dataset(
@ -299,14 +312,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -325,14 +338,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
@ -346,7 +359,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
@ -364,7 +377,7 @@ def main():
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer, "use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.tokenizer_name: if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
@ -383,7 +396,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
low_cpu_mem_usage=model_args.low_cpu_mem_usage, low_cpu_mem_usage=model_args.low_cpu_mem_usage,
) )
else: else:

View File

@ -21,6 +21,7 @@ Fine-tuning the library models for multiple choice.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional, Union from typing import Optional, Union
@ -79,15 +80,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -225,6 +232,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_swag", model_args, data_args) send_example_telemetry("run_swag", model_args, data_args)
@ -292,7 +305,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Downloading and loading the swag dataset from the hub. # Downloading and loading the swag dataset from the hub.
@ -300,7 +313,7 @@ def main():
"swag", "swag",
"regular", "regular",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -314,14 +327,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForMultipleChoice.from_pretrained( model = AutoModelForMultipleChoice.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -329,7 +342,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# When using your own dataset or a different dataset from swag, you will probably need to change this. # When using your own dataset or a different dataset from swag, you will probably need to change this.

View File

@ -21,6 +21,7 @@ Fine-tuning the library models for question answering using a slightly adapted v
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -79,15 +80,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -227,6 +234,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa", model_args, data_args) send_example_telemetry("run_qa", model_args, data_args)
@ -289,7 +302,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -308,7 +321,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -322,14 +335,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForQuestionAnswering.from_pretrained( model = AutoModelForQuestionAnswering.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -337,7 +350,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Tokenizer check: this script requires a fast tokenizer. # Tokenizer check: this script requires a fast tokenizer.

View File

@ -21,6 +21,7 @@ Fine-tuning XLNet for question answering with beam search using a slightly adapt
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -78,15 +79,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -226,6 +233,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa_beam_search", model_args, data_args) send_example_telemetry("run_qa_beam_search", model_args, data_args)
@ -288,7 +301,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -306,7 +319,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -320,13 +333,13 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = XLNetTokenizerFast.from_pretrained( tokenizer = XLNetTokenizerFast.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = XLNetForQuestionAnswering.from_pretrained( model = XLNetForQuestionAnswering.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -334,7 +347,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Preprocessing the datasets. # Preprocessing the datasets.

View File

@ -21,6 +21,7 @@ Fine-tuning the library's seq2seq models for question answering using the 🤗 S
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
@ -80,15 +81,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -273,6 +280,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_seq2seq_qa", model_args, data_args) send_example_telemetry("run_seq2seq_qa", model_args, data_args)
@ -335,7 +348,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -353,7 +366,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -367,14 +380,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSeq2SeqLM.from_pretrained( model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -382,7 +395,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch

View File

@ -18,6 +18,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -241,15 +242,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
def main(): def main():
@ -265,6 +272,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_semantic_segmentation", model_args, data_args) send_example_telemetry("run_semantic_segmentation", model_args, data_args)
@ -379,7 +392,7 @@ def main():
id2label=id2label, id2label=id2label,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSemanticSegmentation.from_pretrained( model = AutoModelForSemanticSegmentation.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -387,13 +400,13 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Define torchvision transforms to be applied to each image + target. # Define torchvision transforms to be applied to each image + target.

View File

@ -229,15 +229,21 @@ class DataTrainingArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"If :obj:`True`, will use the token generated when running" "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
":obj:`huggingface-cli login` as HTTP bearer authorization for remote files." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
unk_token: str = field( unk_token: str = field(
default="[UNK]", default="[UNK]",
metadata={"help": "The unk token for the tokenizer"}, metadata={"help": "The unk token for the tokenizer"},
@ -379,6 +385,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if data_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if data_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
data_args.token = data_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_speech_recognition_ctc", model_args, data_args) send_example_telemetry("run_speech_recognition_ctc", model_args, data_args)
@ -427,7 +439,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.audio_column_name not in raw_datasets["train"].column_names: if data_args.audio_column_name not in raw_datasets["train"].column_names:
@ -452,7 +464,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.max_eval_samples is not None: if data_args.max_eval_samples is not None:
@ -490,7 +502,9 @@ def main():
# the tokenizer # the tokenizer
# load config # load config
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# 4. Next, if no tokenizer file is defined, # 4. Next, if no tokenizer file is defined,
@ -546,11 +560,13 @@ def main():
# load feature_extractor and tokenizer # load feature_extractor and tokenizer
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path, tokenizer_name_or_path,
use_auth_token=data_args.use_auth_token, token=data_args.token,
**tokenizer_kwargs, **tokenizer_kwargs,
) )
feature_extractor = AutoFeatureExtractor.from_pretrained( feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# adapt config # adapt config
@ -578,7 +594,7 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
config=config, config=config,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
# freeze encoder # freeze encoder

View File

@ -232,15 +232,21 @@ class DataTrainingArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"If :obj:`True`, will use the token generated when running" "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
":obj:`huggingface-cli login` as HTTP bearer authorization for remote files." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
unk_token: str = field( unk_token: str = field(
default="[UNK]", default="[UNK]",
metadata={"help": "The unk token for the tokenizer"}, metadata={"help": "The unk token for the tokenizer"},
@ -375,6 +381,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if data_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if data_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
data_args.token = data_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_speech_recognition_ctc_adapter", model_args, data_args) send_example_telemetry("run_speech_recognition_ctc_adapter", model_args, data_args)
@ -423,7 +435,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.audio_column_name not in raw_datasets["train"].column_names: if data_args.audio_column_name not in raw_datasets["train"].column_names:
@ -448,7 +460,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.max_eval_samples is not None: if data_args.max_eval_samples is not None:
@ -486,7 +498,9 @@ def main():
# the tokenizer # the tokenizer
# load config # load config
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# 4. Next, if no tokenizer file is defined, # 4. Next, if no tokenizer file is defined,
@ -500,7 +514,10 @@ def main():
vocab_dict = {} vocab_dict = {}
if tokenizer_name_or_path is not None: if tokenizer_name_or_path is not None:
# load vocabulary of other adapter languages so that new language can be appended # load vocabulary of other adapter languages so that new language can be appended
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_auth_token=data_args.use_auth_token) tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path,
token=data_args.token,
)
vocab_dict = tokenizer.vocab.copy() vocab_dict = tokenizer.vocab.copy()
if tokenizer.target_lang is None: if tokenizer.target_lang is None:
raise ValueError("Make sure to load a multi-lingual tokenizer with a set target language.") raise ValueError("Make sure to load a multi-lingual tokenizer with a set target language.")
@ -566,11 +583,13 @@ def main():
# load feature_extractor and tokenizer # load feature_extractor and tokenizer
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path, tokenizer_name_or_path,
use_auth_token=data_args.use_auth_token, token=data_args.token,
**tokenizer_kwargs, **tokenizer_kwargs,
) )
feature_extractor = AutoFeatureExtractor.from_pretrained( feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# adapt config # adapt config
@ -595,7 +614,7 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
config=config, config=config,
use_auth_token=data_args.use_auth_token, token=data_args.token,
ignore_mismatched_sizes=True, ignore_mismatched_sizes=True,
) )

View File

@ -22,6 +22,7 @@ Fine-tuning the library models for sequence to sequence speech recognition.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
@ -85,15 +86,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_feature_encoder: bool = field( freeze_feature_encoder: bool = field(
default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
) )
@ -278,6 +285,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_speech_recognition_seq2seq", model_args, data_args) send_example_telemetry("run_speech_recognition_seq2seq", model_args, data_args)
@ -336,7 +349,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if training_args.do_eval: if training_args.do_eval:
@ -345,7 +358,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names:
@ -370,7 +383,7 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens}) config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens})
@ -383,21 +396,21 @@ def main():
model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSpeechSeq2Seq.from_pretrained( model = AutoModelForSpeechSeq2Seq.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if model.config.decoder_start_token_id is None: if model.config.decoder_start_token_id is None:

View File

@ -21,6 +21,7 @@ Fine-tuning the library models for sequence to sequence.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -99,15 +100,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
resize_position_embeddings: Optional[bool] = field( resize_position_embeddings: Optional[bool] = field(
default=None, default=None,
metadata={ metadata={
@ -312,6 +319,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_summarization", model_args, data_args) send_example_telemetry("run_summarization", model_args, data_args)
@ -386,7 +399,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -403,7 +416,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -417,14 +430,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSeq2SeqLM.from_pretrained( model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -432,7 +445,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch

View File

@ -20,6 +20,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List, Optional from typing import List, Optional
@ -227,15 +228,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
@ -268,6 +275,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_classification", model_args, data_args) send_example_telemetry("run_classification", model_args, data_args)
@ -327,7 +340,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Try print some info about the dataset # Try print some info about the dataset
logger.info(f"Dataset loaded: {raw_datasets}") logger.info(f"Dataset loaded: {raw_datasets}")
@ -358,7 +371,7 @@ def main():
"csv", "csv",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading a dataset from local json files # Loading a dataset from local json files
@ -366,7 +379,7 @@ def main():
"json", "json",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
@ -468,7 +481,7 @@ def main():
finetuning_task="text-classification", finetuning_task="text-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if is_regression: if is_regression:
@ -486,7 +499,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSequenceClassification.from_pretrained( model = AutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -494,7 +507,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )

View File

@ -20,6 +20,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -188,15 +189,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
@ -216,6 +223,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_glue", model_args, data_args) send_example_telemetry("run_glue", model_args, data_args)
@ -281,7 +294,7 @@ def main():
"glue", "glue",
data_args.task_name, data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif data_args.dataset_name is not None: elif data_args.dataset_name is not None:
# Downloading and loading a dataset from the hub. # Downloading and loading a dataset from the hub.
@ -289,7 +302,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading a dataset from your local files. # Loading a dataset from your local files.
@ -318,7 +331,7 @@ def main():
"csv", "csv",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading a dataset from local json files # Loading a dataset from local json files
@ -326,7 +339,7 @@ def main():
"json", "json",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -361,14 +374,14 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSequenceClassification.from_pretrained( model = AutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -376,7 +389,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )

View File

@ -21,6 +21,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -152,15 +153,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
@ -175,6 +182,12 @@ def main():
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_xnli", model_args) send_example_telemetry("run_xnli", model_args)
@ -232,7 +245,7 @@ def main():
model_args.language, model_args.language,
split="train", split="train",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
train_dataset = load_dataset( train_dataset = load_dataset(
@ -240,7 +253,7 @@ def main():
model_args.train_language, model_args.train_language,
split="train", split="train",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
label_list = train_dataset.features["label"].names label_list = train_dataset.features["label"].names
@ -250,7 +263,7 @@ def main():
model_args.language, model_args.language,
split="validation", split="validation",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
label_list = eval_dataset.features["label"].names label_list = eval_dataset.features["label"].names
@ -260,7 +273,7 @@ def main():
model_args.language, model_args.language,
split="test", split="test",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
label_list = predict_dataset.features["label"].names label_list = predict_dataset.features["label"].names
@ -278,7 +291,7 @@ def main():
finetuning_task="xnli", finetuning_task="xnli",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
@ -286,7 +299,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSequenceClassification.from_pretrained( model = AutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -294,7 +307,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )

View File

@ -22,6 +22,7 @@ Fine-tuning the library models for token classification.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -79,15 +80,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
@ -217,6 +224,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_ner", model_args, data_args) send_example_telemetry("run_ner", model_args, data_args)
@ -279,7 +292,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -348,7 +361,7 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
@ -358,7 +371,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
add_prefix_space=True, add_prefix_space=True,
) )
else: else:
@ -367,7 +380,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForTokenClassification.from_pretrained( model = AutoModelForTokenClassification.from_pretrained(
@ -376,7 +389,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )

View File

@ -21,6 +21,7 @@ Fine-tuning the library models for sequence to sequence.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -89,15 +90,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -261,6 +268,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_translation", model_args, data_args) send_example_telemetry("run_translation", model_args, data_args)
@ -335,7 +348,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -352,7 +365,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -366,14 +379,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSeq2SeqLM.from_pretrained( model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
@ -381,7 +394,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch

View File

@ -26,6 +26,7 @@ Text models: BERT, ROBERTa (https://huggingface.co/models?filter=fill-mask)
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -92,15 +93,21 @@ class ModelArguments:
default=True, default=True,
metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_vision_model: bool = field( freeze_vision_model: bool = field(
default=False, metadata={"help": "Whether to freeze the vision model parameters or not."} default=False, metadata={"help": "Whether to freeze the vision model parameters or not."}
) )
@ -245,6 +252,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
if model_args.model_name_or_path is not None: if model_args.model_name_or_path is not None:
if model_args.vision_model_name_or_path is not None or model_args.text_model_name_or_path is not None: if model_args.vision_model_name_or_path is not None or model_args.text_model_name_or_path is not None:
raise ValueError( raise ValueError(
@ -315,7 +328,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
data_dir=data_args.data_dir, data_dir=data_args.data_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -332,7 +345,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -362,14 +375,14 @@ def main():
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
with training_args.strategy.scope(): with training_args.strategy.scope():
model = TFAutoModel.from_pretrained( model = TFAutoModel.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Load image_processor, in this script we only use this to get the mean and std for normalization. # Load image_processor, in this script we only use this to get the mean and std for normalization.
@ -377,14 +390,14 @@ def main():
model_args.image_processor_name or model_args.vision_model_name_or_path, model_args.image_processor_name or model_args.vision_model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
with training_args.strategy.scope(): with training_args.strategy.scope():
model = TFVisionTextDualEncoderModel.from_vision_text_pretrained( model = TFVisionTextDualEncoderModel.from_vision_text_pretrained(
vision_model_name_or_path=model_args.vision_model_name_or_path, vision_model_name_or_path=model_args.vision_model_name_or_path,
text_model_name_or_path=model_args.text_model_name_or_path, text_model_name_or_path=model_args.text_model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
config = model.config config = model.config

View File

@ -23,6 +23,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -157,15 +158,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
@ -226,6 +233,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
if not (training_args.do_train or training_args.do_eval or training_args.do_predict): if not (training_args.do_train or training_args.do_eval or training_args.do_predict):
exit("Must specify at least one of --do_train, --do_eval or --do_predict!") exit("Must specify at least one of --do_train, --do_eval or --do_predict!")
@ -275,7 +288,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
task="image-classification", task="image-classification",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -309,13 +322,13 @@ def main():
finetuning_task="image-classification", finetuning_task="image-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If we don't have a validation split, split off a percentage of train as validation. # If we don't have a validation split, split off a percentage of train as validation.
@ -435,7 +448,7 @@ def main():
from_pt=bool(".bin" in model_path), from_pt=bool(".bin" in model_path),
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
num_replicas = training_args.strategy.num_replicas_in_sync num_replicas = training_args.strategy.num_replicas_in_sync

View File

@ -30,6 +30,7 @@ import math
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from pathlib import Path from pathlib import Path
@ -112,15 +113,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
def __post_init__(self): def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
@ -220,6 +227,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clm", model_args, data_args, framework="tensorflow") send_example_telemetry("run_clm", model_args, data_args, framework="tensorflow")
@ -287,7 +300,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset( raw_datasets["validation"] = load_dataset(
@ -295,14 +308,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -323,7 +336,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
# If no validation data is there, validation_split_percentage will be used to divide the dataset. # If no validation data is there, validation_split_percentage will be used to divide the dataset.
@ -333,7 +346,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
@ -341,7 +354,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at

View File

@ -28,6 +28,7 @@ import math
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from pathlib import Path from pathlib import Path
@ -110,15 +111,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
def __post_init__(self): def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
@ -226,6 +233,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mlm", model_args, data_args, framework="tensorflow") send_example_telemetry("run_mlm", model_args, data_args, framework="tensorflow")
@ -296,20 +309,20 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset( raw_datasets["validation"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -323,7 +336,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at

View File

@ -22,6 +22,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from pathlib import Path from pathlib import Path
@ -146,15 +147,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -239,6 +246,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_swag", model_args, data_args, framework="tensorflow") send_example_telemetry("run_swag", model_args, data_args, framework="tensorflow")
@ -301,7 +314,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Downloading and loading the swag dataset from the hub. # Downloading and loading the swag dataset from the hub.
@ -309,7 +322,7 @@ def main():
"swag", "swag",
"regular", "regular",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -335,14 +348,14 @@ def main():
config_path, config_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
@ -428,7 +441,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
num_replicas = training_args.strategy.num_replicas_in_sync num_replicas = training_args.strategy.num_replicas_in_sync

View File

@ -22,6 +22,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@ -77,15 +78,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -245,6 +252,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa", model_args, data_args, framework="tensorflow") send_example_telemetry("run_qa", model_args, data_args, framework="tensorflow")
@ -304,7 +317,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -323,7 +336,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -338,14 +351,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
@ -625,7 +638,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if training_args.do_train: if training_args.do_train:
training_dataset = model.prepare_tf_dataset( training_dataset = model.prepare_tf_dataset(

View File

@ -22,6 +22,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -99,15 +100,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -287,6 +294,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_summarization", model_args, data_args, framework="tensorflow") send_example_telemetry("run_summarization", model_args, data_args, framework="tensorflow")
@ -355,7 +368,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -372,7 +385,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -388,14 +401,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
prefix = data_args.source_prefix if data_args.source_prefix is not None else "" prefix = data_args.source_prefix if data_args.source_prefix is not None else ""
@ -513,7 +526,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch

View File

@ -20,6 +20,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -164,15 +165,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
# endregion # endregion
@ -192,6 +199,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_glue", model_args, data_args, framework="tensorflow") send_example_telemetry("run_glue", model_args, data_args, framework="tensorflow")
@ -242,7 +255,7 @@ def main():
"glue", "glue",
data_args.task_name, data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
@ -284,14 +297,14 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
@ -374,7 +387,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion

View File

@ -20,6 +20,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@ -170,15 +171,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
# endregion # endregion
@ -198,6 +205,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_text_classification", model_args, data_args, framework="tensorflow") send_example_telemetry("run_text_classification", model_args, data_args, framework="tensorflow")
@ -258,7 +271,7 @@ def main():
"csv", "csv",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading a dataset from local json files # Loading a dataset from local json files
@ -301,20 +314,20 @@ def main():
num_labels=num_labels, num_labels=num_labels,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
config_path, config_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
@ -402,7 +415,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion

View File

@ -21,6 +21,7 @@ import json
import logging import logging
import os import os
import random import random
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -75,15 +76,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -196,6 +203,12 @@ def main():
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments)) parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_ner", model_args, data_args, framework="tensorflow") send_example_telemetry("run_ner", model_args, data_args, framework="tensorflow")
@ -228,7 +241,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -240,7 +253,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.

View File

@ -22,6 +22,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -93,15 +94,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
@ -268,6 +275,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_translation", model_args, data_args, framework="tensorflow") send_example_telemetry("run_translation", model_args, data_args, framework="tensorflow")
@ -322,7 +335,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
@ -336,7 +349,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading # https://huggingface.co/docs/datasets/loading
@ -352,14 +365,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
prefix = data_args.source_prefix if data_args.source_prefix is not None else "" prefix = data_args.source_prefix if data_args.source_prefix is not None else ""
@ -466,7 +479,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch