From d4b3e359aa36f050325ff6f950f0f36276b7b23f Mon Sep 17 00:00:00 2001 From: Zachary Mueller Date: Mon, 11 Apr 2022 12:42:45 -0400 Subject: [PATCH] Don't push checkpoints to hub in `no_trainer` scripts (#16703) Adds checkpoint prefixes to the gitignore if `push_to_hub` is used along with `checkpointing_steps` --- examples/pytorch/language-modeling/run_clm_no_trainer.py | 9 +++++++-- examples/pytorch/language-modeling/run_mlm_no_trainer.py | 8 +++++++- examples/pytorch/multiple-choice/run_swag_no_trainer.py | 8 +++++++- .../question-answering/run_qa_beam_search_no_trainer.py | 8 +++++++- examples/pytorch/question-answering/run_qa_no_trainer.py | 8 +++++++- .../summarization/run_summarization_no_trainer.py | 8 +++++++- .../pytorch/text-classification/run_glue_no_trainer.py | 8 +++++++- .../pytorch/token-classification/run_ner_no_trainer.py | 8 +++++++- .../pytorch/translation/run_translation_no_trainer.py | 8 +++++++- 9 files changed, 63 insertions(+), 10 deletions(-) diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index 247ba09d54a..3435ea2ecdb 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -39,6 +39,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator, DistributedType +from accelerate.utils import set_seed from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, @@ -50,7 +51,6 @@ from transformers import ( SchedulerType, default_data_collator, get_scheduler, - set_seed, ) from transformers.utils import get_full_repo_name from transformers.utils.versions import require_version @@ -258,6 +258,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + 
if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -542,7 +548,6 @@ def main(): if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) accelerator.save_state(output_dir) - if completed_steps >= args.max_train_steps: break diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index 2634cc25e5b..2720e76d02e 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -39,6 +39,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator, DistributedType +from accelerate.utils import set_seed from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, @@ -50,7 +51,6 @@ from transformers import ( DataCollatorForLanguageModeling, SchedulerType, get_scheduler, - set_seed, ) from transformers.utils import get_full_repo_name from transformers.utils.versions import require_version @@ -269,6 +269,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index a575644130f..f845cd43e2c 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -37,6 +37,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from accelerate.utils import 
set_seed from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, @@ -49,7 +50,6 @@ from transformers import ( SchedulerType, default_data_collator, get_scheduler, - set_seed, ) from transformers.utils import PaddingStrategy, get_full_repo_name @@ -296,6 +296,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 541f1f784e0..bdfa1e18932 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -34,6 +34,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from accelerate.utils import set_seed from huggingface_hub import Repository from transformers import ( AdamW, @@ -45,7 +46,6 @@ from transformers import ( XLNetTokenizerFast, default_data_collator, get_scheduler, - set_seed, ) from transformers.utils import check_min_version, get_full_repo_name from transformers.utils.versions import require_version @@ -290,6 +290,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git 
a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index 6da75822398..925c31b7064 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -35,6 +35,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from accelerate.utils import set_seed from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, @@ -48,7 +49,6 @@ from transformers import ( SchedulerType, default_data_collator, get_scheduler, - set_seed, ) from transformers.utils import check_min_version, get_full_repo_name from transformers.utils.versions import require_version @@ -320,6 +320,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index adc9e616dda..1c93b064a6d 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -36,6 +36,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from accelerate.utils import set_seed from filelock import FileLock from huggingface_hub import Repository from transformers import ( @@ -48,7 +49,6 @@ from transformers import ( DataCollatorForSeq2Seq, SchedulerType, get_scheduler, - set_seed, ) from transformers.utils import get_full_repo_name, is_offline_mode from transformers.utils.versions import require_version @@ -346,6 +346,12 @@ def 
main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 2c7fa186d0e..9730c2f3456 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -28,6 +28,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from accelerate.utils import set_seed from huggingface_hub import Repository from transformers import ( AdamW, @@ -39,7 +40,6 @@ from transformers import ( SchedulerType, default_data_collator, get_scheduler, - set_seed, ) from transformers.utils import get_full_repo_name from transformers.utils.versions import require_version @@ -223,6 +223,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index ab9fcce6df9..26f1ff41401 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -34,6 +34,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator 
+from accelerate.utils import set_seed from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, @@ -47,7 +48,6 @@ from transformers import ( SchedulerType, default_data_collator, get_scheduler, - set_seed, ) from transformers.utils import get_full_repo_name from transformers.utils.versions import require_version @@ -277,6 +277,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 034387582b8..f3f7fc1990b 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -35,6 +35,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from accelerate.utils import set_seed from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, @@ -49,7 +50,6 @@ from transformers import ( SchedulerType, default_data_collator, get_scheduler, - set_seed, ) from transformers.utils import get_full_repo_name from transformers.utils.versions import require_version @@ -319,6 +319,12 @@ def main(): else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone()