Fix issue where some no-trainer examples call accelerator.end_training incorrectly (#37435)

* Fix issue where some no-trainer examples call accelerator.end_training incorrectly

* reformat code

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
This commit is contained in:
we1559 2025-04-18 23:59:42 +08:00 committed by GitHub
parent 6f5014ac31
commit b0c6ff5e13
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 36 additions and 36 deletions

View File

@ -617,9 +617,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -640,6 +637,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump(all_results, f) json.dump(all_results, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -778,9 +778,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -798,6 +795,9 @@ def main():
token=args.hub_token, token=args.hub_token,
) )
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -714,9 +714,6 @@ def main():
logger.info(f"Test metrics: {metrics}") logger.info(f"Test metrics: {metrics}")
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -739,6 +736,9 @@ def main():
ignore_patterns=["epoch_*"], ignore_patterns=["epoch_*"],
) )
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -697,9 +697,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -719,6 +716,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump({"perplexity": perplexity}, f) json.dump({"perplexity": perplexity}, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -891,9 +891,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -908,6 +905,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump({"perplexity": perplexity}, f) json.dump({"perplexity": perplexity}, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -735,9 +735,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -757,6 +754,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump({"perplexity": perplexity}, f) json.dump({"perplexity": perplexity}, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -622,9 +622,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -645,6 +642,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump(all_results, f) json.dump(all_results, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -759,9 +759,6 @@ def main():
logger.info(f"Test metrics: {metrics}") logger.info(f"Test metrics: {metrics}")
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -784,6 +781,9 @@ def main():
ignore_patterns=["epoch_*"], ignore_patterns=["epoch_*"],
) )
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -602,9 +602,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -628,6 +625,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump(all_results, f, indent=2) json.dump(all_results, f, indent=2)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -634,9 +634,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -679,6 +676,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump(all_results, f) json.dump(all_results, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -794,9 +794,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -826,6 +823,9 @@ def main():
all_results[key] = int(value) all_results[key] = int(value)
json.dump(all_results, f) json.dump(all_results, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -762,9 +762,6 @@ def main():
output_dir = os.path.join(args.output_dir, output_dir) output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir) accelerator.save_state(output_dir)
if args.with_tracking:
accelerator.end_training()
if args.output_dir is not None: if args.output_dir is not None:
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model) unwrapped_model = accelerator.unwrap_model(model)
@ -784,6 +781,9 @@ def main():
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump({"eval_bleu": eval_metric["score"]}, f) json.dump({"eval_bleu": eval_metric["score"]}, f)
accelerator.wait_for_everyone()
accelerator.end_training()
if __name__ == "__main__": if __name__ == "__main__":
main() main()