mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Clean up staging tmp checkpoint directory (#28848)
Clean up the remaining tmp checkpoint dir.

Signed-off-by: woshiyyya <xiaoyunxuan1998@gmail.com>
This commit is contained in:
parent
136cd893dc
commit
c617f988f8
@@ -2468,6 +2468,10 @@ class Trainer:
|
||||
# Solely rely on numerical checkpoint id for rotation.
|
||||
# mtime is not reliable especially on some fuse fs in cloud environments.
|
||||
self._rotate_checkpoints(use_mtime=False, output_dir=run_dir)
|
||||
elif self.is_local_process_zero():
|
||||
# Clean up the remaining staging checkpoint folders on other nodes
|
||||
if staging_output_dir != output_dir and os.path.exists(staging_output_dir):
|
||||
shutil.rmtree(staging_output_dir)
|
||||
|
||||
self.args.distributed_state.wait_for_everyone()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user