Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)
remove duplicate accumulate gradient step arguments
This commit is contained in:
parent 0c24db9d5f
commit 34bdc8b54f
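Context for the change: the training scripts touched here defined two flags for the same setting, --accumulate_gradients and --gradient_accumulation_steps; this commit drops the former and points the validation and batch-size logic at the latter. As a rough sketch, the retained flag presumably looks something like the following elsewhere in the same scripts (it is not part of this diff, so the exact help text is an assumption):

    # Assumed definition of the retained flag; not shown in this diff.
    parser.add_argument("--gradient_accumulation_steps",
                        type=int,
                        default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")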
@@ -392,10 +392,6 @@ def main():
                         default=False,
                         action='store_true',
                         help="Whether not to use CUDA when available")
-    parser.add_argument("--accumulate_gradients",
-                        type=int,
-                        default=1,
-                        help="Number of steps to accumulate gradient on (divide the batch_size and accumulate)")
     parser.add_argument("--local_rank",
                         type=int,
                         default=-1,
@@ -426,11 +422,11 @@ def main():
         torch.distributed.init_process_group(backend='nccl')
     logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
-    if args.accumulate_gradients < 1:
-        raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(
-            args.accumulate_gradients))
+    if args.gradient_accumulation_steps < 1:
+        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
+            args.gradient_accumulation_steps))
 
-    args.train_batch_size = int(args.train_batch_size / args.accumulate_gradients)
+    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
 
     random.seed(args.seed)
     np.random.seed(args.seed)
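For background, gradient accumulation in these scripts works by shrinking the per-pass batch (the train_batch_size division above) and stepping the optimizer only every gradient_accumulation_steps batches. A minimal sketch of that pattern inside the training loop, assuming the usual loss, optimizer, and step names (illustrative, not lines from this commit):

    # Minimal gradient-accumulation sketch; loss, optimizer and step are illustrative names.
    if args.gradient_accumulation_steps > 1:
        loss = loss / args.gradient_accumulation_steps   # average the loss over the accumulated passes
    loss.backward()                                       # gradients add up across backward() calls
    if (step + 1) % args.gradient_accumulation_steps == 0:
        optimizer.step()                                  # one parameter update per N small batches
        optimizer.zero_grad()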
run_squad.py (12 changed lines)
@@ -731,10 +731,6 @@ def main():
                         type=int,
                         default=-1,
                         help="local_rank for distributed training on gpus")
-    parser.add_argument("--accumulate_gradients",
-                        type=int,
-                        default=1,
-                        help="Number of steps to accumulate gradient on (divide the batch_size and accumulate)")
     parser.add_argument('--seed',
                         type=int,
                         default=42,
@@ -756,11 +752,11 @@ def main():
         torch.distributed.init_process_group(backend='nccl')
     logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
-    if args.accumulate_gradients < 1:
-        raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(
-            args.accumulate_gradients))
+    if args.gradient_accumulation_steps < 1:
+        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
+            args.gradient_accumulation_steps))
 
-    args.train_batch_size = int(args.train_batch_size / args.accumulate_gradients)
+    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
 
     random.seed(args.seed)
     np.random.seed(args.seed)
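The arithmetic behind the changed train_batch_size line, shown with made-up numbers:

    # Illustrative numbers only, mirroring the division in the diff above.
    train_batch_size = 32
    gradient_accumulation_steps = 4
    per_pass_batch_size = int(train_batch_size / gradient_accumulation_steps)  # 8 examples per forward/backward
    # With gradients accumulated over 4 passes, each optimizer update still
    # reflects 4 * 8 = 32 examples, so the effective batch size is unchanged.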