Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-02 11:11:05 +06:00)
remove duplicate accumulate gradient step arguments

Commit: 34bdc8b54f
Parent: 0c24db9d5f
@@ -392,10 +392,6 @@ def main():
                         default=False,
                         action='store_true',
                         help="Whether not to use CUDA when available")
-    parser.add_argument("--accumulate_gradients",
-                        type=int,
-                        default=1,
-                        help="Number of steps to accumulate gradient on (divide the batch_size and accumulate)")
     parser.add_argument("--local_rank",
                         type=int,
                         default=-1,
@@ -426,11 +422,11 @@ def main():
         torch.distributed.init_process_group(backend='nccl')
     logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
-    if args.accumulate_gradients < 1:
-        raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(
-                            args.accumulate_gradients))
+    if args.gradient_accumulation_steps < 1:
+        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
+                            args.gradient_accumulation_steps))
 
-    args.train_batch_size = int(args.train_batch_size / args.accumulate_gradients)
+    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
 
     random.seed(args.seed)
     np.random.seed(args.seed)
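The two hunks above leave a single accumulation argument behind. As a consolidated view of the resulting pattern, here is a minimal, self-contained sketch: the --gradient_accumulation_steps declaration and the extra --train_batch_size argument are illustrative assumptions, while the validation and the batch-size division mirror the added lines in the diff.

import argparse

parser = argparse.ArgumentParser()
# Single accumulation flag; the duplicate --accumulate_gradients definition is removed.
# This declaration is an assumed stand-in, not copied from the repository.
parser.add_argument("--gradient_accumulation_steps",
                    type=int,
                    default=1,
                    help="Number of steps to accumulate gradients over before an optimizer update")
parser.add_argument("--train_batch_size", type=int, default=32)
args = parser.parse_args()

if args.gradient_accumulation_steps < 1:
    raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
        args.gradient_accumulation_steps))

# Divide the requested batch size across the accumulation steps so the
# effective batch size per optimizer update stays equal to --train_batch_size.
args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)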
run_squad.py (12 changed lines: 4 additions, 8 deletions)
@@ -731,10 +731,6 @@ def main():
                         type=int,
                         default=-1,
                         help="local_rank for distributed training on gpus")
-    parser.add_argument("--accumulate_gradients",
-                        type=int,
-                        default=1,
-                        help="Number of steps to accumulate gradient on (divide the batch_size and accumulate)")
     parser.add_argument('--seed',
                         type=int,
                         default=42,
@@ -756,11 +752,11 @@ def main():
         torch.distributed.init_process_group(backend='nccl')
     logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
-    if args.accumulate_gradients < 1:
-        raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(
-                            args.accumulate_gradients))
+    if args.gradient_accumulation_steps < 1:
+        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
+                            args.gradient_accumulation_steps))
 
-    args.train_batch_size = int(args.train_batch_size / args.accumulate_gradients)
+    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
 
     random.seed(args.seed)
     np.random.seed(args.seed)
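Both files now divide train_batch_size by gradient_accumulation_steps before training. For context, a hedged sketch of how such a count is typically consumed in a training loop; the toy model, optimizer, and data below are assumptions for illustration, not code from run_squad.py. The loss is scaled down by the accumulation count and the optimizer only steps every gradient_accumulation_steps mini-batches, so the accumulated update approximates one step over the full batch.

import torch

# Toy setup to illustrate the accumulation pattern; none of these names come from the repository.
gradient_accumulation_steps = 4
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
data = [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(8)]

model.zero_grad()
for step, (inputs, targets) in enumerate(data):
    loss = torch.nn.functional.mse_loss(model(inputs), targets)
    # Scale the loss so the accumulated gradient matches one update over the full batch.
    loss = loss / gradient_accumulation_steps
    loss.backward()
    # Only step the optimizer every gradient_accumulation_steps mini-batches.
    if (step + 1) % gradient_accumulation_steps == 0:
        optimizer.step()
        model.zero_grad()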