Fix minor comment typos (#15740)
Commit 5444687f0f (parent a63bd3675f)
@@ -133,7 +133,7 @@ def main():
         "--alpha_mlm",
         default=0.0,
         type=float,
-        help="Linear weight for the MLM loss. Must be >=0. Should be used in coonjunction with `mlm` flag.",
+        help="Linear weight for the MLM loss. Must be >=0. Should be used in conjunction with `mlm` flag.",
     )
     parser.add_argument("--alpha_clm", default=0.5, type=float, help="Linear weight for the CLM loss. Must be >=0.")
     parser.add_argument("--alpha_mse", default=0.0, type=float, help="Linear weight of the MSE loss. Must be >=0.")
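For context, the `alpha_*` flags act as linear weights on the individual terms of the distillation objective. The following is a minimal sketch of that weighting, not the project's actual Distiller code; the function and argument names are illustrative assumptions, and other terms of the real objective (e.g. a teacher/student cross-entropy term) are omitted.

```python
import torch

def combine_losses(loss_mlm: torch.Tensor, loss_clm: torch.Tensor, loss_mse: torch.Tensor, args) -> torch.Tensor:
    """Weighted sum of loss terms; each alpha must be >= 0, and 0.0 disables a term."""
    loss = torch.zeros((), device=loss_mlm.device)
    if args.alpha_mlm > 0.0:   # only meaningful in conjunction with the `mlm` flag
        loss = loss + args.alpha_mlm * loss_mlm
    if args.alpha_clm > 0.0:
        loss = loss + args.alpha_clm * loss_clm
    if args.alpha_mse > 0.0:
        loss = loss + args.alpha_mse * loss_mse
    return loss
```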
@@ -164,7 +164,7 @@ def main():
     parser.add_argument(
         "--restrict_ce_to_mask",
         action="store_true",
-        help="If true, compute the distilation loss only the [MLM] prediction distribution.",
+        help="If true, compute the distillation loss only the [MLM] prediction distribution.",
     )
     parser.add_argument(
         "--freeze_pos_embs",
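The idea behind `--restrict_ce_to_mask` is to compute the teacher/student distillation cross-entropy only over the [MLM]-masked positions rather than over every token. Below is an illustrative sketch under that assumption; the names and the temperature default are not taken from the project's code.

```python
import torch
import torch.nn.functional as F

def distillation_ce(student_logits, teacher_logits, mlm_mask, temperature=2.0, restrict_ce_to_mask=True):
    # student_logits, teacher_logits: (batch, seq_len, vocab); mlm_mask: (batch, seq_len) bool
    sel = mlm_mask.bool() if restrict_ce_to_mask else torch.ones_like(mlm_mask, dtype=torch.bool)
    s = student_logits[sel]  # (n_selected, vocab)
    t = teacher_logits[sel]
    # Soft-target KL between temperature-scaled distributions.
    return F.kl_div(
        F.log_softmax(s / temperature, dim=-1),
        F.softmax(t / temperature, dim=-1),
        reduction="batchmean",
    ) * temperature ** 2
```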
@@ -192,7 +192,7 @@ def main():
         help="Gradient accumulation for larger training batches.",
     )
     parser.add_argument("--warmup_prop", default=0.05, type=float, help="Linear warmup proportion.")
-    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.")
+    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
     parser.add_argument("--learning_rate", default=5e-4, type=float, help="The initial learning rate for Adam.")
     parser.add_argument("--adam_epsilon", default=1e-6, type=float, help="Epsilon for Adam optimizer.")
     parser.add_argument("--max_grad_norm", default=5.0, type=float, help="Max gradient norm.")
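These hyperparameters are the usual AdamW/warmup/clipping knobs. A minimal sketch of how they are typically wired together is shown below; this is an assumption about the setup, not the project's exact training loop.

```python
import torch
from transformers import get_linear_schedule_with_warmup

def build_optimizer(model, args, num_train_steps):
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=args.learning_rate,           # default 5e-4
        eps=args.adam_epsilon,           # default 1e-6
        weight_decay=args.weight_decay,  # default 0.0
    )
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(args.warmup_prop * num_train_steps),  # default 0.05 of total steps
        num_training_steps=num_train_steps,
    )
    return optimizer, scheduler

# In the training loop, gradients would be clipped before each optimizer step:
# torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)  # default 5.0
```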