Fix minor comment typos (#15740)
Commit 5444687f0f (parent a63bd3675f)
@@ -133,7 +133,7 @@ def main():
         "--alpha_mlm",
         default=0.0,
         type=float,
-        help="Linear weight for the MLM loss. Must be >=0. Should be used in coonjunction with `mlm` flag.",
+        help="Linear weight for the MLM loss. Must be >=0. Should be used in conjunction with `mlm` flag.",
     )
     parser.add_argument("--alpha_clm", default=0.5, type=float, help="Linear weight for the CLM loss. Must be >=0.")
     parser.add_argument("--alpha_mse", default=0.0, type=float, help="Linear weight of the MSE loss. Must be >=0.")
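For context, the `alpha_*` flags act as linear weights on the individual terms of the distillation objective. The following is a minimal sketch of that weighting, not the project's actual Distiller code; the function and argument names are illustrative assumptions, and other terms of the real objective (e.g. a teacher/student cross-entropy term) are omitted.

```python
import torch

def combine_losses(loss_mlm: torch.Tensor, loss_clm: torch.Tensor, loss_mse: torch.Tensor, args) -> torch.Tensor:
    """Weighted sum of loss terms; each alpha must be >= 0, and 0.0 disables a term."""
    loss = torch.zeros((), device=loss_mlm.device)
    if args.alpha_mlm > 0.0:   # only meaningful in conjunction with the `mlm` flag
        loss = loss + args.alpha_mlm * loss_mlm
    if args.alpha_clm > 0.0:
        loss = loss + args.alpha_clm * loss_clm
    if args.alpha_mse > 0.0:
        loss = loss + args.alpha_mse * loss_mse
    return loss
```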
@@ -164,7 +164,7 @@ def main():
     parser.add_argument(
         "--restrict_ce_to_mask",
         action="store_true",
-        help="If true, compute the distilation loss only the [MLM] prediction distribution.",
+        help="If true, compute the distillation loss only the [MLM] prediction distribution.",
     )
     parser.add_argument(
         "--freeze_pos_embs",
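The idea behind `--restrict_ce_to_mask` is to compute the teacher/student distillation cross-entropy only over the [MLM]-masked positions rather than over every token. Below is an illustrative sketch under that assumption; the names and the temperature default are not taken from the project's code.

```python
import torch
import torch.nn.functional as F

def distillation_ce(student_logits, teacher_logits, mlm_mask, temperature=2.0, restrict_ce_to_mask=True):
    # student_logits, teacher_logits: (batch, seq_len, vocab); mlm_mask: (batch, seq_len) bool
    sel = mlm_mask.bool() if restrict_ce_to_mask else torch.ones_like(mlm_mask, dtype=torch.bool)
    s = student_logits[sel]  # (n_selected, vocab)
    t = teacher_logits[sel]
    # Soft-target KL between temperature-scaled distributions.
    return F.kl_div(
        F.log_softmax(s / temperature, dim=-1),
        F.softmax(t / temperature, dim=-1),
        reduction="batchmean",
    ) * temperature ** 2
```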
@@ -192,7 +192,7 @@ def main():
         help="Gradient accumulation for larger training batches.",
     )
     parser.add_argument("--warmup_prop", default=0.05, type=float, help="Linear warmup proportion.")
-    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.")
+    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
     parser.add_argument("--learning_rate", default=5e-4, type=float, help="The initial learning rate for Adam.")
     parser.add_argument("--adam_epsilon", default=1e-6, type=float, help="Epsilon for Adam optimizer.")
     parser.add_argument("--max_grad_norm", default=5.0, type=float, help="Max gradient norm.")
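These hyperparameters are the usual AdamW/warmup/clipping knobs. A minimal sketch of how they are typically wired together is shown below; this is an assumption about the setup, not the project's exact training loop.

```python
import torch
from transformers import get_linear_schedule_with_warmup

def build_optimizer(model, args, num_train_steps):
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=args.learning_rate,           # default 5e-4
        eps=args.adam_epsilon,           # default 1e-6
        weight_decay=args.weight_decay,  # default 0.0
    )
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(args.warmup_prop * num_train_steps),  # default 0.05 of total steps
        num_training_steps=num_train_steps,
    )
    return optimizer, scheduler

# In the training loop, gradients would be clipped before each optimizer step:
# torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)  # default 5.0
```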