[cleanup] remove old scripts in /scripts 🧹 🧹 (#37676)

* rm old files * not this one
2025-07-31 02:02:21 +06:00 · 2025-04-22 16:59:03 +01:00 · 2025-04-22 16:59:03 +01:00 · 0f8c34b0a0
commit 0f8c34b0a0
parent 6673081b21
21 changed files with 1 additions and 1919 deletions
--- a/scripts/benchmark/trainer-benchmark.py
+++ b/scripts/benchmark/trainer-benchmark.py
@ -1,448 +0,0 @@
-#!/usr/bin/env python
-
-# HF Trainer benchmarking tool
-#
-# This tool can be used to run and compare multiple dimensions of the HF Trainers args.
-#
-# It then prints a report once in github format with all the information that needs to be shared
-# with others and second time in a console-friendly format, so it's easier to use for tuning things up.
-#
-# The main idea is:
-#
-#     ./trainer-benchmark.py --base-cmd '<cmd args that don't change>' \
-#     --variations '--tf32 0|--tf32 1' '--fp16 0|--fp16 1|--bf16 1' \
-#     --target-metric-key train_samples_per_second
-#
-# The variations can be any command line argument that you want to compare and not just dtype as in
-# the example.
-#
-# --variations allows you to compare variations in multiple dimensions.
-#
-# as the first dimension has 2 options and the second 3 in our example, this will run the trainer 6
-# times adding one of:
-#
-#    1. --tf32 0 --fp16 0
-#    2. --tf32 0 --fp16 1
-#    3. --tf32 0 --bf16 1
-#    4. --tf32 1 --fp16 0
-#    5. --tf32 1 --fp16 1
-#    6. --tf32 1 --bf16 1
-#
-# and print the results. This is just a cartesian product - and more than 2 dimensions can be used.
-#
-# If you want to rely on defaults, this:
-#    --variations '--tf32 0|--tf32 1' '--fp16 0|--fp16 1|--bf16 1'
-# is identical to this:
-#    --variations '--tf32 0|--tf32 1' '|--fp16|--bf16'
-#
-# the leading empty variation in the 2nd dimension is a valid variation.
-#
-# So here we get the following 6 variations:
-#
-#    1. --tf32 0
-#    2. --tf32 0 --fp16
-#    3. --tf32 0 --bf16
-#    4. --tf32 1
-#    5. --tf32 1 --fp16
-#    6. --tf32 1 --bf16
-#
-# In this particular case we don't know what the default tf32 setting is as it's normally
-# pytorch-version dependent). That's why it's best to do an explicit setting of each variation:
-#    `--tf32 0|--tf32 1`
-#
-# Here is a full example of a train:
-#
-# CUDA_VISIBLE_DEVICES=0 python ./scripts/benchmark/trainer-benchmark.py \
-# --base-cmd \
-# ' examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small \
-# --output_dir output_dir --do_train --label_smoothing 0.1 --logging_strategy no \
-# --save_strategy no --per_device_train_batch_size 32 --max_source_length 512 \
-# --max_target_length 512 --num_train_epochs 1 --overwrite_output_dir \
-# --source_lang en --target_lang ro --dataset_name wmt16 --dataset_config "ro-en" \
-# --source_prefix "translate English to Romanian: " --warmup_steps 50 \
-# --max_train_samples 20000 --dataloader_num_workers 2 ' \
-# --target-metric-key train_samples_per_second --repeat-times 1 --variations \
-# '|--fp16|--bf16' '--tf32 0|--tf32 1' --report-metric-keys train_loss \
-# --repeat-times 1 --base-variation '--tf32 0'
-#
-# and here is a possible output:
-#
-#
-# | Variation       |     Train |   Diff |   Train |
-# |                 |   samples |      % |    loss |
-# |                 |       per |        |         |
-# |                 |    second |        |         |
-# |:----------------|----------:|-------:|--------:|
-# | --tf32 0        |    285.11 |      0 |    2.51 |
-# | --tf32 1        |    342.09 |     20 |    2.51 |
-# | --fp16 --tf32 0 |    423.49 |     49 |    2.51 |
-# | --fp16 --tf32 1 |    423.13 |     48 |    2.51 |
-# | --bf16 --tf32 0 |    416.80 |     46 |    2.52 |
-# | --bf16 --tf32 1 |    415.87 |     46 |    2.52 |
-#
-#
-# So you can quickly compare the different outcomes.
-#
-# Typically running each experiment once is enough, but if the environment is unstable you can
-# re-run each multiple times, e.g., 3 using --repeat-times 3 and it will report the averaged results.
-#
-# By default it'll use the lowest result as the base line to use as 100% and then compare the rest to
-# it as can be seen from the table above, but you can also specify which combination is the one to use as
-# the baseline, e.g., to change to another entry use: --base-variation '--tf32 1 --fp16 0'
-#
-# --target-metric-key is there to tell the program which metrics to compare - the different metric keys are
-# inside output_dir/all_results.json. e.g., to measure eval performance instead of train use:
-#    --target-metric-key eval_samples_per_second
-# but of course you will need to adjust the --base-cmd value in the example to perform evaluation as
-# well (as currently it doesn't)
-#
-
-import argparse
-import datetime
-import io
-import itertools
-import json
-import math
-import os
-import platform
-import re
-import shlex
-import subprocess
-import sys
-from pathlib import Path
-from statistics import fmean
-
-import pandas as pd
-import torch
-from tqdm import tqdm
-
-import transformers
-
-
-nan = float("nan")
-
-
-class Tee:
-    """
-    A helper class to tee print's output into a file.
-    Usage:
-    sys.stdout = Tee(filename)
-    """
-
-    def __init__(self, filename):
-        self.stdout = sys.stdout
-        self.file = open(filename, "a")
-
-    def __getattr__(self, attr):
-        return getattr(self.stdout, attr)
-
-    def write(self, msg):
-        self.stdout.write(msg)
-        # strip tqdm codes
-        self.file.write(re.sub(r"^.*\r", "", msg, 0, re.M))
-
-
-def get_original_command(max_width=80, full_python_path=False):
-    """
-    Return the original command line string that can be replayed nicely and wrapped for 80 char width.
-
-    Args:
-        max_width (`int`, *optional*, defaults to 80):
-            The width to wrap for.
-        full_python_path (`bool`, `optional`, defaults to `False`):
-             Whether to replicate the full path or just the last segment (i.e. `python`).
-    """
-
-    cmd = []
-
-    # deal with critical env vars
-    env_keys = ["CUDA_VISIBLE_DEVICES"]
-    for key in env_keys:
-        val = os.environ.get(key, None)
-        if val is not None:
-            cmd.append(f"{key}={val}")
-
-    # python executable (not always needed if the script is executable)
-    python = sys.executable if full_python_path else sys.executable.split("/")[-1]
-    cmd.append(python)
-
-    # now the normal args
-    cmd += list(map(shlex.quote, sys.argv))
-
-    # split up into up to MAX_WIDTH lines with shell multi-line escapes
-    lines = []
-    current_line = ""
-    while len(cmd) > 0:
-        current_line += f"{cmd.pop(0)} "
-        if len(cmd) == 0 or len(current_line) + len(cmd[0]) + 1 > max_width - 1:
-            lines.append(current_line)
-            current_line = ""
-    return "\\\n".join(lines)
-
-
-def get_base_command(args, output_dir):
-
-    # unwrap multi-line input
-    args.base_cmd = re.sub(r"[\\\n]+", " ", args.base_cmd)
-
-    # remove --output_dir if any and set our own
-    args.base_cmd = re.sub("--output_dir\s+[^\s]+", "", args.base_cmd)
-    args.base_cmd += f" --output_dir {output_dir}"
-
-    # ensure we have --overwrite_output_dir
-    args.base_cmd = re.sub("--overwrite_output_dir\s+", "", args.base_cmd)
-    args.base_cmd += " --overwrite_output_dir"
-
-    return [sys.executable] + shlex.split(args.base_cmd)
-
-
-def process_run_single(id, cmd, variation, output_dir, target_metric_key, metric_keys, verbose):
-
-    # Enable to debug everything but the run itself, to do it fast and see the progress.
-    # This is useful for debugging the output formatting quickly - we can remove it later once
-    # everybody is happy with the output
-    if 0:
-        import random
-        from time import sleep
-
-        sleep(0)
-        return dict(
-            {k: random.uniform(0, 100) for k in metric_keys},
-            **{target_metric_key: random.choice([nan, 10.31, 100.2, 55.6666, 222.22222222])},
-        )
-
-    result = subprocess.run(cmd, capture_output=True, text=True)
-
-    if verbose:
-        print("STDOUT", result.stdout)
-        print("STDERR", result.stderr)
-
-    # save the streams
-    prefix = variation.replace(" ", "-")
-    with open(Path(output_dir) / f"log.{prefix}.stdout.txt", "w") as f:
-        f.write(result.stdout)
-    with open(Path(output_dir) / f"log.{prefix}.stderr.txt", "w") as f:
-        f.write(result.stderr)
-
-    if result.returncode != 0:
-        if verbose:
-            print("failed")
-        return {target_metric_key: nan}
-
-    with io.open(f"{output_dir}/all_results.json", "r", encoding="utf-8") as f:
-        metrics = json.load(f)
-
-    # filter out just the keys we want
-    return {k: v for k, v in metrics.items() if k in metric_keys}
-
-
-def process_run(
-    id,
-    cmd,
-    variation_key,
-    variation,
-    longest_variation_len,
-    target_metric_key,
-    report_metric_keys,
-    repeat_times,
-    output_dir,
-    verbose,
-):
-    results = []
-    metrics = []
-    preamble = f"{id}: {variation:<{longest_variation_len}}"
-    outcome = f"{preamble}: "
-    metric_keys = set(report_metric_keys + [target_metric_key])
-    for i in tqdm(range(repeat_times), desc=preamble, leave=False):
-        single_run_metrics = process_run_single(
-            id, cmd, variation, output_dir, target_metric_key, metric_keys, verbose
-        )
-        result = single_run_metrics[target_metric_key]
-        if not math.isnan(result):
-            metrics.append(single_run_metrics)
-            results.append(result)
-            outcome += "✓"
-        else:
-            outcome += "✘"
-    outcome = f"\33[2K\r{outcome}"
-    if len(metrics) > 0:
-        mean_metrics = {k: fmean([x[k] for x in metrics]) for k in metrics[0].keys()}
-        mean_target = round(mean_metrics[target_metric_key], 2)
-        results_str = f"{outcome} {mean_target}"
-        if len(metrics) > 1:
-            results_str += f" {tuple(round(x, 2) for x in results)}"
-        print(results_str)
-        mean_metrics[variation_key] = variation
-        return mean_metrics
-    else:
-        print(outcome)
-        return {variation_key: variation, target_metric_key: nan}
-
-
-def get_versions():
-    properties = torch.cuda.get_device_properties(torch.device("cuda"))
-    return f"""
-Datetime    : {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
-
-Software:
-transformers: {transformers.__version__}
-torch       : {torch.__version__}
-cuda        : {torch.version.cuda}
-python      : {platform.python_version()}
-
-Hardware:
-{torch.cuda.device_count()} GPUs      : {properties.name}, {properties.total_memory/2**30:0.2f}GB
-"""
-
-
-def process_results(results, target_metric_key, report_metric_keys, base_variation, output_dir):
-
-    df = pd.DataFrame(results)
-    variation_key = "variation"
-    diff_key = "diff_%"
-
-    sentinel_value = nan
-    if base_variation is not None and len(df[df[variation_key] == base_variation]):
-        # this may still return nan
-        sentinel_value = df.loc[df[variation_key] == base_variation][target_metric_key].item()
-    if math.isnan(sentinel_value):
-        # as a fallback, use the minimal value as the sentinel
-        sentinel_value = df.loc[df[target_metric_key] != nan][target_metric_key].min()
-
-    # create diff column if possible
-    if not math.isnan(sentinel_value):
-        df[diff_key] = df.apply(
-            lambda r: round(100 * (r[target_metric_key] - sentinel_value) / sentinel_value)
-            if not math.isnan(r[target_metric_key])
-            else 0,
-            axis="columns",
-        )
-
-    # re-order columns
-    cols = [variation_key, target_metric_key, diff_key, *report_metric_keys]
-    df = df.reindex(cols, axis="columns")  # reorder cols
-
-    # capitalize
-    df = df.rename(str.capitalize, axis="columns")
-
-    # make the cols as narrow as possible
-    df_github = df.rename(lambda c: c.replace("_", "<br>"), axis="columns")
-    df_console = df.rename(lambda c: c.replace("_", "\n"), axis="columns")
-
-    report = ["", "Copy between the cut-here-lines and paste as is to github or a forum"]
-    report += ["----------8<-----------------8<--------"]
-    report += ["*** Results:", df_github.to_markdown(index=False, floatfmt=".2f")]
-    report += ["```"]
-    report += ["*** Setup:", get_versions()]
-    report += ["*** The benchmark command line was:", get_original_command()]
-    report += ["```"]
-    report += ["----------8<-----------------8<--------"]
-    report += ["*** Results (console):", df_console.to_markdown(index=False, floatfmt=".2f")]
-
-    print("\n\n".join(report))
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--base-cmd",
-        default=None,
-        type=str,
-        required=True,
-        help="Base cmd",
-    )
-    parser.add_argument(
-        "--variations",
-        default=None,
-        type=str,
-        nargs="+",
-        required=True,
-        help="Multi-dimensional variations, example: '|--fp16|--bf16' '|--tf32'",
-    )
-    parser.add_argument(
-        "--base-variation",
-        default=None,
-        type=str,
-        help="Baseline variation to compare to. if None the minimal target value will be used to compare against",
-    )
-    parser.add_argument(
-        "--target-metric-key",
-        default=None,
-        type=str,
-        required=True,
-        help="Target metric key in output_dir/all_results.json, e.g., train_samples_per_second",
-    )
-    parser.add_argument(
-        "--report-metric-keys",
-        default="",
-        type=str,
-        help="Report metric keys - other metric keys from output_dir/all_results.json to report, e.g., train_loss. Use a single argument e.g., 'train_loss train_samples",
-    )
-    parser.add_argument(
-        "--repeat-times",
-        default=1,
-        type=int,
-        help="How many times to re-run each variation - an average will be reported",
-    )
-    parser.add_argument(
-        "--output_dir",
-        default="output_benchmark",
-        type=str,
-        help="The output directory where all the benchmark reports will go to and additionally this directory will be used to override --output_dir in the script that is being benchmarked",
-    )
-    parser.add_argument(
-        "--verbose",
-        default=False,
-        action="store_true",
-        help="Whether to show the outputs of each run or just the benchmark progress",
-    )
-    args = parser.parse_args()
-
-    output_dir = args.output_dir
-    Path(output_dir).mkdir(exist_ok=True)
-    base_cmd = get_base_command(args, output_dir)
-
-    # split each dimension into its --foo variations
-    dims = [list(map(str.strip, re.split(r"\|", x))) for x in args.variations]
-    # build a cartesian product of dimensions and convert those back into cmd-line arg strings,
-    # while stripping white space for inputs that were empty
-    variations = list(map(str.strip, map(" ".join, itertools.product(*dims))))
-    longest_variation_len = max(len(x) for x in variations)
-
-    # split wanted keys
-    report_metric_keys = args.report_metric_keys.split()
-
-    # capture prints into a log file for convenience
-    report_fn = f"benchmark-report-{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.txt"
-    print(f"\nNote: each run's output is also logged under {output_dir}/log.*.std*.txt")
-    print(f"and this script's output is also piped into {report_fn}")
-
-    sys.stdout = Tee(report_fn)
-
-    print(f"\n*** Running {len(variations)} benchmarks:")
-    print(f"Base command: {' '.join(base_cmd)}")
-
-    variation_key = "variation"
-    results = []
-    for id, variation in enumerate(tqdm(variations, desc="Total completion: ", leave=False)):
-        cmd = base_cmd + variation.split()
-        results.append(
-            process_run(
-                id + 1,
-                cmd,
-                variation_key,
-                variation,
-                longest_variation_len,
-                args.target_metric_key,
-                report_metric_keys,
-                args.repeat_times,
-                output_dir,
-                args.verbose,
-            )
-        )
-
-    process_results(results, args.target_metric_key, report_metric_keys, args.base_variation, output_dir)
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/deberta_scrtipt.py
+++ b/scripts/deberta_scrtipt.py
@ -1,85 +0,0 @@
-import time
-
-import torch
-
-from transformers import AutoModel, AutoTokenizer, pipeline
-
-
-test_sentence = 'Do you [MASK] the muffin man?'
-
-# for comparison
-bert = pipeline('fill-mask', model = 'bert-base-uncased')
-print('\n'.join([d['sequence'] for d in bert(test_sentence)]))
-
-
-deberta = pipeline('fill-mask', model = 'microsoft/deberta-v3-base', model_kwargs={"legacy": False})
-print('\n'.join([d['sequence'] for d in deberta(test_sentence)]))
-
-
-tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
-
-tokenized_dict = tokenizer(
-    ["Is this working",], ["Not yet",],
-    return_tensors="pt"
-)
-
-deberta.model.forward = torch.compile(deberta.model.forward)
-start=time.time()
-deberta.model(**tokenized_dict)
-end=time.time()
-print(end-start)
-
-
-start=time.time()
-deberta.model(**tokenized_dict)
-end=time.time()
-print(end-start)
-
-
-start=time.time()
-deberta.model(**tokenized_dict)
-end=time.time()
-print(end-start)
-
-
-model = AutoModel.from_pretrained('microsoft/deberta-base')
-model.config.return_dict = False
-model.config.output_hidden_states=False
-input_tuple = (tokenized_dict['input_ids'], tokenized_dict['attention_mask'])
-
-
-start=time.time()
-traced_model = torch.jit.trace(model, input_tuple)
-end=time.time()
-print(end-start)
-
-
-start=time.time()
-traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask'])
-end=time.time()
-print(end-start)
-
-
-start=time.time()
-traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask'])
-end=time.time()
-print(end-start)
-
-
-start=time.time()
-traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask'])
-end=time.time()
-print(end-start)
-
-
-start=time.time()
-traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask'])
-end=time.time()
-print(end-start)
-
-
-torch.jit.save(traced_model, "compiled_deberta.pt")
-
-
-
-# my_script_module = torch.jit.script(model)
--- a/scripts/fsmt/convert-allenai-wmt16.sh
+++ b/scripts/fsmt/convert-allenai-wmt16.sh
@ -1,71 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script acquires data and converts it to fsmt model
-# it covers:
-# - allenai/wmt16-en-de-dist-12-1
-# - allenai/wmt16-en-de-dist-6-1
-# - allenai/wmt16-en-de-12-1
-
-# this script needs to be run from the top level of the transformers repo
-if [ ! -d "src/transformers" ]; then
-    echo "Error: This script needs to be run from the top of the transformers repo"
-    exit 1
-fi
-
-mkdir data
-
-# get data (run once)
-
-cd data
-gdown 'https://drive.google.com/uc?id=1x_G2cjvM1nW5hjAB8-vWxRqtQTlmIaQU'
-gdown 'https://drive.google.com/uc?id=1oA2aqZlVNj5FarxBlNXEHpBS4lRetTzU'
-gdown 'https://drive.google.com/uc?id=1Wup2D318QYBFPW_NKI1mfP_hXOfmUI9r'
-tar -xvzf trans_ende_12-1_0.2.tar.gz
-tar -xvzf trans_ende-dist_12-1_0.2.tar.gz
-tar -xvzf trans_ende-dist_6-1_0.2.tar.gz
-gdown 'https://drive.google.com/uc?id=1mNufoynJ9-Zy1kJh2TA_lHm2squji0i9'
-gdown 'https://drive.google.com/uc?id=1iO7um-HWoNoRKDtw27YUSgyeubn9uXqj'
-tar -xvzf wmt16.en-de.deep-shallow.dist.tar.gz
-tar -xvzf wmt16.en-de.deep-shallow.tar.gz
-cp wmt16.en-de.deep-shallow/data-bin/dict.*.txt trans_ende_12-1_0.2
-cp wmt16.en-de.deep-shallow.dist/data-bin/dict.*.txt trans_ende-dist_12-1_0.2
-cp wmt16.en-de.deep-shallow.dist/data-bin/dict.*.txt trans_ende-dist_6-1_0.2
-cp wmt16.en-de.deep-shallow/bpecodes trans_ende_12-1_0.2
-cp wmt16.en-de.deep-shallow.dist/bpecodes trans_ende-dist_12-1_0.2
-cp wmt16.en-de.deep-shallow.dist/bpecodes trans_ende-dist_6-1_0.2
-cd -
-
-# run conversions and uploads
-
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/trans_ende-dist_12-1_0.2/checkpoint_top5_average.pt --pytorch_dump_folder_path data/wmt16-en-de-dist-12-1
-
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/trans_ende-dist_6-1_0.2/checkpoint_top5_average.pt --pytorch_dump_folder_path data/wmt16-en-de-dist-6-1
-
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/trans_ende_12-1_0.2/checkpoint_top5_average.pt --pytorch_dump_folder_path data/wmt16-en-de-12-1
-
-
-# upload
-cd data
-transformers-cli upload -y wmt16-en-de-dist-12-1
-transformers-cli upload -y wmt16-en-de-dist-6-1
-transformers-cli upload -y wmt16-en-de-12-1
-cd -
-
-
-# if updating just small files and not the large models, here is a script to generate the right commands:
-perl -le 'for $f (@ARGV) { print qq[transformers-cli upload -y $_/$f --filename $_/$f] for ("wmt16-en-de-dist-12-1", "wmt16-en-de-dist-6-1", "wmt16-en-de-12-1")}' vocab-src.json vocab-tgt.json tokenizer_config.json config.json
-# add/remove files as needed
-
--- a/scripts/fsmt/convert-allenai-wmt19.sh
+++ b/scripts/fsmt/convert-allenai-wmt19.sh
@ -1,59 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script acquires data and converts it to fsmt model
-# it covers:
-# - allenai/wmt19-de-en-6-6-base
-# - allenai/wmt19-de-en-6-6-big
-
-# this script needs to be run from the top level of the transformers repo
-if [ ! -d "src/transformers" ]; then
-    echo "Error: This script needs to be run from the top of the transformers repo"
-    exit 1
-fi
-
-mkdir data
-
-# get data (run once)
-
-cd data
-gdown 'https://drive.google.com/uc?id=1j6z9fYdlUyOYsh7KJoumRlr1yHczxR5T'
-gdown 'https://drive.google.com/uc?id=1yT7ZjqfvUYOBXvMjeY8uGRHQFWoSo8Q5'
-gdown 'https://drive.google.com/uc?id=15gAzHeRUCs-QV8vHeTReMPEh1j8excNE'
-tar -xvzf wmt19.de-en.tar.gz
-tar -xvzf wmt19_deen_base_dr0.1_1.tar.gz
-tar -xvzf wmt19_deen_big_dr0.1_2.tar.gz
-cp wmt19.de-en/data-bin/dict.*.txt wmt19_deen_base_dr0.1_1
-cp wmt19.de-en/data-bin/dict.*.txt wmt19_deen_big_dr0.1_2
-cd -
-
-# run conversions and uploads
-
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/wmt19_deen_base_dr0.1_1/checkpoint_last3_avg.pt --pytorch_dump_folder_path data/wmt19-de-en-6-6-base
-
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/wmt19_deen_big_dr0.1_2/checkpoint_last3_avg.pt --pytorch_dump_folder_path data/wmt19-de-en-6-6-big
-
-
-# upload
-cd data
-transformers-cli upload -y wmt19-de-en-6-6-base
-transformers-cli upload -y wmt19-de-en-6-6-big
-cd -
-
-
-# if updating just small files and not the large models, here is a script to generate the right commands:
-perl -le 'for $f (@ARGV) { print qq[transformers-cli upload -y $_/$f --filename $_/$f] for ("wmt19-de-en-6-6-base", "wmt19-de-en-6-6-big")}' vocab-src.json vocab-tgt.json tokenizer_config.json config.json
-# add/remove files as needed
-
--- a/scripts/fsmt/convert-facebook-wmt19.sh
+++ b/scripts/fsmt/convert-facebook-wmt19.sh
@ -1,70 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script acquires data and converts it to fsmt model
-# it covers:
-# - facebook/wmt19-ru-en
-# - facebook/wmt19-en-ru
-# - facebook/wmt19-de-en
-# - facebook/wmt19-en-de
-
-# this script needs to be run from the top level of the transformers repo
-if [ ! -d "src/transformers" ]; then
-    echo "Error: This script needs to be run from the top of the transformers repo"
-    exit 1
-fi
-
-mkdir data
-
-# get data (run once)
-
-cd data
-wget https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-de.joined-dict.ensemble.tar.gz
-wget https://dl.fbaipublicfiles.com/fairseq/models/wmt19.de-en.joined-dict.ensemble.tar.gz
-wget https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-ru.ensemble.tar.gz
-wget https://dl.fbaipublicfiles.com/fairseq/models/wmt19.ru-en.ensemble.tar.gz
-tar -xvzf wmt19.en-de.joined-dict.ensemble.tar.gz
-tar -xvzf wmt19.de-en.joined-dict.ensemble.tar.gz
-tar -xvzf wmt19.en-ru.ensemble.tar.gz
-tar -xvzf wmt19.ru-en.ensemble.tar.gz
-cd -
-
-# run conversions and uploads
-
-export PAIR=ru-en
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/wmt19.$PAIR.ensemble/model4.pt --pytorch_dump_folder_path data/wmt19-$PAIR
-
-export PAIR=en-ru
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/wmt19.$PAIR.ensemble/model4.pt --pytorch_dump_folder_path data/wmt19-$PAIR
-
-export PAIR=de-en
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/wmt19.$PAIR.joined-dict.ensemble/model4.pt --pytorch_dump_folder_path data/wmt19-$PAIR
-
-export PAIR=en-de
-PYTHONPATH="src" python src/transformers/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py --fsmt_checkpoint_path data/wmt19.$PAIR.joined-dict.ensemble/model4.pt --pytorch_dump_folder_path data/wmt19-$PAIR
-
-
-# upload
-cd data
-transformers-cli upload -y wmt19-ru-en
-transformers-cli upload -y wmt19-en-ru
-transformers-cli upload -y wmt19-de-en
-transformers-cli upload -y wmt19-en-de
-cd -
-
-# if updating just small files and not the large models, here is a script to generate the right commands:
-perl -le 'for $f (@ARGV) { print qq[transformers-cli upload -y $_/$f --filename $_/$f] for map { "wmt19-$_" } ("en-ru", "ru-en", "de-en", "en-de")}' vocab-src.json vocab-tgt.json tokenizer_config.json config.json
-# add/remove files as needed
-
--- a/scripts/fsmt/eval-allenai-wmt16.sh
+++ b/scripts/fsmt/eval-allenai-wmt16.sh
@ -1,79 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script evals the following fsmt models
-# it covers:
-# - allenai/wmt16-en-de-dist-12-1
-# - allenai/wmt16-en-de-dist-6-1
-# - allenai/wmt16-en-de-12-1
-
-# this script needs to be run from the top level of the transformers repo
-if [ ! -d "src/transformers" ]; then
-    echo "Error: This script needs to be run from the top of the transformers repo"
-    exit 1
-fi
-
-# In these scripts you may have to lower BS if you get CUDA OOM (or increase it if you have a large GPU)
-
-### Normal eval ###
-
-export PAIR=en-de
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=64
-export NUM_BEAMS=5
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-
-MODEL_PATH=allenai/wmt16-en-de-dist-12-1
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-MODEL_PATH=allenai/wmt16-en-de-dist-6-1
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-MODEL_PATH=allenai/wmt16-en-de-12-1
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-
-
-### Searching hparams eval ###
-
-
-export PAIR=en-de
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=32
-export NUM_BEAMS=5
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-
-MODEL_PATH=allenai/wmt16-en-de-dist-12-1
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:10:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1"
-
-
-MODEL_PATH=allenai/wmt16-en-de-dist-6-1
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:10:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1"
-
-
-MODEL_PATH=allenai/wmt16-en-de-12-1
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:10:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1"
--- a/scripts/fsmt/eval-allenai-wmt19.sh
+++ b/scripts/fsmt/eval-allenai-wmt19.sh
@ -1,67 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script evals the following fsmt models
-# it covers:
-# - allenai/wmt19-de-en-6-6-base
-# - allenai/wmt19-de-en-6-6-big
-
-# this script needs to be run from the top level of the transformers repo
-if [ ! -d "src/transformers" ]; then
-    echo "Error: This script needs to be run from the top of the transformers repo"
-    exit 1
-fi
-
-# In these scripts you may have to lower BS if you get CUDA OOM (or increase it if you have a large GPU)
-
-### Normal eval ###
-
-export PAIR=de-en
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=64
-export NUM_BEAMS=5
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-
-MODEL_PATH=allenai/wmt19-de-en-6-6-base
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-MODEL_PATH=allenai/wmt19-de-en-6-6-big
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-
-
-### Searching hparams eval ###
-
-export PAIR=de-en
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=16
-export NUM_BEAMS=5
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-
-MODEL_PATH=allenai/wmt19-de-en-6-6-base
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:10:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1"
-
-MODEL_PATH=allenai/wmt19-de-en-6-6-big
-echo $PAIR $MODEL_PATH
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py $MODEL_PATH $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:10:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1"
--- a/scripts/fsmt/eval-facebook-wmt19.sh
+++ b/scripts/fsmt/eval-facebook-wmt19.sh
@ -1,161 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script evals the following fsmt models
-# it covers:
-# - facebook/wmt19-ru-en
-# - facebook/wmt19-en-ru
-# - facebook/wmt19-de-en
-# - facebook/wmt19-en-de
-
-
-# this script needs to be run from the top level of the transformers repo
-if [ ! -d "src/transformers" ]; then
-    echo "Error: This script needs to be run from the top of the transformers repo"
-    exit 1
-fi
-
-
-# In these scripts you may have to lower BS if you get CUDA OOM (or increase it if you have a large GPU)
-
-### a short estimate version for quick testing ###
-
-export PAIR=en-ru
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-export NUM_BEAMS=8
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src | head -10 > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref | head -10 > $DATA_DIR/val.target
-echo $PAIR
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-
-
-### Normal eval ###
-
-# ru-en
-
-export PAIR=ru-en
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-export NUM_BEAMS=50
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-
-# (target BLEU: 41.3 http://matrix.statmt.org/matrix/output/1907?run_id=6937)
-
-
-# en-ru
-
-export PAIR=en-ru
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-export NUM_BEAMS=50
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-echo $PAIR
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-# (target BLEU: 36.4 http://matrix.statmt.org/matrix/output/1914?score_id=37605)
-
-
-
-# en-de
-
-export PAIR=en-de
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-echo $PAIR
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-# (target BLEU: 43.1 http://matrix.statmt.org/matrix/output/1909?run_id=6862)
-
-
-# de-en
-
-export PAIR=de-en
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-export NUM_BEAMS=50
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-echo $PAIR
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-
-# (target BLEU: 42.3 http://matrix.statmt.org/matrix/output/1902?run_id=6750)
-
-
-### Searching hparams eval ###
-
-# ru-en
-
-export PAIR=ru-en
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=32
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-CUDA_VISIBLE_DEVICES="0" PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1"
-
-
-# en-ru
-
-export PAIR=en-ru
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=16
-mkdir -p $DATA_DIR
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-CUDA_VISIBLE_DEVICES="0" PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:8:11:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1 early_stopping=true:false"
-
-# en-de
-
-export PAIR=en-de
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=16
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-CUDA_VISIBLE_DEVICES="1" PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:8:11:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1 early_stopping=true:false"
-
-# de-en
-
-export PAIR=de-en
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=16
-mkdir -p $DATA_DIR
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-CUDA_VISIBLE_DEVICES="1" PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --search="num_beams=5:8:11:15 length_penalty=0.6:0.7:0.8:0.9:1.0:1.1 early_stopping=true:false"
--- a/scripts/fsmt/fsmt-make-super-tiny-model.py
+++ b/scripts/fsmt/fsmt-make-super-tiny-model.py
@ -1,88 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script creates a super tiny model that is useful inside tests, when we just want to test that
-# the machinery works, without needing to check the quality of the outcomes.
-#
-# This version creates a tiny vocab first, and then a tiny model - so the outcome is truly tiny -
-# all files ~60KB. As compared to taking a full-size model, reducing to the minimum its layers and
-# emb dimensions, but keeping the full vocab + merges files, leading to ~3MB in total for all files.
-# The latter is done by `fsmt-make-tiny-model.py`.
-#
-# It will be used then as "stas/tiny-wmt19-en-ru"
-
-import json
-import tempfile
-from pathlib import Path
-
-from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
-from transformers.models.fsmt.tokenization_fsmt import VOCAB_FILES_NAMES
-
-
-mname_tiny = "tiny-wmt19-en-ru"
-
-# Build
-
-# borrowed from a test
-vocab = [ "l", "o", "w", "e", "r", "s", "t", "i", "d", "n", "w</w>", "r</w>", "t</w>", "lo", "low", "er</w>", "low</w>", "lowest</w>", "newer</w>", "wider</w>", "<unk>", ]
-vocab_tokens = dict(zip(vocab, range(len(vocab))))
-merges = ["l o 123", "lo w 1456", "e r</w> 1789", ""]
-
-with tempfile.TemporaryDirectory() as tmpdirname:
-    build_dir = Path(tmpdirname)
-    src_vocab_file = build_dir / VOCAB_FILES_NAMES["src_vocab_file"]
-    tgt_vocab_file = build_dir / VOCAB_FILES_NAMES["tgt_vocab_file"]
-    merges_file = build_dir / VOCAB_FILES_NAMES["merges_file"]
-    with open(src_vocab_file, "w") as fp: fp.write(json.dumps(vocab_tokens))
-    with open(tgt_vocab_file, "w") as fp: fp.write(json.dumps(vocab_tokens))
-    with open(merges_file, "w") as fp   : fp.write("\n".join(merges))
-
-    tokenizer = FSMTTokenizer(
-        langs=["en", "ru"],
-        src_vocab_size = len(vocab),
-        tgt_vocab_size = len(vocab),
-        src_vocab_file=src_vocab_file,
-        tgt_vocab_file=tgt_vocab_file,
-        merges_file=merges_file,
-    )
-
-config = FSMTConfig(
-    langs=['ru', 'en'],
-    src_vocab_size=1000, tgt_vocab_size=1000,
-    d_model=4,
-    encoder_layers=1, decoder_layers=1,
-    encoder_ffn_dim=4, decoder_ffn_dim=4,
-    encoder_attention_heads=1, decoder_attention_heads=1,
-)
-
-tiny_model = FSMTForConditionalGeneration(config)
-print(f"num of params {tiny_model.num_parameters()}")
-
-# Test
-batch = tokenizer(["Making tiny model"], return_tensors="pt")
-outputs = tiny_model(**batch)
-
-print("test output:", len(outputs.logits[0]))
-
-# Save
-tiny_model.half() # makes it smaller
-tiny_model.save_pretrained(mname_tiny)
-tokenizer.save_pretrained(mname_tiny)
-
-print(f"Generated {mname_tiny}")
-
-# Upload
-# transformers-cli upload tiny-wmt19-en-ru
--- a/scripts/fsmt/fsmt-make-tiny-model.py
+++ b/scripts/fsmt/fsmt-make-tiny-model.py
@ -1,61 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script creates a super tiny model that is useful inside tests, when we just want to test that
-# the machinery works, without needing to check the quality of the outcomes.
-#
-# This version creates a tiny model through reduction of a normal pre-trained model, but keeping the
-# full vocab, merges file, and thus also resulting in a larger model due to a large vocab size.
-# This gives ~3MB in total for all files.
-#
-# If you want a model 50 times smaller than this, see `fsmt-make-super-tiny-model.py`, which is slightly more complicated
-#
-#
-# It will be used then as "stas/tiny-wmt19-en-de"
-
-# Build
-from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
-
-
-mname = "facebook/wmt19-en-de"
-tokenizer = FSMTTokenizer.from_pretrained(mname)
-# get the correct vocab sizes, etc. from the master model
-config = FSMTConfig.from_pretrained(mname)
-config.update({
-    "d_model": 4,
-    "encoder_layers": 1, "decoder_layers": 1,
-    "encoder_ffn_dim": 4, "decoder_ffn_dim": 4,
-    "encoder_attention_heads": 1, "decoder_attention_heads": 1})
-
-tiny_model = FSMTForConditionalGeneration(config)
-print(f"num of params {tiny_model.num_parameters()}")
-
-# Test
-batch = tokenizer(["Making tiny model"], return_tensors="pt")
-outputs = tiny_model(**batch)
-
-print("test output:", len(outputs.logits[0]))
-
-# Save
-mname_tiny = "tiny-wmt19-en-de"
-tiny_model.half() # makes it smaller
-tiny_model.save_pretrained(mname_tiny)
-tokenizer.save_pretrained(mname_tiny)
-
-print(f"Generated {mname_tiny}")
-
-# Upload
-# transformers-cli upload tiny-wmt19-en-de
--- a/scripts/fsmt/gen-card-allenai-wmt16.py
+++ b/scripts/fsmt/gen-card-allenai-wmt16.py
@ -1,156 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Usage:
-# ./gen-card-allenai-wmt16.py
-
-import os
-from pathlib import Path
-
-
-def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
-
-    texts = {
-        "en": "Machine learning is great, isn't it?",
-        "ru": "Машинное обучение - это здорово, не так ли?",
-        "de": "Maschinelles Lernen ist großartig, nicht wahr?",
-    }
-
-    # BLEU scores as follows:
-    # "pair": [fairseq, transformers]
-    scores = {
-        "wmt16-en-de-dist-12-1": [28.3, 27.52],
-        "wmt16-en-de-dist-6-1": [27.4, 27.11],
-        "wmt16-en-de-12-1": [26.9, 25.75],
-    }
-    pair = f"{src_lang}-{tgt_lang}"
-
-    readme = f"""
---
-language:
- {src_lang}
- {tgt_lang}
-thumbnail:
-tags:
- translation
- wmt16
- allenai
-license: apache-2.0
-datasets:
- wmt16
-metrics:
- bleu
---
-
-# FSMT
-
-## Model description
-
-This is a ported version of fairseq-based [wmt16 transformer](https://github.com/jungokasai/deep-shallow/) for {src_lang}-{tgt_lang}.
-
-For more details, please, see [Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation](https://arxiv.org/abs/2006.10369).
-
-All 3 models are available:
-
-* [wmt16-en-de-dist-12-1](https://huggingface.co/allenai/wmt16-en-de-dist-12-1)
-* [wmt16-en-de-dist-6-1](https://huggingface.co/allenai/wmt16-en-de-dist-6-1)
-* [wmt16-en-de-12-1](https://huggingface.co/allenai/wmt16-en-de-12-1)
-
-
-## Intended uses & limitations
-
-#### How to use
-
-```python
-from transformers import FSMTForConditionalGeneration, FSMTTokenizer
-mname = "allenai/{model_name}"
-tokenizer = FSMTTokenizer.from_pretrained(mname)
-model = FSMTForConditionalGeneration.from_pretrained(mname)
-
-input = "{texts[src_lang]}"
-input_ids = tokenizer.encode(input, return_tensors="pt")
-outputs = model.generate(input_ids)
-decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(decoded) # {texts[tgt_lang]}
-
-```
-
-#### Limitations and bias
-
-
-## Training data
-
-Pretrained weights were left identical to the original model released by allenai. For more details, please, see the [paper](https://arxiv.org/abs/2006.10369).
-
-## Eval results
-
-Here are the BLEU scores:
-
-model   | fairseq | transformers
-------|---------|----------
-{model_name}  | {scores[model_name][0]} | {scores[model_name][1]}
-
-The score is slightly below the score reported in the paper, as the researchers don't use `sacrebleu` and measure the score on tokenized outputs. `transformers` score was measured using `sacrebleu` on detokenized outputs.
-
-The score was calculated using this code:
-
-```bash
-git clone https://github.com/huggingface/transformers
-cd transformers
-export PAIR={pair}
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-export NUM_BEAMS=5
-mkdir -p $DATA_DIR
-sacrebleu -t wmt16 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt16 -l $PAIR --echo ref > $DATA_DIR/val.target
-echo $PAIR
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py allenai/{model_name} $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-```
-
-## Data Sources
-
- [training, etc.](http://www.statmt.org/wmt16/)
- [test set](http://matrix.statmt.org/test_sets/newstest2016.tgz?1504722372)
-
-
-### BibTeX entry and citation info
-
-```
-@misc{{kasai2020deep,
-    title={{Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation}},
-    author={{Jungo Kasai and Nikolaos Pappas and Hao Peng and James Cross and Noah A. Smith}},
-    year={{2020}},
-    eprint={{2006.10369}},
-    archivePrefix={{arXiv}},
-    primaryClass={{cs.CL}}
-}}
-```
-
-"""
-    model_card_dir.mkdir(parents=True, exist_ok=True)
-    path = os.path.join(model_card_dir, "README.md")
-    print(f"Generating {path}")
-    with open(path, "w", encoding="utf-8") as f:
-        f.write(readme)
-
-# make sure we are under the root of the project
-repo_dir = Path(__file__).resolve().parent.parent.parent
-model_cards_dir = repo_dir / "model_cards"
-
-for model_name in ["wmt16-en-de-dist-12-1", "wmt16-en-de-dist-6-1", "wmt16-en-de-12-1"]:
-    model_card_dir = model_cards_dir / "allenai" / model_name
-    write_model_card(model_card_dir, src_lang="en", tgt_lang="de", model_name=model_name)
--- a/scripts/fsmt/gen-card-allenai-wmt19.py
+++ b/scripts/fsmt/gen-card-allenai-wmt19.py
@ -1,153 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Usage:
-# ./gen-card-allenai-wmt19.py
-
-import os
-from pathlib import Path
-
-
-def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
-
-    texts = {
-        "en": "Machine learning is great, isn't it?",
-        "ru": "Машинное обучение - это здорово, не так ли?",
-        "de": "Maschinelles Lernen ist großartig, nicht wahr?",
-    }
-
-    # BLEU scores as follows:
-    # "pair": [fairseq, transformers]
-    scores = {
-        "wmt19-de-en-6-6-base": [0, 38.37],
-        "wmt19-de-en-6-6-big": [0, 39.90],
-    }
-    pair = f"{src_lang}-{tgt_lang}"
-
-    readme = f"""
---
-
-language:
- {src_lang}
- {tgt_lang}
-thumbnail:
-tags:
- translation
- wmt19
- allenai
-license: apache-2.0
-datasets:
- wmt19
-metrics:
- bleu
---
-
-# FSMT
-
-## Model description
-
-This is a ported version of fairseq-based [wmt19 transformer](https://github.com/jungokasai/deep-shallow/) for {src_lang}-{tgt_lang}.
-
-For more details, please, see [Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation](https://arxiv.org/abs/2006.10369).
-
-2 models are available:
-
-* [wmt19-de-en-6-6-big](https://huggingface.co/allenai/wmt19-de-en-6-6-big)
-* [wmt19-de-en-6-6-base](https://huggingface.co/allenai/wmt19-de-en-6-6-base)
-
-
-## Intended uses & limitations
-
-#### How to use
-
-```python
-from transformers import FSMTForConditionalGeneration, FSMTTokenizer
-mname = "allenai/{model_name}"
-tokenizer = FSMTTokenizer.from_pretrained(mname)
-model = FSMTForConditionalGeneration.from_pretrained(mname)
-
-input = "{texts[src_lang]}"
-input_ids = tokenizer.encode(input, return_tensors="pt")
-outputs = model.generate(input_ids)
-decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(decoded) # {texts[tgt_lang]}
-
-```
-
-#### Limitations and bias
-
-
-## Training data
-
-Pretrained weights were left identical to the original model released by allenai. For more details, please, see the [paper](https://arxiv.org/abs/2006.10369).
-
-## Eval results
-
-Here are the BLEU scores:
-
-model   |  transformers
-------|---------
-{model_name}  |  {scores[model_name][1]}
-
-The score was calculated using this code:
-
-```bash
-git clone https://github.com/huggingface/transformers
-cd transformers
-export PAIR={pair}
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-export NUM_BEAMS=5
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-echo $PAIR
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py allenai/{model_name} $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-```
-
-## Data Sources
-
- [training, etc.](http://www.statmt.org/wmt19/)
- [test set](http://matrix.statmt.org/test_sets/newstest2019.tgz?1556572561)
-
-
-### BibTeX entry and citation info
-
-```
-@misc{{kasai2020deep,
-    title={{Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation}},
-    author={{Jungo Kasai and Nikolaos Pappas and Hao Peng and James Cross and Noah A. Smith}},
-    year={{2020}},
-    eprint={{2006.10369}},
-    archivePrefix={{arXiv}},
-    primaryClass={{cs.CL}}
-}}
-```
-
-"""
-    model_card_dir.mkdir(parents=True, exist_ok=True)
-    path = os.path.join(model_card_dir, "README.md")
-    print(f"Generating {path}")
-    with open(path, "w", encoding="utf-8") as f:
-        f.write(readme)
-
-# make sure we are under the root of the project
-repo_dir = Path(__file__).resolve().parent.parent.parent
-model_cards_dir = repo_dir / "model_cards"
-
-for model_name in ["wmt19-de-en-6-6-base", "wmt19-de-en-6-6-big"]:
-    model_card_dir = model_cards_dir / "allenai" / model_name
-    write_model_card(model_card_dir, src_lang="de", tgt_lang="en", model_name=model_name)
--- a/scripts/fsmt/gen-card-facebook-wmt19.py
+++ b/scripts/fsmt/gen-card-facebook-wmt19.py
@ -1,165 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Usage:
-# ./gen-card-facebook-wmt19.py
-
-import os
-from pathlib import Path
-
-
-def write_model_card(model_card_dir, src_lang, tgt_lang):
-
-    texts = {
-        "en": "Machine learning is great, isn't it?",
-        "ru": "Машинное обучение - это здорово, не так ли?",
-        "de": "Maschinelles Lernen ist großartig, oder?",
-    }
-
-    # BLEU scores as follows:
-    # "pair": [fairseq, transformers]
-    scores = {
-        "ru-en": ["[41.3](http://matrix.statmt.org/matrix/output/1907?run_id=6937)", "39.20"],
-        "en-ru": ["[36.4](http://matrix.statmt.org/matrix/output/1914?run_id=6724)", "33.47"],
-        "en-de": ["[43.1](http://matrix.statmt.org/matrix/output/1909?run_id=6862)", "42.83"],
-        "de-en": ["[42.3](http://matrix.statmt.org/matrix/output/1902?run_id=6750)", "41.35"],
-    }
-    pair = f"{src_lang}-{tgt_lang}"
-
-    readme = f"""
---
-language:
- {src_lang}
- {tgt_lang}
-thumbnail:
-tags:
- translation
- wmt19
- facebook
-license: apache-2.0
-datasets:
- wmt19
-metrics:
- bleu
---
-
-# FSMT
-
-## Model description
-
-This is a ported version of [fairseq wmt19 transformer](https://github.com/pytorch/fairseq/blob/master/examples/wmt19/README.md) for {src_lang}-{tgt_lang}.
-
-For more details, please see, [Facebook FAIR's WMT19 News Translation Task Submission](https://arxiv.org/abs/1907.06616).
-
-The abbreviation FSMT stands for FairSeqMachineTranslation
-
-All four models are available:
-
-* [wmt19-en-ru](https://huggingface.co/facebook/wmt19-en-ru)
-* [wmt19-ru-en](https://huggingface.co/facebook/wmt19-ru-en)
-* [wmt19-en-de](https://huggingface.co/facebook/wmt19-en-de)
-* [wmt19-de-en](https://huggingface.co/facebook/wmt19-de-en)
-
-## Intended uses & limitations
-
-#### How to use
-
-```python
-from transformers import FSMTForConditionalGeneration, FSMTTokenizer
-mname = "facebook/wmt19-{src_lang}-{tgt_lang}"
-tokenizer = FSMTTokenizer.from_pretrained(mname)
-model = FSMTForConditionalGeneration.from_pretrained(mname)
-
-input = "{texts[src_lang]}"
-input_ids = tokenizer.encode(input, return_tensors="pt")
-outputs = model.generate(input_ids)
-decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(decoded) # {texts[tgt_lang]}
-
-```
-
-#### Limitations and bias
-
- The original (and this ported model) doesn't seem to handle well inputs with repeated sub-phrases, [content gets truncated](https://discuss.huggingface.co/t/issues-with-translating-inputs-containing-repeated-phrases/981)
-
-## Training data
-
-Pretrained weights were left identical to the original model released by fairseq. For more details, please, see the [paper](https://arxiv.org/abs/1907.06616).
-
-## Eval results
-
-pair   | fairseq | transformers
-------|---------|----------
-{pair}  | {scores[pair][0]} | {scores[pair][1]}
-
-The score is slightly below the score reported by `fairseq`, since `transformers`` currently doesn't support:
- model ensemble, therefore the best performing checkpoint was ported (``model4.pt``).
- re-ranking
-
-The score was calculated using this code:
-
-```bash
-git clone https://github.com/huggingface/transformers
-cd transformers
-export PAIR={pair}
-export DATA_DIR=data/$PAIR
-export SAVE_DIR=data/$PAIR
-export BS=8
-export NUM_BEAMS=15
-mkdir -p $DATA_DIR
-sacrebleu -t wmt19 -l $PAIR --echo src > $DATA_DIR/val.source
-sacrebleu -t wmt19 -l $PAIR --echo ref > $DATA_DIR/val.target
-echo $PAIR
-PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval.py facebook/wmt19-$PAIR $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation --num_beams $NUM_BEAMS
-```
-note: fairseq reports using a beam of 50, so you should get a slightly higher score if re-run with `--num_beams 50`.
-
-## Data Sources
-
- [training, etc.](http://www.statmt.org/wmt19/)
- [test set](http://matrix.statmt.org/test_sets/newstest2019.tgz?1556572561)
-
-
-### BibTeX entry and citation info
-
-```bibtex
-@inproceedings{{...,
-  year={{2020}},
-  title={{Facebook FAIR's WMT19 News Translation Task Submission}},
-  author={{Ng, Nathan and Yee, Kyra and Baevski, Alexei and Ott, Myle and Auli, Michael and Edunov, Sergey}},
-  booktitle={{Proc. of WMT}},
-}}
-```
-
-
-## TODO
-
- port model ensemble (fairseq uses 4 model checkpoints)
-
-"""
-    os.makedirs(model_card_dir, exist_ok=True)
-    path = os.path.join(model_card_dir, "README.md")
-    print(f"Generating {path}")
-    with open(path, "w", encoding="utf-8") as f:
-        f.write(readme)
-
-# make sure we are under the root of the project
-repo_dir = Path(__file__).resolve().parent.parent.parent
-model_cards_dir = repo_dir / "model_cards"
-
-for model_name in ["wmt19-ru-en", "wmt19-en-ru", "wmt19-en-de", "wmt19-de-en"]:
-    base, src_lang, tgt_lang = model_name.split("-")
-    model_card_dir = model_cards_dir / "facebook" / model_name
-    write_model_card(model_card_dir, src_lang=src_lang, tgt_lang=tgt_lang)
--- a/scripts/fsmt/s3-move.sh
+++ b/scripts/fsmt/s3-move.sh
@ -1,116 +0,0 @@
-
-# this is the process of uploading the updated models to s3. As I can't upload them directly to the correct orgs, this script shows how this is done
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-1. upload updated models to my account
-
-transformers-cli upload -y wmt19-ru-en
-transformers-cli upload -y wmt19-en-ru
-transformers-cli upload -y wmt19-de-en
-transformers-cli upload -y wmt19-en-de
-transformers-cli upload -y wmt19-de-en-6-6-base
-transformers-cli upload -y wmt19-de-en-6-6-big
-transformers-cli upload -y wmt16-en-de-dist-12-1
-transformers-cli upload -y wmt16-en-de-dist-6-1
-transformers-cli upload -y wmt16-en-de-12-1
-
-
-2. ask someone to move them to:
-
-* to facebook: "wmt19-ru-en", "wmt19-en-ru", "wmt19-en-de", "wmt19-de-en"
-* to allenai: "wmt16-en-de-dist-12-1", "wmt16-en-de-dist-6-1", "wmt16-en-de-12-1", "wmt19-de-en-6-6-base", "wmt19-de-en-6-6-big"
-
-export b="s3://models.huggingface.co/bert"
-stas_to_fb () {
-	src=$1
-	shift
-	aws s3 sync $b/stas/$src $b/facebook/$src $@
-}
-
-stas_to_allenai () {
-	src=$1
-	shift
-	aws s3 sync $b/stas/$src $b/allenai/$src $@
-}
-
-stas_to_fb wmt19-en-ru
-stas_to_fb wmt19-ru-en
-stas_to_fb wmt19-en-de
-stas_to_fb wmt19-de-en
-
-stas_to_allenai wmt16-en-de-dist-12-1
-stas_to_allenai wmt16-en-de-dist-6-1
-stas_to_allenai wmt16-en-de-6-1
-stas_to_allenai wmt16-en-de-12-1
-stas_to_allenai wmt19-de-en-6-6-base
-stas_to_allenai wmt19-de-en-6-6-big
-
-
-3. and then remove all these model files from my account
-
-transformers-cli s3 rm wmt16-en-de-12-1/config.json
-transformers-cli s3 rm wmt16-en-de-12-1/merges.txt
-transformers-cli s3 rm wmt16-en-de-12-1/pytorch_model.bin
-transformers-cli s3 rm wmt16-en-de-12-1/tokenizer_config.json
-transformers-cli s3 rm wmt16-en-de-12-1/vocab-src.json
-transformers-cli s3 rm wmt16-en-de-12-1/vocab-tgt.json
-transformers-cli s3 rm wmt16-en-de-dist-12-1/config.json
-transformers-cli s3 rm wmt16-en-de-dist-12-1/merges.txt
-transformers-cli s3 rm wmt16-en-de-dist-12-1/pytorch_model.bin
-transformers-cli s3 rm wmt16-en-de-dist-12-1/tokenizer_config.json
-transformers-cli s3 rm wmt16-en-de-dist-12-1/vocab-src.json
-transformers-cli s3 rm wmt16-en-de-dist-12-1/vocab-tgt.json
-transformers-cli s3 rm wmt16-en-de-dist-6-1/config.json
-transformers-cli s3 rm wmt16-en-de-dist-6-1/merges.txt
-transformers-cli s3 rm wmt16-en-de-dist-6-1/pytorch_model.bin
-transformers-cli s3 rm wmt16-en-de-dist-6-1/tokenizer_config.json
-transformers-cli s3 rm wmt16-en-de-dist-6-1/vocab-src.json
-transformers-cli s3 rm wmt16-en-de-dist-6-1/vocab-tgt.json
-transformers-cli s3 rm wmt19-de-en-6-6-base/config.json
-transformers-cli s3 rm wmt19-de-en-6-6-base/merges.txt
-transformers-cli s3 rm wmt19-de-en-6-6-base/pytorch_model.bin
-transformers-cli s3 rm wmt19-de-en-6-6-base/tokenizer_config.json
-transformers-cli s3 rm wmt19-de-en-6-6-base/vocab-src.json
-transformers-cli s3 rm wmt19-de-en-6-6-base/vocab-tgt.json
-transformers-cli s3 rm wmt19-de-en-6-6-big/config.json
-transformers-cli s3 rm wmt19-de-en-6-6-big/merges.txt
-transformers-cli s3 rm wmt19-de-en-6-6-big/pytorch_model.bin
-transformers-cli s3 rm wmt19-de-en-6-6-big/tokenizer_config.json
-transformers-cli s3 rm wmt19-de-en-6-6-big/vocab-src.json
-transformers-cli s3 rm wmt19-de-en-6-6-big/vocab-tgt.json
-transformers-cli s3 rm wmt19-de-en/config.json
-transformers-cli s3 rm wmt19-de-en/merges.txt
-transformers-cli s3 rm wmt19-de-en/pytorch_model.bin
-transformers-cli s3 rm wmt19-de-en/tokenizer_config.json
-transformers-cli s3 rm wmt19-de-en/vocab-src.json
-transformers-cli s3 rm wmt19-de-en/vocab-tgt.json
-transformers-cli s3 rm wmt19-en-de/config.json
-transformers-cli s3 rm wmt19-en-de/merges.txt
-transformers-cli s3 rm wmt19-en-de/pytorch_model.bin
-transformers-cli s3 rm wmt19-en-de/tokenizer_config.json
-transformers-cli s3 rm wmt19-en-de/vocab-src.json
-transformers-cli s3 rm wmt19-en-de/vocab-tgt.json
-transformers-cli s3 rm wmt19-en-ru/config.json
-transformers-cli s3 rm wmt19-en-ru/merges.txt
-transformers-cli s3 rm wmt19-en-ru/pytorch_model.bin
-transformers-cli s3 rm wmt19-en-ru/tokenizer_config.json
-transformers-cli s3 rm wmt19-en-ru/vocab-src.json
-transformers-cli s3 rm wmt19-en-ru/vocab-tgt.json
-transformers-cli s3 rm wmt19-ru-en/config.json
-transformers-cli s3 rm wmt19-ru-en/merges.txt
-transformers-cli s3 rm wmt19-ru-en/pytorch_model.bin
-transformers-cli s3 rm wmt19-ru-en/tokenizer_config.json
-transformers-cli s3 rm wmt19-ru-en/vocab-src.json
-transformers-cli s3 rm wmt19-ru-en/vocab-tgt.json
--- a/scripts/fsmt/tests-to-run.sh
+++ b/scripts/fsmt/tests-to-run.sh
@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# these scripts need to be run before any changes to FSMT-related code - it should cover all bases
-
-CUDA_VISIBLE_DEVICES="" RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py
-RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py
--- a/scripts/pegasus/build_test_sample_spm_no_bos.py
+++ b/scripts/pegasus/build_test_sample_spm_no_bos.py
@ -1,34 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script builds a small sample spm file tests/fixtures/test_sentencepiece_no_bos.model, with features needed by pegasus
-
-# 1. pip install sentencepiece
-#
-# 2. wget https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt
-
-# 3. build
-import sentencepiece as spm
-
-
-# pegasus:
-# 1. no bos
-# 2. eos_id is 1
-# 3. unk_id is 2
-# build a sample spm file accordingly
-spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=test_sentencepiece_no_bos --bos_id=-1 --unk_id=2  --eos_id=1  --vocab_size=1000')
-
-# 4. now update the fixture
-# mv test_sentencepiece_no_bos.model ../../tests/fixtures/
--- a/scripts/tatoeba/README.md
+++ b/scripts/tatoeba/README.md
@ -1,72 +0,0 @@
-<!---
-Copyright 2020 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-->
-
-Set up transformers following the instructions in README.md (I would fork first).
-```bash
-git clone git@github.com:huggingface/transformers.git
-cd transformers
-pip install -e .
-pip install pandas GitPython wget
-```
-
-Get required metadata
-```bash
-curl https://cdn-datasets.huggingface.co/language_codes/language-codes-3b2.csv  > language-codes-3b2.csv
-curl https://cdn-datasets.huggingface.co/language_codes/iso-639-3.csv > iso-639-3.csv
-```
-
-Install Tatoeba-Challenge repo inside transformers
-```bash
-git clone git@github.com:Helsinki-NLP/Tatoeba-Challenge.git
-```
-
-To convert a few models, call the conversion script from command line:
-```bash
-python src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py --models heb-eng eng-heb --save_dir converted
-```
-
-To convert lots of models you can pass your list of Tatoeba model names to `resolver.convert_models` in a python client or script.
-
-```python
-from transformers.models.marian.convert_marian_tatoeba_to_pytorch import TatoebaConverter
-resolver = TatoebaConverter(save_dir='converted')
-resolver.convert_models(['heb-eng', 'eng-heb'])
-```
-
-
-### Upload converted models
-Since version v3.5.0, the model sharing workflow has switched to a git-based system. Refer to the [model sharing doc](https://huggingface.co/transformers/main/model_sharing.html#model-sharing-and-uploading) for more details.
-
-To upload all converted models:
-
-1. Install [git-lfs](https://git-lfs.github.com/).
-
-2. Login to `huggingface-cli`
-
-```bash
-huggingface-cli login
-```
-
-3. Run the `upload_models` script
-
-```bash
-./scripts/tatoeba/upload_models.sh
-```
-
-
-### Modifications
- To change naming logic, change the code near `os.rename`. The model card creation code may also need to change.
- To change model card content, you must modify `TatoebaConverter.write_model_card`
--- a/scripts/tatoeba/upload_models.sh
+++ b/scripts/tatoeba/upload_models.sh
@ -1,12 +0,0 @@
-#!/bin/bash
-
-for FILE in converted/*; do 
-  model_name=`basename $FILE`
-  huggingface-cli repo create $model_name -y
-  git clone https://huggingface.co/Helsinki-NLP/$model_name
-  mv $FILE/* $model_name/
-  cd $model_name
-  git add . && git commit -m "initial commit" 
-  git push
-  cd ..
-done
--- a/setup.py
+++ b/setup.py
@ -163,7 +163,6 @@ _deps = [
    "rjieba",
    "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
    "ruff==0.11.2",
-    "sacrebleu>=1.4.12,<2.0.0",
    "sacremoses",
    "safetensors>=0.4.3",
    "sagemaker>=2.31.0",
@ -344,7 +343,6 @@ extras["testing"] = (
        "evaluate",
        "pytest-timeout",
        "ruff",
-        "sacrebleu",
        "rouge-score",
        "nltk",
        "GitPython",
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@ -69,7 +69,6 @@ deps = {
    "rjieba": "rjieba",
    "rouge-score": "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
    "ruff": "ruff==0.11.2",
-    "sacrebleu": "sacrebleu>=1.4.12,<2.0.0",
    "sacremoses": "sacremoses",
    "safetensors": "safetensors>=0.4.3",
    "sagemaker": "sagemaker>=2.31.0",
--- a/src/transformers/models/fsmt/modeling_fsmt.py
+++ b/src/transformers/models/fsmt/modeling_fsmt.py
@ -112,6 +112,7 @@ _CONFIG_FOR_DOC = "FSMTConfig"
 """

 Here is how to compare BLEU scores against fairseq implementation:
+(don't forget to install sacrebleu: `pip install sacrebleu`)

 # en-ru