[Benchmark] Extend Benchmark to all model type extensions (#5241)

* add benchmark for all kinds of models

* improved import

* delete bogus files

* make style
Patrick von Platen 2020-06-24 15:11:42 +02:00 committed by GitHub
parent 7c41057d50
commit 9fe09cec76
7 changed files with 89 additions and 17 deletions
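In short, the benchmark no longer chooses only between the bare model (MODEL_MAPPING) and the LM-head model (MODEL_WITH_LM_HEAD_MAPPING): when a checkpoint's config lists its architectures, the exact class named there is instantiated, and the new only_pretrain_model flag skips this and loads just the pretrained base model. A minimal usage sketch mirroring the tests added in this commit (model name and sizes are illustrative):

from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments

# Benchmark an arbitrary fine-tuned checkpoint; the class to instantiate is resolved
# from config.architectures (e.g. a sequence-classification head for this checkpoint).
benchmark_args = PyTorchBenchmarkArguments(
    models=["sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"],
    training=False,
    no_inference=False,
    sequence_lengths=[8],
    batch_sizes=[1],
    no_multi_process=True,
)
results = PyTorchBenchmark(benchmark_args).run()

# Setting only_pretrain_model=True (or passing --only_pretrain_model on the command
# line) benchmarks just the pretrained base model instead of the class named in the config.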


@ -5,6 +5,7 @@ from typing import Optional
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import ScalarFormatter
from transformers import HfArgumentParser
@ -24,6 +25,9 @@ class PlotArguments:
default=False,
metadata={"help": "Whether the csv file has time results or memory results. Defaults to memory results."},
)
no_log_scale: bool = field(
default=False, metadata={"help": "Disable logarithmic scale when plotting"},
)
is_train: bool = field(
default=False,
metadata={
@ -55,6 +59,14 @@ class Plot:
title_str = "Time usage" if self.args.is_time else "Memory usage"
title_str = title_str + " for training" if self.args.is_train else title_str + " for inference"
if not self.args.no_log_scale:
# set logarithm scales
ax.set_xscale("log")
ax.set_yscale("log")
for axis in [ax.xaxis, ax.yaxis]:
axis.set_major_formatter(ScalarFormatter())
for model_name in self.result_dict.keys():
batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"])))
sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"])))
@ -64,17 +76,12 @@ class Plot:
(batch_sizes, sequence_lengths) if self.args.plot_along_batch else (sequence_lengths, batch_sizes)
)
plt.xlim(min(x_axis_array), max(x_axis_array))
for inner_loop_value in inner_loop_array:
if self.args.plot_along_batch:
y_axis_array = np.asarray([results[(x, inner_loop_value)] for x in x_axis_array], dtype=np.int)
else:
y_axis_array = np.asarray([results[(inner_loop_value, x)] for x in x_axis_array], dtype=np.float32)
ax.set_xscale("log", basex=2)
ax.set_yscale("log", basey=10)
(x_axis_label, inner_loop_label) = (
("batch_size", "sequence_length in #tokens")
if self.args.plot_along_batch


@ -87,8 +87,18 @@ class PyTorchBenchmark(Benchmark):
if self.args.torchscript:
config.torchscript = True
if self.args.with_lm_head:
model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)
has_model_class_in_config = hasattr(config, "architectures") and config.architectures is not None and len(config.architectures) > 0
if not self.args.only_pretrain_model and has_model_class_in_config:
try:
model_class = config.architectures[0]
transformers_module = __import__("transformers", fromlist=[model_class])
model_cls = getattr(transformers_module, model_class)
model = model_cls(config)
except (ImportError, AttributeError):
raise ImportError(
f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
)
else:
model = MODEL_MAPPING[config.__class__](config)
@ -127,7 +137,20 @@ class PyTorchBenchmark(Benchmark):
def _prepare_train_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
config = self.config_dict[model_name]
model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)
has_model_class_in_config = hasattr(config, "architectures") and config.architectures is not None and len(config.architectures) > 0
if not self.args.only_pretrain_model and has_model_class_in_config:
try:
model_class = config.architectures[0]
transformers_module = __import__("transformers", fromlist=[model_class])
model_cls = getattr(transformers_module, model_class)
model = model_cls(config)
except (ImportError, AttributeError):
raise ImportError(
f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
)
else:
model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)
if self.args.torchscript:
raise NotImplementedError("Training for torchscript is currently not implemented")
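The lookup above resolves the class name from config.architectures against the top-level transformers module via __import__ and getattr. A standalone sketch of the same pattern, assuming a checkpoint whose config lists its architectures (checkpoint and class name here are illustrative):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("bert-base-uncased")
architectures = getattr(config, "architectures", None) or []
if architectures:
    model_class_name = architectures[0]  # e.g. "BertForMaskedLM"
    # Resolve the class on the top-level transformers package, as the benchmark does.
    transformers_module = __import__("transformers", fromlist=[model_class_name])
    model_cls = getattr(transformers_module, model_class_name)
    model = model_cls(config)  # built from the config only, so weights are randomly initialized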


@ -105,6 +105,12 @@ class BenchmarkArguments:
metadata={"help": "Log filename used if print statements are saved in log."},
)
repeat: int = field(default=3, metadata={"help": "Number of times an experiment will be run."})
only_pretrain_model: bool = field(
default=False,
metadata={
"help": "Instead of loading the model as defined in `config.architectures` if exists, just load the pretrain model weights."
},
)
def to_json_string(self):
"""

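Since the field defaults to False, HfArgumentParser (already imported in the plotting script above) turns it into a --only_pretrain_model flag that needs no value. A minimal parsing sketch, assuming a standalone benchmark script:

from transformers import HfArgumentParser, PyTorchBenchmarkArguments

# Parse the benchmark arguments from the command line, e.g.
#   python my_benchmark.py --models bert-base-uncased --only_pretrain_model
# (script name and model are illustrative).
parser = HfArgumentParser(PyTorchBenchmarkArguments)
benchmark_args = parser.parse_args_into_dataclasses()[0]
print(benchmark_args.only_pretrain_model)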

@ -24,13 +24,7 @@ import timeit
from functools import wraps
from typing import Callable, Optional
from transformers import (
TF_MODEL_MAPPING,
TF_MODEL_WITH_LM_HEAD_MAPPING,
PretrainedConfig,
is_py3nvml_available,
is_tf_available,
)
from transformers import TF_MODEL_MAPPING, PretrainedConfig, is_py3nvml_available, is_tf_available
from .benchmark_utils import (
Benchmark,
@ -125,8 +119,17 @@ class TensorflowBenchmark(Benchmark):
if self.args.fp16:
raise NotImplementedError("Mixed precision is currently not supported.")
if self.args.with_lm_head:
model = TF_MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)
has_model_class_in_config = hasattr(config, "architectures") and config.architectures is not None and len(config.architectures) > 0
if not self.args.only_pretrain_model and has_model_class_in_config:
try:
model_class = "TF" + config.architectures[0] # prepend 'TF' for tensorflow model
transformers_module = __import__("transformers", fromlist=[model_class])
model_cls = getattr(transformers_module, model_class)
model = model_cls(config)
except (ImportError, AttributeError):
raise ImportError(
f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
)
else:
model = TF_MODEL_MAPPING[config.__class__](config)


@ -752,6 +752,7 @@ class Benchmark(ABC):
info["time"] = datetime.time(datetime.now())
info["fp16"] = self.args.fp16
info["use_multiprocessing"] = self.args.do_multi_processing
info["only_pretrain_model"] = self.args.only_pretrain_model
if is_psutil_available():
info["cpu_ram_mb"] = bytes_to_mega_bytes(psutil.virtual_memory().total)


@ -38,6 +38,22 @@ class BenchmarkTest(unittest.TestCase):
self.check_results_dict_not_empty(results.time_inference_result)
self.check_results_dict_not_empty(results.memory_inference_result)
def test_inference_no_configs_only_pretrain(self):
MODEL_ID = "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"
benchmark_args = PyTorchBenchmarkArguments(
models=[MODEL_ID],
training=False,
no_inference=False,
sequence_lengths=[8],
batch_sizes=[1],
no_multi_process=True,
only_pretrain_model=True,
)
benchmark = PyTorchBenchmark(benchmark_args)
results = benchmark.run()
self.check_results_dict_not_empty(results.time_inference_result)
self.check_results_dict_not_empty(results.memory_inference_result)
def test_inference_torchscript(self):
MODEL_ID = "sshleifer/tiny-gpt2"
benchmark_args = PyTorchBenchmarkArguments(


@ -37,6 +37,22 @@ class TFBenchmarkTest(unittest.TestCase):
self.check_results_dict_not_empty(results.time_inference_result)
self.check_results_dict_not_empty(results.memory_inference_result)
def test_inference_no_configs_only_pretrain(self):
MODEL_ID = "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"
benchmark_args = TensorflowBenchmarkArguments(
models=[MODEL_ID],
training=False,
no_inference=False,
sequence_lengths=[8],
batch_sizes=[1],
no_multi_process=True,
only_pretrain_model=True,
)
benchmark = TensorflowBenchmark(benchmark_args)
results = benchmark.run()
self.check_results_dict_not_empty(results.time_inference_result)
self.check_results_dict_not_empty(results.memory_inference_result)
def test_inference_no_configs_graph(self):
MODEL_ID = "sshleifer/tiny-gpt2"
benchmark_args = TensorflowBenchmarkArguments(