mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 04:40:06 +06:00
[Benchmark] Reuse optimum-benchmark (#30615)

* benchmark

* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
3b09d3f05f
commit
64e0573a81
7	Makefile
@@ -1,4 +1,4 @@
-.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples
+.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples benchmark

# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = src
@@ -96,6 +96,11 @@ test:
test-examples:
	python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/

+# Run benchmark
+benchmark:
+	python3 benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=diff backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
+
+
# Run tests for SageMaker DLC release
test-sagemaker: # install sagemaker dependencies in advance with pip install .[sagemaker]
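The new target wires the harness into the usual `make` workflow; a minimal usage sketch (flags taken from the Makefile target above and the script's help text — `--commit=diff` benchmarks both the current head and `main`):

```bash
# Run the default sweep defined by the Makefile target above
make benchmark

# Or invoke the harness directly: compare the current head against main on one model
python3 benchmark/benchmark.py --config-dir benchmark/config --config-name generation \
    --commit=diff backend.model=google/gemma-2b --multirun
```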
0	benchmark/__init__.py (new file)
310	benchmark/benchmark.py (new file)
@@ -0,0 +1,310 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Run benchmarks using the `optimum-benchmark` library with some customization in `transformers`.

Assume we are under the `transformers` root directory (make sure the commits are valid commits):
```bash
python benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=9b9c7f03da625b13643e99205c691fe046461724 --metrics=decode.latency.mean,per_token.latency.mean,per_token.throughput.value backend.model=google/gemma-2b benchmark.input_shapes.sequence_length=5,7 benchmark.input_shapes.batch_size=1,2 --multirun
```
"""

import argparse
import glob
import json
import os.path
import re
import tempfile
from contextlib import contextmanager
from pathlib import Path

from git import Repo

from optimum_benchmark import Benchmark
from optimum_benchmark_wrapper import main


PATH_TO_REPO = Path(__file__).parent.parent.resolve()


@contextmanager
def checkout_commit(repo: Repo, commit_id: str):
    """
    Context manager that checks out a given commit when entered, but gets back to the reference it was at on exit.

    Args:
        repo (`git.Repo`): A git repository (for instance the Transformers repo).
        commit_id (`str`): The commit reference to checkout inside the context manager.
    """
    current_head = repo.head.commit if repo.head.is_detached else repo.head.ref

    try:
        repo.git.checkout(commit_id)
        yield

    finally:
        repo.git.checkout(current_head)


def summarize(run_dir, metrics, expand_metrics=False):
    """Produce a summary for each optimum-benchmark launched job's output directory found in `run_dir`.

    Each summary's format is as follows (for `expand_metrics=False`):
    ```
    {
        "model": "google/gemma-2b",
        "commit": "3cd6ed22e4d49219f300f5055e71e3929aba20d7",
        "config": "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5",
        "metrics": {
            "decode.latency.mean": 1.624666809082031,
            "per_token.latency.mean": 0.012843788806628804,
            "per_token.throughput.value": 77.85864553330948
        }
    }
    ```
    """
    reports = glob.glob(os.path.join(run_dir, "**/benchmark_report.json"), recursive=True)
    report_dirs = [str(Path(report).parent) for report in reports]

    summaries = []
    for report_dir in report_dirs:
        commit = re.search(r"/commit=([^/]+)", report_dir).groups()[0]

        if not os.path.isfile(os.path.join(report_dir, "benchmark.json")):
            continue
        benchmark = Benchmark.from_json(os.path.join(report_dir, "benchmark.json"))
        report = benchmark.report

        model = benchmark.config.backend["model"]

        # This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
        # (We rely on the usage of hydra's `${hydra.job.override_dirname}`.)
        benchmark_name = re.sub(f"backend.model={model},*", "", report_dir)
        benchmark_name = str(Path(benchmark_name).parts[-1])
        if benchmark_name.startswith("commit="):
            benchmark_name = benchmark.config.name

        metrics_values = {}
        # post-processing of report: show a few selected/important metrics
        for metric in metrics:
            keys = metric.split(".")
            value = report
            current = metrics_values
            for key in keys:
                # Avoid a KeyError when a user-specified metric has a typo.
                # TODO: Give warnings.
                if key not in value:
                    continue
                value = value[key]

                if expand_metrics:
                    if isinstance(value, dict):
                        if key not in current:
                            current[key] = {}
                        current = current[key]
                    else:
                        current[key] = value

            if not expand_metrics:
                metrics_values[metric] = value

        # show some config information
        print(f"model: {model}")
        print(f"commit: {commit}")
        print(f"config: {benchmark_name}")
        if len(metrics_values) > 0:
            print("metrics:")
            if expand_metrics:
                print(metrics_values)
            else:
                for metric, value in metrics_values.items():
                    print(f"  - {metric}: {value}")
        print("-" * 80)

        summary = {
            "model": model,
            "commit": commit,
            "config": benchmark_name,
            "metrics": metrics_values,
        }
        summaries.append(summary)

        with open(os.path.join(report_dir, "summary.json"), "w") as fp:
            json.dump(summary, fp, indent=4)

    # TODO: upload to Hub
    return summaries


def combine_summaries(summaries):
    """Combine a list of summaries obtained from the function `summarize`.

    The combined summary's format is as follows:
    ```
    "google/gemma-2b": {
        "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5": {
            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
                "metrics": {"decode.latency.mean": 1.624666809082031}
            },
            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
                "metrics": {"decode.latency.mean": 1.6278163452148438}
            }
        },
        "benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=5": {
            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
                "metrics": {"decode.latency.mean": 1.6947791748046876}
            },
            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
                "metrics": {"decode.latency.mean": 1.6980519409179688}
            }
        }
    }
    ```
    """
    combined = {}
    for summary in summaries:
        model = summary["model"]
        config = summary["config"]
        commit = summary["commit"]

        if model not in combined:
            combined[model] = {}

        if config not in combined[model]:
            combined[model][config] = {}

        if commit not in combined[model][config]:
            combined[model][config][commit] = {"metrics": summary["metrics"]}

    # NOTE: `exp_run_dir` is the experiment run directory defined in the `__main__` block below.
    with open(os.path.join(exp_run_dir, "summary.json"), "w") as fp:
        json.dump(combined, fp, indent=4)

    # TODO: upload to Hub
    print(json.dumps(combined, indent=4))

    return combined


if __name__ == "__main__":

    def list_str(values):
        return values.split(",")

    parser = argparse.ArgumentParser()

    parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
    parser.add_argument("--config-name", type=str, required=True, help="The config name.")

    # arguments specific to this wrapper for our own customization
    parser.add_argument("--ensure_empty", type=bool, default=True, help="Whether to create a temporary directory.")
    parser.add_argument(
        "--commit",
        type=list_str,
        default="",
        help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.",
    )
    parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.")
    args, optimum_benchmark_args = parser.parse_known_args()

    repo = Repo(PATH_TO_REPO)

    metrics = [
        "prefill.latency.mean",
        "prefill.throughput.value",
        "decode.latency.mean",
        "decode.throughput.value",
        "per_token.latency.mean",
        "per_token.throughput.value",
    ]
    if args.metrics is not None:
        metrics = args.metrics.split(",")

    # Get `backend.model` in a hacky way: we want to control the experiment flow manually.
    models = [""]
    for idx, arg in enumerate(optimum_benchmark_args):
        if arg.startswith("backend.model="):
            models = arg[len("backend.model=") :]
            models = models.split(",")
            break
    optimum_benchmark_args = [arg for arg in optimum_benchmark_args if not arg.startswith("backend.model=")]

    # Get the commit(s)
    current_head = str(repo.head.commit) if repo.head.is_detached else str(repo.head.ref)
    commits = [x for x in args.commit if x != ""]
    if len(commits) == 0:
        commits = [current_head]
    elif len(commits) == 1 and commits[0] == "diff":
        # compare to `main`
        commits = ["main", current_head]

    # Get the specified run directory
    run_dir_arg_idx, run_dir = -1, None
    sweep_dir_arg_idx, sweep_dir = -1, None
    for idx, arg in enumerate(optimum_benchmark_args):
        if arg.startswith("hydra.run.dir="):
            run_dir = arg[len("hydra.run.dir=") :]
            run_dir_arg_idx = idx
        elif arg.startswith("hydra.sweep.dir="):
            sweep_dir = arg[len("hydra.sweep.dir=") :]
            sweep_dir_arg_idx = idx
    exp_run_dir, arg_idx, arg_name = (
        (sweep_dir, sweep_dir_arg_idx, "hydra.sweep.dir")
        if "--multirun" in optimum_benchmark_args
        else (run_dir, run_dir_arg_idx, "hydra.run.dir")
    )

    # TODO: not hardcoded
    if exp_run_dir is None and args.ensure_empty:
        exp_run_dir = "_benchmark"

    if args.ensure_empty:
        os.makedirs(exp_run_dir, exist_ok=True)
        exp_run_dir = tempfile.mkdtemp(dir=exp_run_dir)

    run_summaries = []
    for commit in commits:
        with checkout_commit(repo, commit):
            commit = str(repo.head.commit)

            commit_run_dir = exp_run_dir
            if exp_run_dir is not None:
                commit_run_dir = os.path.join(exp_run_dir, rf"commit\={commit}")

            print(f"Run benchmark on commit: {commit}")

            for model in models:
                model_arg = [f"backend.model={model}"] if model != "" else []
                dir_args = []
                if commit_run_dir is not None:
                    if arg_idx > -1:
                        optimum_benchmark_args[arg_idx] = f"{arg_name}={commit_run_dir}"
                    else:
                        dir_args = [
                            f"hydra.sweep.dir={commit_run_dir}",
                            f"hydra.run.dir={commit_run_dir}/" + "${hydra.job.override_dirname}",
                        ]
                main(args.config_dir, args.config_name, model_arg + dir_args + optimum_benchmark_args)

            if commit_run_dir is not None:
                # Need to remove the `\` character
                summaries = summarize(commit_run_dir.replace("\\", ""), metrics)
                run_summaries.extend(summaries)

    # aggregate the information across the commits
    if exp_run_dir is not None:
        with open(os.path.join(exp_run_dir, "summaries.json"), "w") as fp:
            json.dump(run_summaries, fp, indent=4)

        combined_summary = combine_summaries(run_summaries)
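Given the `summary.json` / `summaries.json` layouts documented above, downstream comparison can stay very small; a hedged sketch (the temp-directory name is hypothetical — `benchmark.py` creates it under `_benchmark/` by default — while the file name and keys come from the script above):

```python
import json

# Hypothetical path: benchmark.py writes summaries.json into the experiment
# run directory it creates (a tempdir under `_benchmark/` by default).
with open("_benchmark/tmpxyz123/summaries.json") as fp:
    summaries = json.load(fp)

# One line per (model, config, commit): enough to eyeball a regression between
# the two commits produced by `--commit=diff`.
for s in summaries:
    latency = s["metrics"].get("decode.latency.mean")
    print(f"{s['model']} | {s['config']} | {s['commit'][:8]} | decode.latency.mean={latency}")
```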
57	benchmark/config/generation.yaml (new file)
@@ -0,0 +1,57 @@
defaults:
  - benchmark  # inheriting benchmark schema
  - scenario: inference
  - launcher: process
  - backend: pytorch
  - _self_  # for hydra 1.1 compatibility

name: pytorch_generate

launcher:
  start_method: spawn
  device_isolation: true
  device_isolation_action: warn

backend:
  device: cuda
  device_ids: 0
  no_weights: true
  model: meta-llama/Llama-2-7b-hf
  cache_implementation: static
  torch_compile: true
  torch_dtype: float16
  torch_compile_config:
    backend: inductor
    mode: reduce-overhead
    fullgraph: true

scenario:
  input_shapes:
    batch_size: 1
    sequence_length: 7
  generate_kwargs:
    max_new_tokens: 128
    min_new_tokens: 128
    do_sample: false
  memory: true
  latency: true
  iterations: 2
  duration: 0


# hydra/cli specific settings
hydra:
  run:
    # where to store run results
    dir: runs/${name}
  job:
    # change working directory to the run directory
    chdir: true
    env_set:
      # set environment variable OVERRIDE_BENCHMARKS to 1
      # to not skip benchmarks that have been run before
      OVERRIDE_BENCHMARKS: 1
      LOG_LEVEL: WARN
  sweep:
    dir: multirun
    subdir: ${hydra.job.override_dirname}
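Any value in this config can be overridden from the command line via hydra's dotted-key syntax, which is how the Makefile target sweeps over `cache_implementation` and `torch_compile`; a small sketch reusing the input-shape overrides from the script's own docstring:

```bash
# Sweep batch size and sequence length while keeping the rest of generation.yaml;
# --multirun makes hydra launch one job per combination of the comma-separated values.
python3 benchmark/benchmark.py --config-dir benchmark/config --config-name generation \
    backend.model=google/gemma-2b \
    benchmark.input_shapes.batch_size=1,2 benchmark.input_shapes.sequence_length=5,7 \
    --multirun
```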
16	benchmark/optimum_benchmark_wrapper.py (new file)
@@ -0,0 +1,16 @@
import argparse
import subprocess


def main(config_dir, config_name, args):
    subprocess.run(["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] + args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
    parser.add_argument("--config-name", type=str, required=True, help="The config name.")
    args, unknown = parser.parse_known_args()

    main(args.config_dir, args.config_name, unknown)
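The wrapper simply shells out to the `optimum-benchmark` CLI with hydra's own logging silenced, forwarding any unknown arguments verbatim as hydra overrides, so it can also be driven standalone; a minimal sketch:

```bash
# Run optimum-benchmark once with generation.yaml, overriding only the model;
# hydra/job_logging and hydra/hydra_logging are disabled by the wrapper itself.
python3 benchmark/optimum_benchmark_wrapper.py --config-dir benchmark/config \
    --config-name generation backend.model=google/gemma-2b
```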
3	setup.py
@@ -136,6 +136,7 @@ _deps = [
    "onnxruntime-tools>=1.4.2",
    "onnxruntime>=1.4.0",
    "opencv-python",
+    "optimum-benchmark>=0.2.0",
    "optuna",
    "optax>=0.0.8,<=0.1.4",
    "packaging>=20.0",
@@ -410,6 +411,8 @@ extras["agents"] = deps_list(
    "diffusers", "accelerate", "datasets", "torch", "sentencepiece", "opencv-python", "Pillow"
)

+extras["benchmark"] = deps_list("optimum-benchmark")
+
# when modifying the following list, make sure to update src/transformers/dependency_versions_check.py
install_requires = [
    deps["filelock"],  # filesystem locks, e.g., to prevent parallel downloads
src/transformers/dependency_versions_table.py
@@ -42,6 +42,7 @@ deps = {
    "onnxruntime-tools": "onnxruntime-tools>=1.4.2",
    "onnxruntime": "onnxruntime>=1.4.0",
    "opencv-python": "opencv-python",
+    "optimum-benchmark": "optimum-benchmark>=0.2.0",
    "optuna": "optuna",
    "optax": "optax>=0.0.8,<=0.1.4",
    "packaging": "packaging>=20.0",
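With the new `benchmark` extra registered in `setup.py`, the harness dependencies install the same way as the existing extras (cf. the `pip install .[sagemaker]` hint in the Makefile); a minimal sketch:

```bash
# From the transformers checkout root: editable install plus optimum-benchmark>=0.2.0
pip install -e ".[benchmark]"
```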