Fix some typos about benchmark scripts. (#37027)

Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
2025-07-03 04:40:06 +06:00 · 2025-03-28 22:10:20 +08:00 · 2025-03-28 22:10:20 +08:00 · c90e6e9625
commit c90e6e9625
parent 1fcaad6df9
3 changed files with 5 additions and 6 deletions
--- a/benchmark/README.md
+++ b/benchmark/README.md
@ -12,7 +12,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,

 ## Writing metrics to the database

-`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
+`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.

 cf [`llama.py`](./llama.py) to see an example of this in practice.

--- a/benchmark/benchmarks_entrypoint.py
+++ b/benchmark/benchmarks_entrypoint.py
@ -3,7 +3,6 @@ import importlib.util
 import logging
 import os
 from typing import Dict
-import psycopg2
 import sys

 from psycopg2.extras import Json
--- a/benchmark/llama.py
+++ b/benchmark/llama.py
@ -215,7 +215,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
            torch.cuda.synchronize()
            end = perf_counter()
            time_to_second_token = end - start
-            logger.info(f"completed second compile generation in: {time_to_first_token}s")
+            logger.info(f"completed second compile generation in: {time_to_second_token}s")
            cache_position += 1
            all_generated_tokens += next_token.clone().detach().cpu().tolist()

@ -227,7 +227,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
            torch.cuda.synchronize()
            end = perf_counter()
            time_to_third_token = end - start
-            logger.info(f"completed third compile forward in: {time_to_first_token}s")
+            logger.info(f"completed third compile forward in: {time_to_third_token}s")
            cache_position += 1
            all_generated_tokens += next_token.clone().detach().cpu().tolist()

@ -298,7 +298,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
            output = model.generate(**inputs, past_key_values=past_key_values)
            end = perf_counter()
            third_compile_generate_time = end - start
-            logger.info(f"completed second compile generation in: {third_compile_generate_time}s")
+            logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")

            past_key_values = StaticCache(
@ -313,7 +313,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
            output = model.generate(**inputs, past_key_values=past_key_values)
            end = perf_counter()
            fourth_compile_generate_time = end - start
-            logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
+            logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")

        metrics_recorder.collect_model_measurements(