Fix some typos in benchmark scripts. (#37027)

Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
湛露先生 2025-03-28 22:10:20 +08:00 committed by GitHub
parent 1fcaad6df9
commit c90e6e9625
3 changed files with 5 additions and 6 deletions

View File

@@ -12,7 +12,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 ## Writing metrics to the database
-`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
+`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
 cf [`llama.py`](./llama.py) to see an example of this in practice.
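
The README passage above describes running device readings on a background thread while the main thread benchmarks the model. Below is a minimal sketch of that pattern; `collect_model_measurements` appears later in this commit's diff, but the `collect_device_measurements` method, its arguments, and the helper shown here are illustrative assumptions, not the repository's actual API.

```python
# Minimal sketch of the background-measurement pattern the README describes.
# `collect_device_measurements` and the argument shapes are assumptions.
from threading import Event, Thread


def benchmark_with_device_readings(metrics_recorder, benchmark_id, run_model):
    stop = Event()

    def read_device_measurements():
        # Poll hardware counters (GPU utilization, memory, ...) until told to stop.
        while not stop.is_set():
            metrics_recorder.collect_device_measurements(benchmark_id)  # assumed API
            stop.wait(0.1)  # sampling interval

    reader = Thread(target=read_device_measurements, daemon=True)
    reader.start()  # device readings run in the background...
    try:
        # ...while the main thread executes the model and records its metrics.
        model_metrics = run_model()
        metrics_recorder.collect_model_measurements(benchmark_id, model_metrics)
    finally:
        stop.set()
        reader.join()
```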

View File

@@ -3,7 +3,6 @@ import importlib.util
 import logging
 import os
 from typing import Dict
-import psycopg2
 import sys
 from psycopg2.extras import Json

View File

@@ -215,7 +215,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 torch.cuda.synchronize()
 end = perf_counter()
 time_to_second_token = end - start
-logger.info(f"completed second compile generation in: {time_to_first_token}s")
+logger.info(f"completed second compile generation in: {time_to_second_token}s")
 cache_position += 1
 all_generated_tokens += next_token.clone().detach().cpu().tolist()
@@ -227,7 +227,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 torch.cuda.synchronize()
 end = perf_counter()
 time_to_third_token = end - start
-logger.info(f"completed third compile forward in: {time_to_first_token}s")
+logger.info(f"completed third compile forward in: {time_to_third_token}s")
 cache_position += 1
 all_generated_tokens += next_token.clone().detach().cpu().tolist()
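
These two hunks time individual decode steps, and the `torch.cuda.synchronize()` before each `perf_counter()` read is what makes the numbers meaningful: CUDA kernels launch asynchronously, so without the synchronize the clock would stop after the launch, not after the work. A self-contained illustration of the pattern (it assumes a CUDA device is available):

```python
# Why torch.cuda.synchronize() precedes the perf_counter() read:
# CUDA kernels are queued asynchronously, so the host clock must wait
# for the GPU to actually finish before the elapsed time is read.
from time import perf_counter

import torch

x = torch.randn(4096, 4096, device="cuda")

start = perf_counter()
y = x @ x  # kernel is queued; this call returns almost immediately
torch.cuda.synchronize()  # block until the matmul actually completes
end = perf_counter()
print(f"matmul took: {end - start}s")
```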
@@ -298,7 +298,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 output = model.generate(**inputs, past_key_values=past_key_values)
 end = perf_counter()
 third_compile_generate_time = end - start
-logger.info(f"completed second compile generation in: {third_compile_generate_time}s")
+logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
 logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
 past_key_values = StaticCache(
@@ -313,7 +313,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 output = model.generate(**inputs, past_key_values=past_key_values)
 end = perf_counter()
 fourth_compile_generate_time = end - start
-logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
+logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
 logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
 metrics_recorder.collect_model_measurements(
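
The last two hunks time repeated `model.generate` calls against a preallocated `StaticCache`, which keeps KV-cache shapes fixed so compiled graphs can be reused across runs. A hedged sketch of that setup; the checkpoint name is a placeholder and the `StaticCache` constructor arguments are assumptions that vary across transformers versions:

```python
# Sketch of timing model.generate with a preallocated StaticCache, as in the
# hunks above. The checkpoint is a placeholder and the StaticCache arguments
# are assumptions; check the signature for your transformers version.
from time import perf_counter

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, StaticCache

ckpt = "meta-llama/Llama-2-7b-hf"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to("cuda")

inputs = tokenizer("The quick brown fox", return_tensors="pt").to("cuda")
past_key_values = StaticCache(
    config=model.config,
    max_batch_size=1,
    max_cache_len=256,
    device="cuda",
    dtype=torch.float16,
)

start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
end = perf_counter()
print(f"generation took: {end - start}s")
print(tokenizer.batch_decode(output.cpu().tolist()))
```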