Mirror of https://github.com/huggingface/transformers.git
Fix some typos about benchmark scripts. (#37027)
Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
commit c90e6e9625
parent 1fcaad6df9
@@ -12,7 +12,7 @@
 
 ## Writing metrics to the database
 
-`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
+`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
 
 cf [`llama.py`](./llama.py) to see an example of this in practice.
 
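The paragraph above describes the intended pattern: device polling on a background thread, model timing on the main thread. A minimal sketch of that pattern using only the standard library; the probe loop and the stand-in workload are hypothetical placeholders, not the benchmark's real measurement code:

    import threading
    import time

    readings = []                # device measurements collected in the background
    stop = threading.Event()

    def poll_device(interval: float = 0.01) -> None:
        # Hypothetical probe loop; a real recorder would read GPU
        # utilization/memory here instead of a timestamp.
        while not stop.is_set():
            readings.append(time.perf_counter())
            time.sleep(interval)

    t = threading.Thread(target=poll_device, daemon=True)
    t.start()        # device readings proceed without blocking the main thread
    time.sleep(0.1)  # stand-in for the model measurements on the main thread
    stop.set()
    t.join()
    print(f"collected {len(readings)} device readings")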
@@ -3,7 +3,6 @@ import importlib.util
 import logging
 import os
 from typing import Dict
-import psycopg2
 import sys
 
 from psycopg2.extras import Json
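For context on the import that survives this hunk: `psycopg2.extras.Json` wraps a Python object so psycopg2 adapts it to a JSON/JSONB column on insert, which is why the script can drop the bare `import psycopg2` when it only needs `Json`. A minimal sketch, assuming a reachable Postgres database and a hypothetical `benchmarks` table with a `metrics jsonb` column:

    import psycopg2  # needed here for connect(); the script above no longer uses it directly
    from psycopg2.extras import Json

    conn = psycopg2.connect("dbname=metrics")        # hypothetical DSN
    with conn, conn.cursor() as cur:
        cur.execute(
            "INSERT INTO benchmarks (metrics) VALUES (%s)",
            (Json({"time_to_first_token": 0.12}),),  # dict adapted to jsonb
        )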
@@ -215,7 +215,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 torch.cuda.synchronize()
 end = perf_counter()
 time_to_second_token = end - start
-logger.info(f"completed second compile generation in: {time_to_first_token}s")
+logger.info(f"completed second compile generation in: {time_to_second_token}s")
 cache_position += 1
 all_generated_tokens += next_token.clone().detach().cpu().tolist()
 
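The timing pattern in this hunk (and the next) is worth spelling out: CUDA kernels launch asynchronously, so `torch.cuda.synchronize()` has to run before `perf_counter()` is read, otherwise the measurement covers only the kernel launch, not the work. A minimal sketch of that pattern, assuming a CUDA device and any GPU-bound callable:

    from time import perf_counter

    import torch

    def timed(fn):
        # Wall-clock a GPU operation correctly: drain pending kernels before
        # starting the clock, and again before stopping it.
        torch.cuda.synchronize()
        start = perf_counter()
        out = fn()
        torch.cuda.synchronize()
        end = perf_counter()
        return out, end - start

    # Usage (hypothetical model/inputs):
    # out, seconds = timed(lambda: model.generate(**inputs))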
@@ -227,7 +227,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 torch.cuda.synchronize()
 end = perf_counter()
 time_to_third_token = end - start
-logger.info(f"completed third compile forward in: {time_to_first_token}s")
+logger.info(f"completed third compile forward in: {time_to_third_token}s")
 cache_position += 1
 all_generated_tokens += next_token.clone().detach().cpu().tolist()
 
@@ -298,7 +298,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 output = model.generate(**inputs, past_key_values=past_key_values)
 end = perf_counter()
 third_compile_generate_time = end - start
-logger.info(f"completed second compile generation in: {third_compile_generate_time}s")
+logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
 logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
 
 past_key_values = StaticCache(
@@ -313,7 +313,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
 output = model.generate(**inputs, past_key_values=past_key_values)
 end = perf_counter()
 fourth_compile_generate_time = end - start
-logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
+logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
 logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
 
 metrics_recorder.collect_model_measurements(
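Both of the last two hunks time `generate` with a pre-allocated `StaticCache`, whose fixed shapes keep `torch.compile` from recompiling between runs. A minimal sketch of that usage; the checkpoint and cache sizes are illustrative, and the `StaticCache` keyword names may vary across transformers versions:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, StaticCache

    model_id = "meta-llama/Llama-2-7b-hf"   # illustrative checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )
    inputs = tokenizer("Why are bears great?", return_tensors="pt").to(model.device)

    # Pre-allocate the KV cache once, then hand it to generate().
    past_key_values = StaticCache(
        config=model.config,
        max_batch_size=1,
        max_cache_len=256,
        device=model.device,
        dtype=model.dtype,
    )
    output = model.generate(**inputs, past_key_values=past_key_values)
    print(tokenizer.batch_decode(output.cpu().tolist()))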