Skip to content

Commit

Permalink
Fix throughput calculation in image-to-text example (#1070)
Browse files · Browse the repository at this point in the history
Committed by regisss on Jun 13, 2024
1 parent 33ee016 commit f428dd5
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions examples/image-to-text/run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def main():
parser.add_argument("--batch_size", type=int, default=1, help="Input batch size.")
parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.")
parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.")
parser.add_argument(
"--ignore_eos",
action="store_true",
help="Whether to ignore eos, set False to disable it.",
)
args = parser.parse_args()

# set args.quant_config with env variable if it is set
Expand Down Expand Up @@ -143,7 +148,7 @@ def main():
"lazy_mode": True,
"hpu_graphs": args.use_hpu_graphs,
"max_new_tokens": args.max_new_tokens,
"ignore_eos": True,
"ignore_eos": args.ignore_eos,
}
if args.use_hpu_graphs:
from habana_frameworks.torch.hpu import wrap_in_hpu_graph
Expand Down Expand Up @@ -171,7 +176,14 @@ def main():
end = time.perf_counter()
duration = end - start

total_new_tokens_generated = args.n_iterations * args.batch_size * args.max_new_tokens
# Let's calculate the number of generated tokens
n_input_tokens = len(generator.tokenizer(args.prompt).input_ids) if args.prompt is not None else 0
n_output_tokens = 0
for sequence in result:
# We have to subtract the number of input tokens as they are part of the returned sequence
n_output_tokens += len(generator.tokenizer(sequence[0]["generated_text"]).input_ids) - n_input_tokens

total_new_tokens_generated = args.n_iterations * n_output_tokens
throughput = total_new_tokens_generated / duration
logger.info(
f"result = {result}, time = {(end-start) * 1000 / args.n_iterations }ms, Throughput (including tokenization) = {throughput} tokens/second"
Expand Down

0 comments on commit f428dd5

Please sign in to comment.