[CI/Benchmark] add more iterations and use median for robust latency benchmark (#3889)

This commit is contained in:
youkaichao 2024-04-06 14:32:30 -07:00 committed by GitHub
parent 54951ac4bf
commit e4be7d70bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -68,7 +68,8 @@ def main(args: argparse.Namespace):
return latency
print("Warming up...")
run_to_completion(profile_dir=None)
for _ in tqdm(range(args.num_iters_warmup), desc="Warmup iterations"):
run_to_completion(profile_dir=None)
if args.profile:
profile_dir = args.profile_result_dir
@@ -84,7 +85,12 @@ def main(args: argparse.Namespace):
latencies = []
for _ in tqdm(range(args.num_iters), desc="Profiling iterations"):
latencies.append(run_to_completion(profile_dir=None))
latencies = np.array(latencies)
percentages = [10, 25, 50, 75, 90]
percentiles = np.percentile(latencies, percentages)
print(f'Avg latency: {np.mean(latencies)} seconds')
for percentage, percentile in zip(percentages, percentiles):
print(f'{percentage}% percentile latency: {percentile} seconds')
if __name__ == '__main__':
@@ -106,9 +112,13 @@ if __name__ == '__main__':
default=1,
help='Number of generated sequences per prompt.')
parser.add_argument('--use-beam-search', action='store_true')
parser.add_argument('--num-iters-warmup',
type=int,
default=10,
help='Number of iterations to run for warmup.')
parser.add_argument('--num-iters',
type=int,
default=3,
default=30,
help='Number of iterations to run.')
parser.add_argument('--trust-remote-code',
action='store_true',