[CI/Benchmark] add more iteration and use median for robust latency benchmark (#3889)
This commit is contained in:
parent
54951ac4bf
commit
e4be7d70bb
@ -68,7 +68,8 @@ def main(args: argparse.Namespace):
|
||||
return latency
|
||||
|
||||
print("Warming up...")
|
||||
run_to_completion(profile_dir=None)
|
||||
for _ in tqdm(range(args.num_iters_warmup), desc="Warmup iterations"):
|
||||
run_to_completion(profile_dir=None)
|
||||
|
||||
if args.profile:
|
||||
profile_dir = args.profile_result_dir
|
||||
@ -84,7 +85,12 @@ def main(args: argparse.Namespace):
|
||||
latencies = []
|
||||
for _ in tqdm(range(args.num_iters), desc="Profiling iterations"):
|
||||
latencies.append(run_to_completion(profile_dir=None))
|
||||
latencies = np.array(latencies)
|
||||
percentages = [10, 25, 50, 75, 90]
|
||||
percentiles = np.percentile(latencies, percentages)
|
||||
print(f'Avg latency: {np.mean(latencies)} seconds')
|
||||
for percentage, percentile in zip(percentages, percentiles):
|
||||
print(f'{percentage}% percentile latency: {percentile} seconds')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
@ -106,9 +112,13 @@ if __name__ == '__main__':
|
||||
default=1,
|
||||
help='Number of generated sequences per prompt.')
|
||||
parser.add_argument('--use-beam-search', action='store_true')
|
||||
parser.add_argument('--num-iters-warmup',
|
||||
type=int,
|
||||
default=10,
|
||||
help='Number of iterations to run for warmup.')
|
||||
parser.add_argument('--num-iters',
|
||||
type=int,
|
||||
default=3,
|
||||
default=30,
|
||||
help='Number of iterations to run.')
|
||||
parser.add_argument('--trust-remote-code',
|
||||
action='store_true',
|
||||
|
Loading…
x
Reference in New Issue
Block a user