[CI/Benchmark] add more iterations and use median for robust latency benchmark (#3889)
This commit is contained in:
parent
54951ac4bf
commit
e4be7d70bb
@ -68,7 +68,8 @@ def main(args: argparse.Namespace):
|
|||||||
return latency
|
return latency
|
||||||
|
|
||||||
print("Warming up...")
|
print("Warming up...")
|
||||||
run_to_completion(profile_dir=None)
|
for _ in tqdm(range(args.num_iters_warmup), desc="Warmup iterations"):
|
||||||
|
run_to_completion(profile_dir=None)
|
||||||
|
|
||||||
if args.profile:
|
if args.profile:
|
||||||
profile_dir = args.profile_result_dir
|
profile_dir = args.profile_result_dir
|
||||||
@ -84,7 +85,12 @@ def main(args: argparse.Namespace):
|
|||||||
latencies = []
|
latencies = []
|
||||||
for _ in tqdm(range(args.num_iters), desc="Profiling iterations"):
|
for _ in tqdm(range(args.num_iters), desc="Profiling iterations"):
|
||||||
latencies.append(run_to_completion(profile_dir=None))
|
latencies.append(run_to_completion(profile_dir=None))
|
||||||
|
latencies = np.array(latencies)
|
||||||
|
percentages = [10, 25, 50, 75, 90]
|
||||||
|
percentiles = np.percentile(latencies, percentages)
|
||||||
print(f'Avg latency: {np.mean(latencies)} seconds')
|
print(f'Avg latency: {np.mean(latencies)} seconds')
|
||||||
|
for percentage, percentile in zip(percentages, percentiles):
|
||||||
|
print(f'{percentage}% percentile latency: {percentile} seconds')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@ -106,9 +112,13 @@ if __name__ == '__main__':
|
|||||||
default=1,
|
default=1,
|
||||||
help='Number of generated sequences per prompt.')
|
help='Number of generated sequences per prompt.')
|
||||||
parser.add_argument('--use-beam-search', action='store_true')
|
parser.add_argument('--use-beam-search', action='store_true')
|
||||||
|
parser.add_argument('--num-iters-warmup',
|
||||||
|
type=int,
|
||||||
|
default=10,
|
||||||
|
help='Number of iterations to run for warmup.')
|
||||||
parser.add_argument('--num-iters',
|
parser.add_argument('--num-iters',
|
||||||
type=int,
|
type=int,
|
||||||
default=3,
|
default=30,
|
||||||
help='Number of iterations to run.')
|
help='Number of iterations to run.')
|
||||||
parser.add_argument('--trust-remote-code',
|
parser.add_argument('--trust-remote-code',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user