[Minor] Fix benchmark_latency script (#2765)
This commit is contained in:
parent
c9b45adeeb
commit
72d3a30c63
@@ -37,7 +37,10 @@ def main(args: argparse.Namespace):
|
||||
max_tokens=args.output_len,
|
||||
)
|
||||
print(sampling_params)
|
||||
- dummy_prompt_token_ids = [[0] * args.input_len] * args.batch_size
|
||||
+ dummy_prompt_token_ids = np.random.randint(10000,
|
||||
+ size=(args.batch_size,
|
||||
+ args.input_len))
|
||||
+ dummy_prompt_token_ids = dummy_prompt_token_ids.tolist()
|
||||
|
||||
def run_to_completion(profile_dir: Optional[str] = None):
|
||||
if profile_dir:
|
||||
@@ -71,7 +74,7 @@ def main(args: argparse.Namespace):
|
||||
"."
|
||||
) / "vllm_benchmark_result" / f"latency_result_{time.time()}"
|
||||
print(f"Profiling (results will be saved to '{profile_dir}')...")
|
||||
- run_to_completion(profile_dir=args.profile_result_dir)
|
||||
+ run_to_completion(profile_dir=profile_dir)
|
||||
return
|
||||
|
||||
# Benchmark.
|
||||
|
Loading…
x
Reference in New Issue
Block a user