[Benchmark] Do not save detailed info to json by default (#14879)
Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
parent
a73e183e36
commit
583a9778e0
@ -14,7 +14,8 @@ from tqdm.asyncio import tqdm
|
|||||||
from transformers import (AutoTokenizer, PreTrainedTokenizer,
|
from transformers import (AutoTokenizer, PreTrainedTokenizer,
|
||||||
PreTrainedTokenizerFast)
|
PreTrainedTokenizerFast)
|
||||||
|
|
||||||
from vllm.model_executor.model_loader.weight_utils import get_lock
|
# NOTE(simon): do not import vLLM here so the benchmark script
|
||||||
|
# can run without vLLM installed.
|
||||||
|
|
||||||
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
|
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
|
||||||
|
|
||||||
@ -427,6 +428,8 @@ def get_model(pretrained_model_name_or_path: str) -> str:
|
|||||||
if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
|
if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
|
||||||
from modelscope import snapshot_download
|
from modelscope import snapshot_download
|
||||||
|
|
||||||
|
from vllm.model_executor.model_loader.weight_utils import get_lock
|
||||||
|
|
||||||
# Use file lock to prevent multiple processes from
|
# Use file lock to prevent multiple processes from
|
||||||
# downloading the same model weights at the same time.
|
# downloading the same model weights at the same time.
|
||||||
with get_lock(pretrained_model_name_or_path):
|
with get_lock(pretrained_model_name_or_path):
|
||||||
|
@ -684,6 +684,15 @@ def main(args: argparse.Namespace):
|
|||||||
"Invalid metadata format. Please use KEY=VALUE format."
|
"Invalid metadata format. Please use KEY=VALUE format."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not args.save_detailed:
|
||||||
|
# Remove fields with too many data points
|
||||||
|
for field in [
|
||||||
|
"input_lens", "output_lens", "ttfts", "itls",
|
||||||
|
"generated_texts", "errors"
|
||||||
|
]:
|
||||||
|
if field in result_json:
|
||||||
|
del result_json[field]
|
||||||
|
|
||||||
# Traffic
|
# Traffic
|
||||||
result_json["request_rate"] = (args.request_rate if args.request_rate
|
result_json["request_rate"] = (args.request_rate if args.request_rate
|
||||||
< float("inf") else "inf")
|
< float("inf") else "inf")
|
||||||
@ -828,6 +837,12 @@ if __name__ == "__main__":
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Specify to save benchmark results to a json file",
|
help="Specify to save benchmark results to a json file",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--save-detailed",
|
||||||
|
action="store_true",
|
||||||
|
help="When saving the results, whether to include per request "
|
||||||
|
"information such as response, error, ttfts, tpots, etc.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--metadata",
|
"--metadata",
|
||||||
metavar="KEY=VALUE",
|
metavar="KEY=VALUE",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user