[Frontend][Misc] Goodput metric support (#9338)
This commit is contained in:
parent
4fa3e33349
commit
855e0e6f97
@ -53,6 +53,8 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
from argparse import ArgumentParser as FlexibleArgumentParser
|
from argparse import ArgumentParser as FlexibleArgumentParser
|
||||||
|
|
||||||
|
MILLISECONDS_TO_SECONDS_CONVERSION = 1000
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BenchmarkMetrics:
|
class BenchmarkMetrics:
|
||||||
@ -60,6 +62,7 @@ class BenchmarkMetrics:
|
|||||||
total_input: int
|
total_input: int
|
||||||
total_output: int
|
total_output: int
|
||||||
request_throughput: float
|
request_throughput: float
|
||||||
|
request_goodput: float
|
||||||
output_throughput: float
|
output_throughput: float
|
||||||
total_token_throughput: float
|
total_token_throughput: float
|
||||||
mean_ttft_ms: float
|
mean_ttft_ms: float
|
||||||
@ -316,12 +319,15 @@ def calculate_metrics(
|
|||||||
tokenizer: PreTrainedTokenizerBase,
|
tokenizer: PreTrainedTokenizerBase,
|
||||||
selected_percentile_metrics: List[str],
|
selected_percentile_metrics: List[str],
|
||||||
selected_percentiles: List[float],
|
selected_percentiles: List[float],
|
||||||
|
gootput_config_dict: Dict[str, float],
|
||||||
) -> Tuple[BenchmarkMetrics, List[int]]:
|
) -> Tuple[BenchmarkMetrics, List[int]]:
|
||||||
actual_output_lens: List[int] = []
|
actual_output_lens: List[int] = []
|
||||||
total_input = 0
|
total_input = 0
|
||||||
completed = 0
|
completed = 0
|
||||||
|
good_completed = 0
|
||||||
itls: List[float] = []
|
itls: List[float] = []
|
||||||
tpots: List[float] = []
|
tpots: List[float] = []
|
||||||
|
all_tpots: List[float] = []
|
||||||
ttfts: List[float] = []
|
ttfts: List[float] = []
|
||||||
e2els: List[float] = []
|
e2els: List[float] = []
|
||||||
for i in range(len(outputs)):
|
for i in range(len(outputs)):
|
||||||
@ -335,9 +341,13 @@ def calculate_metrics(
|
|||||||
add_special_tokens=False).input_ids)
|
add_special_tokens=False).input_ids)
|
||||||
actual_output_lens.append(output_len)
|
actual_output_lens.append(output_len)
|
||||||
total_input += input_requests[i][1]
|
total_input += input_requests[i][1]
|
||||||
|
tpot = 0
|
||||||
if output_len > 1:
|
if output_len > 1:
|
||||||
tpots.append(
|
tpot = (outputs[i].latency - outputs[i].ttft) / (output_len -
|
||||||
(outputs[i].latency - outputs[i].ttft) / (output_len - 1))
|
1)
|
||||||
|
tpots.append(tpot)
|
||||||
|
# Note: if output_len <= 1, we regard tpot as 0 for goodput
|
||||||
|
all_tpots.append(tpot)
|
||||||
itls += outputs[i].itl
|
itls += outputs[i].itl
|
||||||
ttfts.append(outputs[i].ttft)
|
ttfts.append(outputs[i].ttft)
|
||||||
e2els.append(outputs[i].latency)
|
e2els.append(outputs[i].latency)
|
||||||
@ -345,6 +355,28 @@ def calculate_metrics(
|
|||||||
else:
|
else:
|
||||||
actual_output_lens.append(0)
|
actual_output_lens.append(0)
|
||||||
|
|
||||||
|
if gootput_config_dict:
|
||||||
|
valid_metrics = []
|
||||||
|
slo_values = []
|
||||||
|
|
||||||
|
if "ttft" in gootput_config_dict:
|
||||||
|
valid_metrics.append(ttfts)
|
||||||
|
slo_values.append(gootput_config_dict["ttft"] /
|
||||||
|
MILLISECONDS_TO_SECONDS_CONVERSION)
|
||||||
|
if "tpot" in gootput_config_dict:
|
||||||
|
valid_metrics.append(all_tpots)
|
||||||
|
slo_values.append(gootput_config_dict["tpot"] /
|
||||||
|
MILLISECONDS_TO_SECONDS_CONVERSION)
|
||||||
|
if "e2el" in gootput_config_dict:
|
||||||
|
valid_metrics.append(e2els)
|
||||||
|
slo_values.append(gootput_config_dict["e2el"] /
|
||||||
|
MILLISECONDS_TO_SECONDS_CONVERSION)
|
||||||
|
|
||||||
|
for req_metric in zip(*valid_metrics):
|
||||||
|
is_good_req = all([s >= r for s, r in zip(slo_values, req_metric)])
|
||||||
|
if is_good_req:
|
||||||
|
good_completed += 1
|
||||||
|
|
||||||
if completed == 0:
|
if completed == 0:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"All requests failed. This is likely due to a misconfiguration "
|
"All requests failed. This is likely due to a misconfiguration "
|
||||||
@ -355,6 +387,7 @@ def calculate_metrics(
|
|||||||
total_input=total_input,
|
total_input=total_input,
|
||||||
total_output=sum(actual_output_lens),
|
total_output=sum(actual_output_lens),
|
||||||
request_throughput=completed / dur_s,
|
request_throughput=completed / dur_s,
|
||||||
|
request_goodput=good_completed / dur_s,
|
||||||
output_throughput=sum(actual_output_lens) / dur_s,
|
output_throughput=sum(actual_output_lens) / dur_s,
|
||||||
total_token_throughput=(total_input + sum(actual_output_lens)) / dur_s,
|
total_token_throughput=(total_input + sum(actual_output_lens)) / dur_s,
|
||||||
mean_ttft_ms=np.mean(ttfts or 0) *
|
mean_ttft_ms=np.mean(ttfts or 0) *
|
||||||
@ -398,6 +431,7 @@ async def benchmark(
|
|||||||
selected_percentile_metrics: List[str],
|
selected_percentile_metrics: List[str],
|
||||||
selected_percentiles: List[str],
|
selected_percentiles: List[str],
|
||||||
ignore_eos: bool,
|
ignore_eos: bool,
|
||||||
|
gootput_config_dict: Dict[str, float],
|
||||||
max_concurrency: Optional[int],
|
max_concurrency: Optional[int],
|
||||||
):
|
):
|
||||||
if backend in ASYNC_REQUEST_FUNCS:
|
if backend in ASYNC_REQUEST_FUNCS:
|
||||||
@ -512,6 +546,7 @@ async def benchmark(
|
|||||||
tokenizer=tokenizer,
|
tokenizer=tokenizer,
|
||||||
selected_percentile_metrics=selected_percentile_metrics,
|
selected_percentile_metrics=selected_percentile_metrics,
|
||||||
selected_percentiles=selected_percentiles,
|
selected_percentiles=selected_percentiles,
|
||||||
|
gootput_config_dict=gootput_config_dict,
|
||||||
)
|
)
|
||||||
|
|
||||||
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
|
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
|
||||||
@ -523,6 +558,9 @@ async def benchmark(
|
|||||||
metrics.total_output))
|
metrics.total_output))
|
||||||
print("{:<40} {:<10.2f}".format("Request throughput (req/s):",
|
print("{:<40} {:<10.2f}".format("Request throughput (req/s):",
|
||||||
metrics.request_throughput))
|
metrics.request_throughput))
|
||||||
|
if gootput_config_dict:
|
||||||
|
print("{:<40} {:<10.2f}".format("Request goodput (req/s):",
|
||||||
|
metrics.request_goodput))
|
||||||
print("{:<40} {:<10.2f}".format("Output token throughput (tok/s):",
|
print("{:<40} {:<10.2f}".format("Output token throughput (tok/s):",
|
||||||
metrics.output_throughput))
|
metrics.output_throughput))
|
||||||
print("{:<40} {:<10.2f}".format("Total Token throughput (tok/s):",
|
print("{:<40} {:<10.2f}".format("Total Token throughput (tok/s):",
|
||||||
@ -534,6 +572,8 @@ async def benchmark(
|
|||||||
"total_input_tokens": metrics.total_input,
|
"total_input_tokens": metrics.total_input,
|
||||||
"total_output_tokens": metrics.total_output,
|
"total_output_tokens": metrics.total_output,
|
||||||
"request_throughput": metrics.request_throughput,
|
"request_throughput": metrics.request_throughput,
|
||||||
|
"request_goodput:":
|
||||||
|
metrics.request_goodput if gootput_config_dict else None,
|
||||||
"output_throughput": metrics.output_throughput,
|
"output_throughput": metrics.output_throughput,
|
||||||
"total_token_throughput": metrics.total_token_throughput,
|
"total_token_throughput": metrics.total_token_throughput,
|
||||||
"input_lens": [output.prompt_len for output in outputs],
|
"input_lens": [output.prompt_len for output in outputs],
|
||||||
@ -587,6 +627,41 @@ async def benchmark(
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def check_goodput_args(args):
    """Validate the ``--goodput`` command-line option and return its SLOs.

    Reads ``args.goodput``; when it is empty or ``None`` no goodput
    measurement was requested and an empty dict is returned. Otherwise the
    ``"KEY:VALUE"`` pairs are parsed with ``parse_goodput`` and each entry
    is checked.

    Args:
        args: Parsed command-line namespace exposing a ``goodput`` attribute.

    Returns:
        A dict mapping each service level objective name to its float
        threshold (the CLI help describes the values as milliseconds).

    Raises:
        ValueError: If a name is not one of ``ttft``, ``tpot``, ``e2el``,
            or if a value is negative or NaN.
    """
    goodput_config_dict = {}
    VALID_NAMES = ["ttft", "tpot", "e2el"]
    if args.goodput:
        goodput_config_dict = parse_goodput(args.goodput)
        for slo_name, slo_val in goodput_config_dict.items():
            if slo_name not in VALID_NAMES:
                raise ValueError(
                    f"Invalid metric name found, {slo_name}: {slo_val}. "
                    "The service level objective name should be one of "
                    f"{str(VALID_NAMES)}. ")
            # Written as "not >= 0" rather than "< 0" so that NaN, for which
            # every comparison is False, is rejected instead of slipping
            # through and silently marking every request as missing its SLO.
            if not slo_val >= 0:
                raise ValueError(
                    f"Invalid value found, {slo_name}: {slo_val}. "
                    "The service level objective value should be "
                    "non-negative.")
    return goodput_config_dict
|
||||||
|
|
||||||
|
|
||||||
|
def parse_goodput(slo_pairs):
    """Parse ``"KEY:VALUE"`` strings into a dict of float thresholds.

    Args:
        slo_pairs: Iterable of strings, each holding exactly one ``:``
            separating a metric name from a numeric value.

    Returns:
        A dict mapping each metric name to its value converted to float.
        If a name is repeated, the last occurrence wins.

    Raises:
        argparse.ArgumentTypeError: If a pair does not split into exactly
            two parts on ``:``, or if the value is not a number (including
            the literal ``nan``).
    """
    goodput_config_dict = {}
    try:
        for slo_pair in slo_pairs:
            slo_name, slo_val = slo_pair.split(":")
            threshold = float(slo_val)
            # float() happily parses "nan"; NaN is the only float that is
            # not equal to itself, and it is not a usable threshold.
            if threshold != threshold:
                raise ValueError(
                    f"NaN is not a valid objective value: {slo_pair!r}")
            goodput_config_dict[slo_name] = threshold
    except ValueError as err:
        raise argparse.ArgumentTypeError(
            "Invalid format found for service level objectives. "
            "Specify service level objectives for goodput as \"KEY:VALUE\" "
            "pairs, where the key is a metric name, and the value is a "
            "number in milliseconds.") from err
    return goodput_config_dict
|
||||||
|
|
||||||
|
|
||||||
def main(args: argparse.Namespace):
|
def main(args: argparse.Namespace):
|
||||||
print(args)
|
print(args)
|
||||||
random.seed(args.seed)
|
random.seed(args.seed)
|
||||||
@ -681,6 +756,8 @@ def main(args: argparse.Namespace):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown dataset: {args.dataset_name}")
|
raise ValueError(f"Unknown dataset: {args.dataset_name}")
|
||||||
|
|
||||||
|
gootput_config_dict = check_goodput_args(args)
|
||||||
|
|
||||||
benchmark_result = asyncio.run(
|
benchmark_result = asyncio.run(
|
||||||
benchmark(
|
benchmark(
|
||||||
backend=backend,
|
backend=backend,
|
||||||
@ -699,6 +776,7 @@ def main(args: argparse.Namespace):
|
|||||||
float(p) for p in args.metric_percentiles.split(",")
|
float(p) for p in args.metric_percentiles.split(",")
|
||||||
],
|
],
|
||||||
ignore_eos=args.ignore_eos,
|
ignore_eos=args.ignore_eos,
|
||||||
|
gootput_config_dict=gootput_config_dict,
|
||||||
max_concurrency=args.max_concurrency,
|
max_concurrency=args.max_concurrency,
|
||||||
))
|
))
|
||||||
|
|
||||||
@ -915,6 +993,17 @@ if __name__ == "__main__":
|
|||||||
"Default value is \"99\". "
|
"Default value is \"99\". "
|
||||||
"Use \"--percentile-metrics\" to select metrics.",
|
"Use \"--percentile-metrics\" to select metrics.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--goodput",
|
||||||
|
nargs="+",
|
||||||
|
required=False,
|
||||||
|
help="Specify service level objectives for goodput as \"KEY:VALUE\" "
|
||||||
|
"pairs, where the key is a metric name, and the value is in "
|
||||||
|
"milliseconds. Multiple \"KEY:VALUE\" pairs can be provided, "
|
||||||
|
"separated by spaces. Allowed request level metric names are "
|
||||||
|
"\"ttft\", \"tpot\", \"e2el\". For more context on the definition of "
|
||||||
|
"goodput, refer to DistServe paper: https://arxiv.org/pdf/2401.09670 "
|
||||||
|
"and the blog: https://hao-ai-lab.github.io/blogs/distserve")
|
||||||
|
|
||||||
# group for dataset specific arguments
|
# group for dataset specific arguments
|
||||||
sonnet_group = parser.add_argument_group("sonnet dataset options")
|
sonnet_group = parser.add_argument_group("sonnet dataset options")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user