From 63d2705edbe7fbf4d581ef49503725f3481e04c7 Mon Sep 17 00:00:00 2001
From: Jennifer Zhao
Date: Sun, 13 Apr 2025 17:20:26 -0700
Subject: [PATCH] [Benchmark][Bugfix] Fix SonnetDataset default values in benchmark_throughput.py (#16556)

---
 benchmarks/benchmark_throughput.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index 39e4a34b..67e509c1 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -597,18 +597,26 @@ if __name__ == "__main__":
     parser.add_argument(
         "--prefix-len",
         type=int,
-        default=0,
-        help="Number of fixed prefix tokens before the random "
-        "context in a request (default: 0).",
-    )
+        default=None,
+        help=f"Number of prefix tokens to be used in RandomDataset "
+        "and SonnetDataset. For RandomDataset, the total input "
+        "length is the sum of prefix-len (default: "
+        f"{RandomDataset.DEFAULT_PREFIX_LEN}) and a random context length "
+        "sampled from [input_len * (1 - range_ratio), "
+        "input_len * (1 + range_ratio)]. For SonnetDataset, "
+        f"prefix_len (default: {SonnetDataset.DEFAULT_PREFIX_LEN}) "
+        "controls how much of the input is fixed lines versus "
+        "random lines, but the total input length remains approximately "
+        "input_len tokens.")
     # random dataset
     parser.add_argument(
         "--random-range-ratio",
         type=float,
-        default=0.0,
-        help="Range ratio for sampling input/output length, "
-        "used only for RandomDataset. Must be in the range [0, 1) to define "
-        "a symmetric sampling range "
+        default=None,
+        help=f"Range ratio (default : {RandomDataset.DEFAULT_RANGE_RATIO}) "
+        "for sampling input/output length, "
+        "used only for RandomDataset. Must be in the range [0, 1) to "
+        "define a symmetric sampling range "
         "[length * (1 - range_ratio), length * (1 + range_ratio)].",
     )
 