check input length of sonnet samples (#16423)

Signed-off-by: alexey-belyakov <alexey.belyakov@intel.com>
This commit is contained in:
Alexey Belyakov 2025-04-11 03:15:06 +01:00 committed by GitHub
parent 268c325078
commit 3e397a9484
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -489,7 +489,7 @@ class SonnetDataset(BenchmarkDataset):
prefix_lines = self.data[:num_prefix_lines]
samples = []
for _ in range(num_requests):
while len(samples) < num_requests:
extra_lines = random.choices(self.data,
k=num_input_lines - num_prefix_lines)
prompt = f"{base_prompt}{''.join(prefix_lines + extra_lines)}"
@@ -497,13 +497,14 @@ class SonnetDataset(BenchmarkDataset):
prompt_formatted = tokenizer.apply_chat_template(
msg, add_generation_prompt=True, tokenize=False)
prompt_len = len(tokenizer(prompt_formatted).input_ids)
samples.append(
SampleRequest(
prompt=prompt_formatted
if return_prompt_formatted else prompt,
prompt_len=prompt_len,
expected_output_len=output_len,
))
if prompt_len <= input_len:
samples.append(
SampleRequest(
prompt=prompt_formatted
if return_prompt_formatted else prompt,
prompt_len=prompt_len,
expected_output_len=output_len,
))
return samples