[Frontend] Make beam search emulator temperature modifiable (#8928)
Co-authored-by: Eduard Balzin <nfunctor@yahoo.fr>
This commit is contained in:
parent
e1a3f5e831
commit
090e945e36
@ -396,6 +396,7 @@ class LLM:
|
|||||||
beam_width: int,
|
beam_width: int,
|
||||||
max_tokens: int,
|
max_tokens: int,
|
||||||
ignore_eos: bool = False,
|
ignore_eos: bool = False,
|
||||||
|
temperature: float = 0.0,
|
||||||
) -> List[BeamSearchOutput]:
|
) -> List[BeamSearchOutput]:
|
||||||
"""
|
"""
|
||||||
Generate sequences using beam search.
|
Generate sequences using beam search.
|
||||||
@ -405,6 +406,7 @@ class LLM:
|
|||||||
of token IDs.
|
of token IDs.
|
||||||
beam_width: The number of beams to keep at each step.
|
beam_width: The number of beams to keep at each step.
|
||||||
max_tokens: The max number of tokens to generate for each prompt.
|
max_tokens: The max number of tokens to generate for each prompt.
|
||||||
|
temperature: The sampling temperature forwarded to SamplingParams (defaults to 0.0).
|
||||||
|
|
||||||
TODO: how does beam search work together with length penalty, frequency
|
TODO: how does beam search work together with length penalty, frequency
|
||||||
penalty, and stopping criteria, etc.?
|
penalty, and stopping criteria, etc.?
|
||||||
@ -416,7 +418,7 @@ class LLM:
|
|||||||
# at https://github.com/huggingface/transformers/blob/e15687fffe5c9d20598a19aeab721ae0a7580f8a/src/transformers/generation/beam_search.py#L534 # noqa
|
# at https://github.com/huggingface/transformers/blob/e15687fffe5c9d20598a19aeab721ae0a7580f8a/src/transformers/generation/beam_search.py#L534 # noqa
|
||||||
beam_search_params = SamplingParams(logprobs=2 * beam_width,
|
beam_search_params = SamplingParams(logprobs=2 * beam_width,
|
||||||
max_tokens=1,
|
max_tokens=1,
|
||||||
temperature=0.0)
|
temperature=temperature)
|
||||||
instances: List[BeamSearchInstance] = []
|
instances: List[BeamSearchInstance] = []
|
||||||
|
|
||||||
for prompt in prompts:
|
for prompt in prompts:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user