# SPDX-License-Identifier: Apache-2.0

import argparse
from typing import List, Tuple

from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
from vllm.utils import FlexibleArgumentParser
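
# This example drives the LLMEngine class directly: prompts are added as
# individual requests and the engine is stepped by hand, rather than going
# through the higher-level LLM entry point.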


def create_test_prompts() -> List[Tuple[str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
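    # Three illustrative configurations: greedy decoding that also returns
    # prompt and output logprobs, top-k sampling with a presence penalty, and
    # parallel sampling (the best 2 of 5 candidates) with nucleus sampling
    # and a frequency penalty.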
    return [
        ("A robot may not injure a human being",
         SamplingParams(temperature=0.0, logprobs=1, prompt_logprobs=1)),
        ("To be or not to be,",
         SamplingParams(temperature=0.8, top_k=5, presence_penalty=0.2)),
        ("What is the meaning of life?",
         SamplingParams(n=2,
                        best_of=5,
                        temperature=0.8,
                        top_p=0.95,
                        frequency_penalty=0.1)),
    ]


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    request_id = 0
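
    # One new prompt is added per loop iteration while the engine keeps
    # stepping, so fresh requests are batched together with those still in
    # flight.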
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, sampling_params = test_prompts.pop(0)
            engine.add_request(str(request_id), prompt, sampling_params)
            request_id += 1
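
        # step() performs one decoding iteration and returns the newly
        # generated results for the requests currently in flight.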
        request_outputs: List[RequestOutput] = engine.step()

        for request_output in request_outputs:
            if request_output.finished:
                print(request_output)


def initialize_engine(args: argparse.Namespace) -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    engine_args = EngineArgs.from_cli_args(args)
    return LLMEngine.from_engine_args(engine_args)


def main(args: argparse.Namespace):
    """Main function that sets up and runs the prompt processing."""
    engine = initialize_engine(args)
    test_prompts = create_test_prompts()
    process_requests(engine, test_prompts)


if __name__ == '__main__':
    parser = FlexibleArgumentParser(
        description='Demo on using the LLMEngine class directly')
    parser = EngineArgs.add_cli_args(parser)
    args = parser.parse_args()
    main(args)
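
# Example invocation (the filename is assumed here; any flag registered by
# EngineArgs.add_cli_args, such as --model, can be passed on the command
# line):
#
#     python llm_engine_example.py --model facebook/opt-125m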