from vllm import LLM, SamplingParams

llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
sampling_params = SamplingParams(temperature=0.5)
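
# SamplingParams exposes more sampling knobs than temperature alone; the
# values below are illustrative, not part of the original example:
# sampling_params = SamplingParams(temperature=0.5, top_p=0.95, max_tokens=512)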


def print_outputs(outputs):
    # Print each prompt/completion pair, separated by a dashed rule.
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
        print("-" * 80)


print("=" * 80)

# In this script, we demonstrate how to pass input to the chat method:

conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant"
    },
    {
        "role": "user",
        "content": "Hello"
    },
    {
        "role": "assistant",
        "content": "Hello! How can I assist you today?"
    },
    {
        "role": "user",
        "content": "Write an essay about the importance of higher education.",
    },
]
outputs = llm.chat(conversation,
                   sampling_params=sampling_params,
                   use_tqdm=False)
print_outputs(outputs)
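
# Depending on the installed vLLM version, llm.chat may also accept a list of
# conversations for batched inference; a sketch under that assumption
# (duplicating the conversation here is purely illustrative):
# outputs = llm.chat([conversation, conversation],
#                    sampling_params=sampling_params,
#                    use_tqdm=False)
# print_outputs(outputs)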

# A chat template can be optionally supplied.
# If not, the model will use its default chat template.

# with open('template_falcon_180b.jinja', "r") as f:
#     chat_template = f.read()

# outputs = llm.chat(
#     conversation,
#     sampling_params=sampling_params,
#     use_tqdm=False,
#     chat_template=chat_template,
# )
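
# As a rough sketch of what a chat template does (not part of the original
# example), you can render the template yourself with the Hugging Face
# tokenizer and pass the resulting string to the plain generate() method:
#
# from transformers import AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# prompt = tokenizer.apply_chat_template(
#     conversation, tokenize=False, add_generation_prompt=True
# )
# outputs = llm.generate(prompt, sampling_params)
# print_outputs(outputs)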