# SPDX-License-Identifier: Apache-2.0
"""
|
|
|
|
An example shows how to generate chat completions from reasoning models
|
|
|
|
like DeepSeekR1.
|
|
|
|
|
|
|
|
To run this example, you need to start the vLLM server with the reasoning
|
|
|
|
parser:
|
|
|
|
|
|
|
|
```bash
|
|
|
|
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
|
|
|
|
--enable-reasoning --reasoning-parser deepseek_r1
|
|
|
|
```
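
If the server is running, the model should show up when you query the
models endpoint (this assumes the default port 8000 used above):

```bash
curl http://localhost:8000/v1/models
```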

This example uses the OpenAI Python client library. The deepseek_r1 parser
separates the text the model emits between its <think> and </think> tags
into a `reasoning_content` field, leaving the final answer in `content`.
"""

from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# `vllm serve` hosts a single model, so pick its ID from the model list.
models = client.models.list()
model = models.data[0].id

# Round 1
messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
response = client.chat.completions.create(model=model, messages=messages)

# The parser returns the chain of thought in the extra `reasoning_content`
# field and the final answer in the standard `content` field.
reasoning_content = response.choices[0].message.reasoning_content
content = response.choices[0].message.content

print("reasoning_content:", reasoning_content)
print("content:", content)

# Round 2
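# Only the final `content` is carried into the next turn; the
# `reasoning_content` is deliberately not appended, since R1-style models
# are conventionally re-prompted without their earlier chain of thought.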
messages.append({"role": "assistant", "content": content})
messages.append({
    "role": "user",
    "content": "How many Rs are there in the word 'strawberry'?",
})
response = client.chat.completions.create(model=model, messages=messages)

reasoning_content = response.choices[0].message.reasoning_content
content = response.choices[0].message.content

print("reasoning_content:", reasoning_content)
print("content:", content)
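
# A minimal streaming sketch: re-send the Round 2 conversation with
# stream=True. It assumes (the OpenAI client types don't declare this) that
# the reasoning parser also fills `reasoning_content` on streamed deltas,
# so the field is read defensively with getattr.
stream = client.chat.completions.create(model=model, messages=messages, stream=True)
for chunk in stream:
    delta = chunk.choices[0].delta
    reasoning_delta = getattr(delta, "reasoning_content", None)
    if reasoning_delta:
        print(reasoning_delta, end="", flush=True)
    if delta.content:
        print(delta.content, end="", flush=True)
print()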