165 lines
4.4 KiB
Python
165 lines
4.4 KiB
Python
![]() |
# ruff: noqa
|
||
|
import argparse
|
||
|
|
||
|
from vllm import LLM
|
||
|
from vllm.sampling_params import SamplingParams
|
||
|
|
||
|
# This script is an offline demo for running Pixtral.
|
||
|
#
|
||
|
# If you want to run a server/client setup instead, use the commands below:
|
||
|
#
|
||
|
# - Server:
|
||
|
#
|
||
|
# ```bash
|
||
|
# vllm serve mistralai/Pixtral-12B-2409 --tokenizer_mode mistral --limit_mm_per_prompt 'image=4' --max_num_batched_tokens 16384
|
||
|
# ```
|
||
|
#
|
||
|
# - Client:
|
||
|
#
|
||
|
# ```bash
|
||
|
# curl --location 'http://<your-node-url>:8000/v1/chat/completions' \
|
||
|
# --header 'Content-Type: application/json' \
|
||
|
# --header 'Authorization: Bearer token' \
|
||
|
# --data '{
|
||
|
# "model": "mistralai/Pixtral-12B-2409",
|
||
|
# "messages": [
|
||
|
# {
|
||
|
# "role": "user",
|
||
|
# "content": [
|
||
|
# {"type" : "text", "text": "Describe this image in detail please."},
|
||
|
# {"type": "image_url", "image_url": {"url": "https://s3.amazonaws.com/cms.ipressroom.com/338/files/201808/5b894ee1a138352221103195_A680%7Ejogging-edit/A680%7Ejogging-edit_hero.jpg"}},
|
||
|
# {"type" : "text", "text": "and this one as well. Answer in French."},
|
||
|
# {"type": "image_url", "image_url": {"url": "https://www.wolframcloud.com/obj/resourcesystem/images/a0e/a0ee3983-46c6-4c92-b85d-059044639928/6af8cfb971db031b.png"}}
|
||
|
# ]
|
||
|
# }
|
||
|
# ]
|
||
|
# }'
|
||
|
# ```
|
||
|
#
|
||
|
# Usage:
|
||
|
# python demo.py simple
|
||
|
# python demo.py advanced
|
||
|
|
||
|
|
||
|
def run_simple_demo():
    """Send one text-plus-image user message to Pixtral and print the reply.

    Builds an ``LLM`` for ``mistralai/Pixtral-12B-2409`` with
    ``tokenizer_mode="mistral"``, submits a single user message that pairs
    a short text prompt with one image URL, and prints the text of the
    first completion of the first returned output.
    """
    params = SamplingParams(max_tokens=8192)
    engine = LLM(model="mistralai/Pixtral-12B-2409", tokenizer_mode="mistral")

    # The user turn is a list of typed content parts: text first, then image.
    text_part = {
        "type": "text",
        "text": "Describe this image in one sentence.",
    }
    image_part = {
        "type": "image_url",
        "image_url": {"url": "https://picsum.photos/id/237/200/300"},
    }
    conversation = [{"role": "user", "content": [text_part, image_part]}]

    results = engine.chat(conversation, sampling_params=params)

    first_completion = results[0].outputs[0]
    print(first_completion.text)
|
||
|
|
||
|
|
||
|
def run_advanced_demo():
    """Run a multi-turn, multi-image Pixtral chat and print the reply.

    Builds an ``LLM`` for ``mistralai/Pixtral-12B-2409`` with
    ``tokenizer_mode="mistral"``, ``limit_mm_per_prompt={"image": 5}`` and
    ``max_num_batched_tokens`` set to ``5 * 4096``. The conversation sent to
    ``chat`` has four messages: a user turn with a text prompt and two
    images, a fixed assistant reply, a text-only user follow-up, and a user
    turn containing a third image. The text of the first completion of the
    first returned output is printed.
    """
    images_per_prompt = 5
    tokens_per_image = 4096

    params = SamplingParams(max_tokens=8192, temperature=0.7)
    engine = LLM(
        model="mistralai/Pixtral-12B-2409",
        tokenizer_mode="mistral",
        limit_mm_per_prompt={"image": images_per_prompt},
        max_num_batched_tokens=images_per_prompt * tokens_per_image,
    )

    def image_part(url):
        """Wrap *url* in an ``image_url`` content-part dict."""
        return {"type": "image_url", "image_url": {"url": url}}

    first_url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/yosemite.png"
    second_url = "https://picsum.photos/seed/picsum/200/300"
    third_url = "https://picsum.photos/id/32/512/512"

    opening_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe the following image."},
            image_part(first_url),
            image_part(second_url),
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": "The images show nature.",
    }
    follow_up_turn = {
        "role": "user",
        "content": "More details please and answer only in French!.",
    }
    extra_image_turn = {
        "role": "user",
        "content": [image_part(third_url)],
    }
    conversation = [
        opening_turn,
        assistant_turn,
        follow_up_turn,
        extra_image_turn,
    ]

    results = engine.chat(messages=conversation, sampling_params=params)
    print(results[0].outputs[0].text)
|
||
|
|
||
|
|
||
|
def main():
    """Parse the demo mode from the command line and run the matching demo.

    Accepts one positional argument, ``mode``, restricted by argparse to
    ``"simple"`` or ``"advanced"``. Prints a banner naming the chosen mode
    and then calls ``run_simple_demo`` or ``run_advanced_demo``.
    """
    cli = argparse.ArgumentParser(
        description="Run a demo in simple or advanced mode.",
    )
    cli.add_argument(
        "mode",
        choices=["simple", "advanced"],
        help="Specify the demo mode: 'simple' or 'advanced'",
    )
    selected = cli.parse_args().mode

    # Each mode maps to the banner to print and the demo function to call.
    dispatch = {
        "simple": ("Running simple demo...", run_simple_demo),
        "advanced": ("Running advanced demo...", run_advanced_demo),
    }
    entry = dispatch.get(selected)
    if entry is None:
        # Any other mode value is a no-op.
        return

    banner, demo = entry
    print(banner)
    demo()
|
||
|
|
||
|
|
||
|
# Script entry point: start the CLI only when this file is executed directly,
# so that importing the module does not launch a demo.
if __name__ == "__main__":
    main()
|