[Misc] refactor examples series (#16708)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>

parent 21378a2323
commit 7168920491
@@ -50,6 +50,13 @@ def initialize_engine(args: argparse.Namespace) -> LLMEngine:
     return LLMEngine.from_engine_args(engine_args)
 
 
+def parse_args():
+    parser = FlexibleArgumentParser(
+        description='Demo on using the LLMEngine class directly')
+    parser = EngineArgs.add_cli_args(parser)
+    return parser.parse_args()
+
+
 def main(args: argparse.Namespace):
     """Main function that sets up and runs the prompt processing."""
     engine = initialize_engine(args)
@@ -58,8 +65,5 @@ def main(args: argparse.Namespace):
 
 
 if __name__ == '__main__':
-    parser = FlexibleArgumentParser(
-        description='Demo on using the LLMEngine class directly')
-    parser = EngineArgs.add_cli_args(parser)
-    args = parser.parse_args()
+    args = parse_args()
     main(args)
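The two hunks above are the template for the whole series: CLI wiring moves into a parse_args() helper, the entry-point logic into main(args), and the `if __name__` block shrinks to a two-line stub. A minimal standalone sketch of the target shape (plain argparse stands in for vLLM's FlexibleArgumentParser; the --port flag is illustrative):

```python
import argparse


def parse_args() -> argparse.Namespace:
    # All CLI wiring lives in one place and can be reused by tests.
    parser = argparse.ArgumentParser(description='Example script')
    parser.add_argument('--port', type=int, default=8000)
    return parser.parse_args()


def main(args: argparse.Namespace) -> None:
    # The entry point receives a parsed namespace rather than reading
    # sys.argv itself, so it can be driven programmatically.
    print(f'would connect to port {args.port}')


if __name__ == '__main__':
    args = parse_args()
    main(args)
```

One payoff: the module can now be imported (for docs builds or tests) without executing anything at import time.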
@@ -23,10 +23,6 @@ import gradio as gr
 from openai import OpenAI
 
 
-def create_openai_client(api_key, base_url):
-    return OpenAI(api_key=api_key, base_url=base_url)
-
-
 def format_history_to_openai(history):
     history_openai_format = [{
         "role": "system",
@@ -303,12 +303,7 @@ example_function_map = {
 }
 
 
-def main(args) -> None:
-    chat_type = args.chat_type
-    example_function_map[chat_type]()
-
-
-if __name__ == "__main__":
+def parse_args():
     parser = FlexibleArgumentParser(
         description='Demo on using OpenAI client for online serving with '
         'multimodal language models served with vLLM.')
@@ -318,5 +313,14 @@ if __name__ == "__main__":
         default="single-image",
         choices=list(example_function_map.keys()),
         help='Conversation type with multimodal data.')
-    args = parser.parse_args()
+    return parser.parse_args()
+
+
+def main(args) -> None:
+    chat_type = args.chat_type
+    example_function_map[chat_type]()
+
+
+if __name__ == "__main__":
+    args = parse_args()
     main(args)
@@ -18,15 +18,6 @@ from openai import OpenAI
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-client = OpenAI(
-    # defaults to os.environ.get("OPENAI_API_KEY")
-    api_key=openai_api_key,
-    base_url=openai_api_base,
-)
-
-models = client.models.list()
-model = models.data[0].id
-
 tools = [
     {
         "type": "function",
@@ -116,6 +107,17 @@ messages = [
     },
 ]
 
-chat_completion = client.chat.completions.create(
-    messages=messages,
-    model=model,
+
+def main():
+    client = OpenAI(
+        # defaults to os.environ.get("OPENAI_API_KEY")
+        api_key=openai_api_key,
+        base_url=openai_api_base,
+    )
+
+    models = client.models.list()
+    model = models.data[0].id
+
+    chat_completion = client.chat.completions.create(
+        messages=messages,
+        model=model,
@@ -134,3 +136,7 @@ chat_completion = client.chat.completions.create(messages=messages,
-                                                 tool_choice="required")
+                                                     tool_choice="required")
 
-print(chat_completion.choices[0].message.tool_calls)
+    print(chat_completion.choices[0].message.tool_calls)
+
+
+if __name__ == "__main__":
+    main()
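The same motive drives this file: the OpenAI client, previously constructed at import time, is now built inside main(). A minimal sketch of the difference, assuming a vLLM server at localhost:8000:

```python
from openai import OpenAI

openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"


def main():
    # Constructing the client here rather than at module level means
    # importing this file no longer needs a reachable server.
    client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)
    model = client.models.list().data[0].id
    print("using model:", model)


if __name__ == "__main__":
    main()
```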
@@ -3,8 +3,8 @@
 An example shows how to generate chat completions from reasoning models
 like DeepSeekR1.
 
-To run this example, you need to start the vLLM server with the reasoning
-parser:
+To run this example, you need to start the vLLM server
+with the reasoning parser:
 
 ```bash
 vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
@@ -21,6 +21,8 @@ from openai import OpenAI
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-client = OpenAI(
-    api_key=openai_api_key,
-    base_url=openai_api_base,
+
+def main():
+    client = OpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
@@ -31,6 +33,7 @@ model = models.data[0].id
 
-# Round 1
-messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
-# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
-response = client.chat.completions.create(model=model, messages=messages)
+    # Round 1
+    messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
+    # ruff: noqa: E501
+    # For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
+    response = client.chat.completions.create(model=model, messages=messages)
 
@@ -43,8 +46,10 @@ print("content for Round 1:", content)
-# Round 2
-messages.append({"role": "assistant", "content": content})
-messages.append({
-    "role": "user",
-    "content": "How many Rs are there in the word 'strawberry'?",
-})
-response = client.chat.completions.create(model=model, messages=messages)
+    # Round 2
+    messages.append({"role": "assistant", "content": content})
+    messages.append({
+        "role":
+        "user",
+        "content":
+        "How many Rs are there in the word 'strawberry'?",
+    })
+    response = client.chat.completions.create(model=model, messages=messages)
 
@@ -53,3 +58,7 @@ content = response.choices[0].message.content
 
-print("reasoning_content for Round 2:", reasoning_content)
-print("content for Round 2:", content)
+    print("reasoning_content for Round 2:", reasoning_content)
+    print("content for Round 2:", content)
+
+
+if __name__ == "__main__":
+    main()
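Condensed, the two-round flow this file wraps in main() looks like the sketch below (the `two_rounds` helper is introduced here for illustration). The design point worth noticing: only the assistant's final `content` goes back into the history; the `reasoning_content` is printed but deliberately kept out of the context.

```python
from openai import OpenAI


def two_rounds(client: OpenAI, model: str) -> None:
    messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
    first = client.chat.completions.create(model=model, messages=messages)
    # Feed back only the answer, not the chain of thought.
    messages.append({
        "role": "assistant",
        "content": first.choices[0].message.content,
    })
    messages.append({
        "role": "user",
        "content": "How many Rs are there in the word 'strawberry'?",
    })
    second = client.chat.completions.create(model=model, messages=messages)
    print(second.choices[0].message.content)
```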
@@ -29,6 +29,10 @@ from openai import OpenAI
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-client = OpenAI(
-    api_key=openai_api_key,
-    base_url=openai_api_base,
+messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
+
+
+def main():
+    client = OpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
@@ -37,8 +41,8 @@ client = OpenAI(
-models = client.models.list()
-model = models.data[0].id
+    models = client.models.list()
+    model = models.data[0].id
 
-messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
-# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
-stream = client.chat.completions.create(model=model,
-                                        messages=messages,
-                                        stream=True)
+    # ruff: noqa: E501
+    # For granite: add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
+    stream = client.chat.completions.create(model=model,
+                                            messages=messages,
+                                            stream=True)
@@ -67,3 +71,7 @@ for chunk in stream:
-            print("\ncontent:", end="", flush=True)
-        # Extract and print the content
-        print(content, end="", flush=True)
+                print("\ncontent:", end="", flush=True)
+            # Extract and print the content
+            print(content, end="", flush=True)
+
+
+if __name__ == "__main__":
+    main()
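The streaming loop this file moves into main() can be summarized as below; a sketch assuming vLLM's reasoning parser attaches a `reasoning_content` field to each streamed delta alongside the usual `content` (the `print_stream` name is illustrative):

```python
def print_stream(stream) -> None:
    for chunk in stream:
        delta = chunk.choices[0].delta
        # Reasoning tokens arrive first, then the answer tokens.
        reasoning = getattr(delta, "reasoning_content", None)
        if reasoning:
            print(reasoning, end="", flush=True)
        elif delta.content:
            print(delta.content, end="", flush=True)
```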
@@ -98,7 +98,7 @@ def dse_qwen2_vl(inp: dict):
     print("Embedding output:", response_json["data"][0]["embedding"])
 
 
-if __name__ == '__main__':
+def parse_args():
     parser = argparse.ArgumentParser(
         "Script to call a specified VLM through the API. Make sure to serve "
         "the model with --task embed before running this.")
@@ -107,8 +107,10 @@ if __name__ == '__main__':
                         choices=["vlm2vec", "dse_qwen2_vl"],
                         required=True,
                         help="Which model to call.")
-    args = parser.parse_args()
+    return parser.parse_args()
+
 
+def main(args):
     if args.model == "vlm2vec":
         vlm2vec()
     elif args.model == "dse_qwen2_vl":
@@ -120,3 +122,8 @@ if __name__ == '__main__':
             "type": "text",
             "content": "What is the weather like today?",
         })
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    main(args)
@@ -6,6 +6,8 @@ from openai import OpenAI
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-client = OpenAI(
-    # defaults to os.environ.get("OPENAI_API_KEY")
-    api_key=openai_api_key,
+
+def main():
+    client = OpenAI(
+        # defaults to os.environ.get("OPENAI_API_KEY")
+        api_key=openai_api_key,
@@ -25,9 +27,15 @@ completion = client.completions.create(
-    stream=stream,
-    logprobs=3)
+        stream=stream,
+        logprobs=3)
 
-print("Completion results:")
-if stream:
-    for c in completion:
-        print(c)
-else:
-    print(completion)
+    print("-" * 50)
+    print("Completion results:")
+    if stream:
+        for c in completion:
+            print(c)
+    else:
+        print(completion)
+    print("-" * 50)
+
+
+if __name__ == "__main__":
+    main()
@@ -16,13 +16,15 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response:
     return response
 
 
-if __name__ == "__main__":
+def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="localhost")
     parser.add_argument("--port", type=int, default=8000)
     parser.add_argument("--model", type=str, default="BAAI/bge-reranker-v2-m3")
+    return parser.parse_args()
+
 
-    args = parser.parse_args()
+def main(args):
     api_url = f"http://{args.host}:{args.port}/score"
     model_name = args.model
 
@@ -30,9 +32,9 @@ if __name__ == "__main__":
     text_2 = "The capital of Brazil is Brasilia."
     prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
     score_response = post_http_request(prompt=prompt, api_url=api_url)
-    print("Prompt when text_1 and text_2 are both strings:")
+    print("\nPrompt when text_1 and text_2 are both strings:")
     pprint.pprint(prompt)
-    print("Score Response:")
+    print("\nScore Response:")
     pprint.pprint(score_response.json())
 
     text_1 = "What is the capital of France?"
@@ -41,9 +43,9 @@ if __name__ == "__main__":
     ]
     prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
     score_response = post_http_request(prompt=prompt, api_url=api_url)
-    print("Prompt when text_1 is string and text_2 is a list:")
+    print("\nPrompt when text_1 is string and text_2 is a list:")
     pprint.pprint(prompt)
-    print("Score Response:")
+    print("\nScore Response:")
     pprint.pprint(score_response.json())
 
     text_1 = [
@@ -54,7 +56,12 @@ if __name__ == "__main__":
     ]
     prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
     score_response = post_http_request(prompt=prompt, api_url=api_url)
-    print("Prompt when text_1 and text_2 are both lists:")
+    print("\nPrompt when text_1 and text_2 are both lists:")
     pprint.pprint(prompt)
-    print("Score Response:")
+    print("\nScore Response:")
     pprint.pprint(score_response.json())
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
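For orientation, each post_http_request() call above boils down to one POST against the /score route. A minimal sketch, assuming a vLLM server on localhost:8000 serving the script's default cross-encoder:

```python
import requests

api_url = "http://localhost:8000/score"
prompt = {
    "model": "BAAI/bge-reranker-v2-m3",
    "text_1": "What is the capital of Brazil?",
    "text_2": "The capital of Brazil is Brasilia.",
}
# The server scores text_1 against text_2 and returns JSON.
response = requests.post(api_url, json=prompt)
print(response.json())
```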
@@ -6,6 +6,8 @@ from openai import OpenAI
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-client = OpenAI(
-    # defaults to os.environ.get("OPENAI_API_KEY")
-    api_key=openai_api_key,
+
+def main():
+    client = OpenAI(
+        # defaults to os.environ.get("OPENAI_API_KEY")
+        api_key=openai_api_key,
@@ -16,6 +18,7 @@ models = client.models.list()
-model = models.data[0].id
+    model = models.data[0].id
 
-responses = client.embeddings.create(
-    input=[
-        "Hello my name is",
-        "The best thing about vLLM is that it supports many different models"
+    responses = client.embeddings.create(
+        # ruff: noqa: E501
+        input=[
+            "Hello my name is",
+            "The best thing about vLLM is that it supports many different models"
@@ -25,3 +28,7 @@ responses = client.embeddings.create(
 
-for data in responses.data:
-    print(data.embedding)  # List of float of len 4096
+    for data in responses.data:
+        print(data.embedding)  # List of float of len 4096
+
+
+if __name__ == "__main__":
+    main()
@@ -17,7 +17,7 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response:
     return response
 
 
-if __name__ == "__main__":
+def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="localhost")
     parser.add_argument("--port", type=int, default=8000)
@@ -25,15 +25,20 @@ if __name__ == "__main__":
                         type=str,
                         default="jason9693/Qwen2.5-1.5B-apeach")
-    args = parser.parse_args()
+    return parser.parse_args()
+
 
+def main(args):
     api_url = f"http://{args.host}:{args.port}/pooling"
     model_name = args.model
 
     # Input like Completions API
     prompt = {"model": model_name, "input": "vLLM is great!"}
     pooling_response = post_http_request(prompt=prompt, api_url=api_url)
+    print("-" * 50)
     print("Pooling Response:")
     pprint.pprint(pooling_response.json())
+    print("-" * 50)
 
     # Input like Chat API
     prompt = {
@@ -50,3 +55,9 @@ if __name__ == "__main__":
     pooling_response = post_http_request(prompt=prompt, api_url=api_url)
     print("Pooling Response:")
     pprint.pprint(pooling_response.json())
+    print("-" * 50)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
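A side benefit of the parse_args()/main(args) split applied across all of these files: the scripts can now be driven without the CLI. A sketch, where `pooling_client` is a hypothetical module name for the pooling example above:

```python
from argparse import Namespace

import pooling_client  # hypothetical import of the example script

# Build the namespace directly instead of parsing sys.argv.
args = Namespace(host="localhost",
                 port=8000,
                 model="jason9693/Qwen2.5-1.5B-apeach")
pooling_client.main(args)
```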