2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
2024-06-07 11:23:32 -07:00
|
|
|
import openai
|
|
|
|
import pytest
|
2024-08-26 21:33:17 -07:00
|
|
|
import pytest_asyncio
|
2025-03-30 17:01:34 +08:00
|
|
|
import requests
|
|
|
|
from PIL import Image
|
|
|
|
from transformers import AutoProcessor
|
2024-06-07 11:23:32 -07:00
|
|
|
|
2024-07-23 12:32:02 +08:00
|
|
|
from vllm.multimodal.utils import encode_image_base64, fetch_image
|
2024-06-07 11:23:32 -07:00
|
|
|
|
2024-08-31 16:35:53 -07:00
|
|
|
from ...utils import RemoteOpenAIServer
|
2024-06-07 11:23:32 -07:00
|
|
|
|
2024-08-31 16:35:53 -07:00
|
|
|
# Model checkpoint that the module-scoped server fixture launches.
MODEL_NAME: str = "microsoft/Phi-3.5-vision-instruct"
# Per-prompt image cap passed to the server via --limit-mm-per-prompt.
MAXIMUM_IMAGES: int = 2

# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
TEST_IMAGE_URLS: list[str] = [
    # .jpg photograph
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
    # .png, grayscale sample (per its file name)
    "https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png",
    # .png rendering of an SVG RGB diagram (per its file name)
    "https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png",
    # .png with an alpha channel (per its file name)
    "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
]
|
|
|
|
|
|
|
|
|
2024-06-14 02:21:53 +08:00
|
|
|
@pytest.fixture(scope="module")
def server():
    """Start one ``RemoteOpenAIServer`` for ``MODEL_NAME`` per test module.

    The server is launched with a small context window and batch size,
    eager execution, remote code trusted, and a per-prompt image limit of
    ``MAXIMUM_IMAGES``.

    Yields:
        The ``RemoteOpenAIServer`` instance; its context manager is exited
        once the module-scoped fixture is torn down.
    """
    # Function-local import so this fixture does not depend on edits to the
    # file's import block.
    import json

    args = [
        "--task",
        "generate",
        "--max-model-len",
        "2048",
        "--max-num-seqs",
        "5",
        "--enforce-eager",
        "--trust-remote-code",
        "--limit-mm-per-prompt",
        # Emit real JSON: str(dict) gives a Python repr with single quotes
        # ({'image': 2}), which a JSON parser rejects.
        json.dumps({"image": MAXIMUM_IMAGES}),
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server
|
2024-06-07 11:23:32 -07:00
|
|
|
|
|
|
|
|
2024-08-26 21:33:17 -07:00
|
|
|
@pytest_asyncio.fixture
async def client(server):
    """Yield an async OpenAI client obtained from the ``server`` fixture.

    The client is created through ``server.get_async_client()`` and used as
    an async context manager, so it is released when the test finishes.
    """
    client_cm = server.get_async_client()
    async with client_cm as openai_client:
        yield openai_client
|
2024-06-07 11:23:32 -07:00
|
|
|
|
|
|
|
|
2024-07-23 12:32:02 +08:00
|
|
|
@pytest.fixture(scope="session")
def base64_encoded_image() -> dict[str, str]:
    """Map every URL in ``TEST_IMAGE_URLS`` to its base64-encoded image.

    Each image is fetched with ``fetch_image`` and encoded with
    ``encode_image_base64``. The fixture is session-scoped, so this work
    happens once per test session.
    """
    encoded_by_url: dict[str, str] = {}
    for url in TEST_IMAGE_URLS:
        fetched = fetch_image(url)
        encoded_by_url[url] = encode_image_base64(fetched)
    return encoded_by_url
|
|
|
|
|
|
|
|
|
2025-03-30 17:01:34 +08:00
|
|
|
def get_hf_prompt_tokens(model_name: str, content: str,
                         image_url: str) -> int:
    """Count the prompt tokens the HF processor produces for one image turn.

    Builds a single user message consisting of the ``<|image_1|>``
    placeholder followed by ``content``, renders it with the tokenizer's
    chat template (generation prompt included), and runs the processor on
    the rendered prompt together with the image at ``image_url``.

    Args:
        model_name: Hugging Face model id passed to
            ``AutoProcessor.from_pretrained``.
        content: Text of the user message.
        image_url: URL of the image that accompanies the message.

    Returns:
        The sequence length (``shape[1]``) of the processor's ``input_ids``.
    """
    processor = AutoProcessor.from_pretrained(model_name,
                                              trust_remote_code=True,
                                              num_crops=4)

    placeholder = "<|image_1|>\n"
    messages = [{
        "role": "user",
        "content": f"{placeholder}{content}",
    }]
    # Load the image with the same helper the base64 fixture uses rather
    # than handing PIL the raw stream of an unchecked `requests` response
    # (no timeout, no status check, response never closed).
    images = [fetch_image(image_url)]

    prompt = processor.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(prompt, images, return_tensors="pt")

    return inputs.input_ids.shape[1]
|
|
|
|
|
|
|
|
|
2024-06-07 11:23:32 -07:00
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_single_chat_session_image(client: openai.AsyncOpenAI,
                                         model_name: str, image_url: str):
    """Two-turn chat about one image referenced by URL.

    The first request is greedy (``temperature=0.0``) and capped at
    ``max_completion_tokens``. It must return exactly one choice that
    stopped because of the length cap, and its reported usage must match
    the prompt-token count computed by ``get_hf_prompt_tokens``. The reply
    is then appended to the conversation and a follow-up user turn is sent.
    """
    content_text = "What's in this image?"
    messages = [{
        "role":
        "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {
                    "url": image_url
                }
            },
            {
                "type": "text",
                "text": content_text
            },
        ],
    }]

    max_completion_tokens = 10
    # test single completion
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        logprobs=True,
        temperature=0.0,
        top_logprobs=5)
    assert len(chat_completion.choices) == 1

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
    hf_prompt_tokens = get_hf_prompt_tokens(model_name, content_text,
                                            image_url)
    assert chat_completion.usage == openai.types.CompletionUsage(
        completion_tokens=max_completion_tokens,
        prompt_tokens=hf_prompt_tokens,
        total_tokens=hf_prompt_tokens + max_completion_tokens)

    # `choice` is already choices[0]; no second lookup is needed.
    message = choice.message
    assert message.content is not None and len(message.content) >= 10
    assert message.role == "assistant"
    messages.append({"role": "assistant", "content": message.content})

    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        # Greedy decoding, matching the first turn and the base64 variant
        # of this test, so the follow-up request is deterministic.
        temperature=0.0,
    )
    message = chat_completion.choices[0].message
    # A `len(...) >= 0` check can never fail, so only non-None is asserted.
    assert message.content is not None
|
|
|
|
|
|
|
|
|
2024-10-29 19:49:47 +08:00
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
                                                    model_name: str,
                                                    image_url: str):
    """Beam search over an image-URL prompt yields two distinct replies.

    Requests ``n=2`` completions with ``use_beam_search`` enabled through
    ``extra_body`` and checks that two choices come back with different
    message contents.
    """
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    text_part = {"type": "text", "text": "What's in this image?"}
    messages = [{"role": "user", "content": [image_part, text_part]}]

    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=2,
        max_completion_tokens=10,
        logprobs=True,
        top_logprobs=5,
        extra_body={"use_beam_search": True},
    )

    beams = chat_completion.choices
    assert len(beams) == 2
    first_text = beams[0].message.content
    second_text = beams[1].message.content
    assert first_text != second_text
|
|
|
|
|
|
|
|
|
2024-06-07 11:23:32 -07:00
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_single_chat_session_image_base64encoded(
        client: openai.AsyncOpenAI, model_name: str, image_url: str,
        base64_encoded_image: dict[str, str]):
    """Two-turn chat about one image supplied inline as a base64 data URL.

    Mirrors the URL-based single-session test, except that the image is
    sent as ``data:image/jpeg;base64,...`` using the payload looked up in
    the ``base64_encoded_image`` fixture. The first reply must stop on the
    length cap and report usage matching ``get_hf_prompt_tokens``; the
    reply is then appended and a follow-up user turn is sent.
    """
    content_text = "What's in this image?"
    image_data_url = (
        f"data:image/jpeg;base64,{base64_encoded_image[image_url]}")
    messages = [{
        "role":
        "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {
                    "url": image_data_url
                }
            },
            {
                "type": "text",
                "text": content_text
            },
        ],
    }]

    max_completion_tokens = 10
    # test single completion
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        logprobs=True,
        temperature=0.0,
        top_logprobs=5)
    assert len(chat_completion.choices) == 1

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
    # The expected count is computed from the original URL, not from the
    # base64 payload.
    hf_prompt_tokens = get_hf_prompt_tokens(model_name, content_text,
                                            image_url)
    assert chat_completion.usage == openai.types.CompletionUsage(
        completion_tokens=max_completion_tokens,
        prompt_tokens=hf_prompt_tokens,
        total_tokens=hf_prompt_tokens + max_completion_tokens)

    # `choice` is already choices[0]; no second lookup is needed.
    message = choice.message
    assert message.content is not None and len(message.content) >= 10
    assert message.role == "assistant"
    messages.append({"role": "assistant", "content": message.content})

    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        temperature=0.0,
    )
    message = chat_completion.choices[0].message
    # A `len(...) >= 0` check can never fail, so only non-None is asserted.
    assert message.content is not None
|
|
|
|
|
|
|
|
|
2024-10-29 19:49:47 +08:00
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_single_chat_session_image_base64encoded_beamsearch(
        client: openai.AsyncOpenAI, model_name: str, image_url: str,
        base64_encoded_image: dict[str, str]):
    """Beam search over a base64 data-URL image yields two distinct replies.

    Sends the image inline as ``data:image/jpeg;base64,...`` (payload taken
    from the ``base64_encoded_image`` fixture), requests ``n=2`` completions
    with ``use_beam_search`` enabled, and checks that the two returned
    message contents differ.
    """
    data_url = f"data:image/jpeg;base64,{base64_encoded_image[image_url]}"
    image_part = {"type": "image_url", "image_url": {"url": data_url}}
    text_part = {"type": "text", "text": "What's in this image?"}
    messages = [{"role": "user", "content": [image_part, text_part]}]

    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=2,
        max_completion_tokens=10,
        extra_body={"use_beam_search": True},
    )

    beams = chat_completion.choices
    assert len(beams) == 2
    first_text = beams[0].message.content
    second_text = beams[1].message.content
    assert first_text != second_text
|
|
|
|
|
|
|
|
|
2024-06-07 11:23:32 -07:00
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_chat_streaming_image(client: openai.AsyncOpenAI,
                                    model_name: str, image_url: str):
    """Streaming and non-streaming chat must agree for an image prompt.

    The same greedy request is issued twice, once normally and once with
    ``stream=True``. The concatenated streamed deltas must equal the
    non-streamed content, and exactly one streamed chunk may carry a finish
    reason, which must equal the non-streamed one.
    """
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    text_part = {"type": "text", "text": "What's in this image?"}
    messages = [{"role": "user", "content": [image_part, text_part]}]

    # Arguments shared by the plain and the streamed request.
    request_kwargs = {
        "model": model_name,
        "messages": messages,
        "max_completion_tokens": 10,
        "temperature": 0.0,
    }

    # test single completion
    chat_completion = await client.chat.completions.create(**request_kwargs)
    reference_choice = chat_completion.choices[0]
    output = reference_choice.message.content
    stop_reason = reference_choice.finish_reason

    # test streaming
    stream = await client.chat.completions.create(**request_kwargs,
                                                  stream=True)
    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        streamed_choice = chunk.choices[0]
        delta = streamed_choice.delta
        if delta.role:
            assert delta.role == "assistant"
        if delta.content:
            chunks.append(delta.content)
        if streamed_choice.finish_reason is not None:
            finish_reason_count += 1

    # finish reason should only return in last block
    assert finish_reason_count == 1
    # `chunk` and `delta` still refer to the final streamed chunk here.
    assert chunk.choices[0].finish_reason == stop_reason
    assert delta.content
    assert "".join(chunks) == output
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_URLS[:i] for i in range(2, len(TEST_IMAGE_URLS))])
async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
                                 image_urls: list[str]):
    """Send several images in one user turn, within and over the limit.

    With at most ``MAXIMUM_IMAGES`` images the chat request must return a
    message. With more, the request must raise ``openai.BadRequestError``,
    after which a plain completion request checks that the server still
    responds.
    """
    image_parts = [{
        "type": "image_url",
        "image_url": {
            "url": image_url
        }
    } for image_url in image_urls]
    text_part = {"type": "text", "text": "What's in this image?"}
    messages = [{"role": "user", "content": [*image_parts, text_part]}]

    if len(image_urls) <= MAXIMUM_IMAGES:
        chat_completion = await client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_completion_tokens=10,
            temperature=0.0,
        )
        message = chat_completion.choices[0].message
        assert message.content is not None and len(message.content) >= 0
        return

    with pytest.raises(openai.BadRequestError):  # test multi-image input
        await client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_completion_tokens=10,
            temperature=0.0,
        )

    # the server should still work afterwards
    completion = await client.completions.create(
        model=model_name,
        prompt=[0, 0, 0, 0, 0],
        max_tokens=5,
        temperature=0.0,
    )
    completion_text = completion.choices[0].text
    assert completion_text is not None and len(completion_text) >= 0
|