38 lines
1.0 KiB
Python
38 lines
1.0 KiB
Python
![]() |
import asyncio
|
||
|
from dataclasses import dataclass
|
||
|
|
||
|
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
|
||
|
|
||
|
# Model identifier; this module uses it both as the mock config's tokenizer
# and as the single served model name.
MODEL_NAME = "openai-community/gpt2"
# Placeholder chat template; the test only checks that this exact string ends
# up on the serving object's tokenizer.
CHAT_TEMPLATE = "Dummy chat template for testing {}"
|
||
|
|
||
|
|
||
|
@dataclass
class MockModelConfig:
    """Stand-in for the model configuration handed out by ``MockEngine``.

    Every value below is a plain, unannotated class attribute, so
    ``@dataclass`` registers no fields for this class and the values can be
    read directly off the class object without creating an instance.
    """

    # Tokenizer-related settings.
    tokenizer_mode = "auto"
    tokenizer_revision = None
    tokenizer = MODEL_NAME
    trust_remote_code = False

    # Model length limit.
    max_model_len = 100
|
||
|
|
||
|
|
||
|
@dataclass
class MockEngine:
    """Minimal engine double that exposes only ``get_model_config``."""

    async def get_model_config(self):
        """Return the mock model configuration.

        The ``MockModelConfig`` class itself is returned, not an instance
        of it.
        """
        config = MockModelConfig
        return config
|
||
|
|
||
|
|
||
|
async def _async_serving_chat_init():
    """Construct an ``OpenAIServingChat`` backed by ``MockEngine``.

    NOTE(review): the body contains no ``await``; the coroutine wrapper
    presumably exists so that construction happens while an event loop is
    running -- confirm against ``OpenAIServingChat.__init__``.

    Returns:
        The newly constructed ``OpenAIServingChat`` object.
    """
    engine = MockEngine()
    return OpenAIServingChat(
        engine,
        served_model_names=[MODEL_NAME],
        response_role="assistant",
        chat_template=CHAT_TEMPLATE,
    )
|
||
|
|
||
|
|
||
|
def test_async_serving_chat_init():
    """Check that the chat serving object built in a coroutine has a tokenizer.

    The object is created by driving ``_async_serving_chat_init`` with
    ``asyncio.run``; its tokenizer must exist and carry ``CHAT_TEMPLATE``
    as its chat template.
    """
    serving_chat = asyncio.run(_async_serving_chat_init())
    tokenizer = serving_chat.tokenizer
    assert tokenizer is not None
    assert tokenizer.chat_template == CHAT_TEMPLATE
|