# SPDX-License-Identifier: Apache-2.0

import asyncio
import json

import httpx
from openai import OpenAI

from vllm.assets.audio import AudioAsset

mary_had_lamb = AudioAsset('mary_had_lamb').get_local_path()
winning_call = AudioAsset('winning_call').get_local_path()

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
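
# Note (not part of the original example): the requests below assume a vLLM
# server is already running at the base URL above, e.g. started with:
#
#   vllm serve openai/whisper-small
#
# The `model` field of each request must match the served model; the two
# examples below use different model names, so adjust them to your deployment.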


def sync_openai():
    with open(str(mary_had_lamb), "rb") as f:
        transcription = client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-small",
            language="en",
            response_format="json",
            temperature=0.0)
        print("transcription result:", transcription.text)


sync_openai()
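

# A variant of the call above (a sketch, not part of the original example):
# with response_format="text" the OpenAI SDK returns the transcript as a
# plain string instead of a JSON object. This assumes the server supports
# the `text` response format.
def sync_openai_text():
    with open(str(mary_had_lamb), "rb") as f:
        transcription = client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-small",
            language="en",
            response_format="text",
            temperature=0.0)
        # With response_format="text", `transcription` is a plain string.
        print("text transcription result:", transcription)


# Uncomment to try the plain-text variant:
# sync_openai_text()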


# The OpenAI SDK's transcription client does not support streaming, so call
# the endpoint directly with httpx instead.
async def stream_openai_response():
    data = {
        "language": "en",
        "stream": True,
        "model": "openai/whisper-large-v3",
    }
    url = openai_api_base + "/audio/transcriptions"
    print("transcription result:", end=' ')
    async with httpx.AsyncClient() as http_client:
        with open(str(winning_call), "rb") as f:
            async with http_client.stream('POST', url, files={'file': f},
                                          data=data) as response:
                async for line in response.aiter_lines():
                    # Each event line is a JSON object prefixed with 'data: ';
                    # skip blank keep-alive lines.
                    if not line.startswith('data: '):
                        continue
                    line = line[len('data: '):]
                    # Last chunk, stream ends
                    if line.strip() == '[DONE]':
                        break
                    # Parse the JSON response
                    chunk = json.loads(line)
                    # Extract and print the streamed content; the final chunk
                    # may carry an empty delta, so guard against None.
                    content = chunk['choices'][0].get('delta',
                                                      {}).get('content')
                    if content:
                        print(content, end='', flush=True)
    # Finish the line once the stream ends.
    print()


# Run the asynchronous function
asyncio.run(stream_openai_response())
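

# Sketch (not part of the original example): the same streaming request, but
# collecting the chunks into a full transcript string instead of printing
# them as they arrive. Assumes the same SSE payload shape handled above.
async def collect_streamed_transcript() -> str:
    data = {
        "language": "en",
        "stream": True,
        "model": "openai/whisper-large-v3",
    }
    url = openai_api_base + "/audio/transcriptions"
    parts = []
    async with httpx.AsyncClient() as http_client:
        with open(str(winning_call), "rb") as f:
            async with http_client.stream('POST', url, files={'file': f},
                                          data=data) as response:
                async for line in response.aiter_lines():
                    if not line.startswith('data: '):
                        continue
                    payload = line[len('data: '):]
                    if payload.strip() == '[DONE]':
                        break
                    content = json.loads(payload)['choices'][0].get(
                        'delta', {}).get('content')
                    if content:
                        parts.append(content)
    return ''.join(parts)


# Uncomment to collect and print the full transcript in one go:
# print(asyncio.run(collect_streamed_transcript()))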