# SPDX-License-Identifier: Apache-2.0

import asyncio
import json

import httpx
from openai import OpenAI

from vllm.assets.audio import AudioAsset

mary_had_lamb = AudioAsset('mary_had_lamb').get_local_path()
winning_call = AudioAsset('winning_call').get_local_path()

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
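
# Note (not part of the original example): the requests below assume a vLLM
# server is already running at the base URL above, e.g. started with:
#
#   vllm serve openai/whisper-small
#
# The `model` field of each request must match the served model; the two
# examples below use different model names, so adjust them to your deployment.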


def sync_openai():
    with open(str(mary_had_lamb), "rb") as f:
        transcription = client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-small",
            language="en",
            response_format="json",
            temperature=0.0)
        print("transcription result:", transcription.text)


sync_openai()
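

# A variant of the call above (a sketch, not part of the original example):
# with response_format="text" the OpenAI SDK returns the transcript as a
# plain string instead of a JSON object. This assumes the server supports
# the `text` response format.
def sync_openai_text():
    with open(str(mary_had_lamb), "rb") as f:
        transcription = client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-small",
            language="en",
            response_format="text",
            temperature=0.0)
        # With response_format="text", `transcription` is a plain string.
        print("text transcription result:", transcription)


# Uncomment to try the plain-text variant:
# sync_openai_text()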


# The OpenAI SDK's transcription client does not support streaming, so call
# the endpoint directly with httpx instead.
async def stream_openai_response():
    data = {
        "language": "en",
        "stream": True,
        "model": "openai/whisper-large-v3",
    }
    url = openai_api_base + "/audio/transcriptions"
    print("transcription result:", end=' ')
    async with httpx.AsyncClient() as http_client:
        with open(str(winning_call), "rb") as f:
            async with http_client.stream('POST', url, files={'file': f},
                                          data=data) as response:
                async for line in response.aiter_lines():
                    # Each event line is a JSON object prefixed with 'data: ';
                    # skip blank keep-alive lines.
                    if not line.startswith('data: '):
                        continue
                    line = line[len('data: '):]
                    # Last chunk, stream ends
                    if line.strip() == '[DONE]':
                        break
                    # Parse the JSON response
                    chunk = json.loads(line)
                    # Extract and print the streamed content; the final chunk
                    # may carry an empty delta, so guard against None.
                    content = chunk['choices'][0].get('delta',
                                                      {}).get('content')
                    if content:
                        print(content, end='', flush=True)
    # Finish the line once the stream ends.
    print()


# Run the asynchronous function
asyncio.run(stream_openai_response())
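

# Sketch (not part of the original example): the same streaming request, but
# collecting the chunks into a full transcript string instead of printing
# them as they arrive. Assumes the same SSE payload shape handled above.
async def collect_streamed_transcript() -> str:
    data = {
        "language": "en",
        "stream": True,
        "model": "openai/whisper-large-v3",
    }
    url = openai_api_base + "/audio/transcriptions"
    parts = []
    async with httpx.AsyncClient() as http_client:
        with open(str(winning_call), "rb") as f:
            async with http_client.stream('POST', url, files={'file': f},
                                          data=data) as response:
                async for line in response.aiter_lines():
                    if not line.startswith('data: '):
                        continue
                    payload = line[len('data: '):]
                    if payload.strip() == '[DONE]':
                        break
                    content = json.loads(payload)['choices'][0].get(
                        'delta', {}).get('content')
                    if content:
                        parts.append(content)
    return ''.join(parts)


# Uncomment to collect and print the full transcript in one go:
# print(asyncio.run(collect_streamed_transcript()))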