[Misc][OpenAI] deprecate max_tokens in favor of new max_completion_tokens field for chat completion endpoint (#9837)
This commit is contained in: parent 64384bbcdf, commit abbfb6134d
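
In client terms, the change means chat completion requests should move to the new field; the old one keeps working but is marked deprecated. A minimal sketch of the client-facing effect (the base URL and model name below are placeholders, not part of this commit):

```python
from openai import OpenAI

# Placeholder endpoint and model: any vLLM OpenAI-compatible server works here.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

chat_completion = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "Hello world!"}],
    # Previously: max_tokens=64. The old field is still accepted but deprecated.
    max_completion_tokens=64,
)
print(chat_completion.choices[0].message.content)
```

Sending `max_completion_tokens` through the official client requires openai >= 1.45.0, which is why the requirements pin further down moves up from 1.40.0.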
@@ -324,7 +324,7 @@ async def async_request_openai_chat_completions(
                 },
             ],
             "temperature": 0.0,
-            "max_tokens": request_func_input.output_len,
+            "max_completion_tokens": request_func_input.output_len,
             "stream": True,
             "ignore_eos": request_func_input.ignore_eos,
         }

@@ -109,7 +109,7 @@ SkyPilot can scale up the service to multiple service replicas with built-in aut
         messages:
           - role: user
             content: Hello! What is your name?
-        max_tokens: 1
+        max_completion_tokens: 1

 .. raw:: html

@@ -129,7 +129,7 @@ SkyPilot can scale up the service to multiple service replicas with built-in aut
         messages:
           - role: user
             content: Hello! What is your name?
-        max_tokens: 1
+        max_completion_tokens: 1

 resources:
   accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB} # We can use cheaper accelerators for 8B model.
@@ -255,7 +255,7 @@ This will scale the service up to when the QPS exceeds 2 for each replica.
         messages:
           - role: user
             content: Hello! What is your name?
-        max_tokens: 1
+        max_completion_tokens: 1

 resources:
   accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB} # We can use cheaper accelerators for 8B model.

@@ -35,8 +35,8 @@

 ```
 $ cat openai_example_batch.jsonl
-{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
-{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
+{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
+{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 ```

 ### Step 2: Run the batch
@@ -94,8 +94,8 @@ To follow along with this example, you can download the example batch, or create

 ```
 $ cat openai_example_batch.jsonl
-{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
-{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
+{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
+{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 ```

 Now upload your batch file to your S3 bucket.

@@ -53,7 +53,7 @@ def run_text_only() -> None:
             "content": "What's the capital of France?"
         }],
         model=model,
-        max_tokens=64,
+        max_completion_tokens=64,
     )

     result = chat_completion.choices[0].message.content
@@ -83,7 +83,7 @@ def run_single_image() -> None:
             ],
         }],
         model=model,
-        max_tokens=64,
+        max_completion_tokens=64,
     )

     result = chat_completion_from_url.choices[0].message.content
@@ -109,7 +109,7 @@ def run_single_image() -> None:
             ],
         }],
         model=model,
-        max_tokens=64,
+        max_completion_tokens=64,
     )

     result = chat_completion_from_base64.choices[0].message.content
@@ -144,7 +144,7 @@ def run_multi_image() -> None:
             ],
         }],
         model=model,
-        max_tokens=64,
+        max_completion_tokens=64,
     )

     result = chat_completion_from_url.choices[0].message.content
@@ -175,7 +175,7 @@ def run_audio() -> None:
             ],
         }],
         model=model,
-        max_tokens=64,
+        max_completion_tokens=64,
     )

     result = chat_completion_from_url.choices[0].message.content
@@ -201,7 +201,7 @@ def run_audio() -> None:
             ],
         }],
         model=model,
-        max_tokens=64,
+        max_completion_tokens=64,
     )

     result = chat_completion_from_base64.choices[0].message.content

@@ -1,2 +1,2 @@
-{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
-{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
+{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
+{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}

@@ -10,7 +10,7 @@ protobuf # Required by LlamaTokenizer.
 fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'
 fastapi >= 0.107.0, != 0.113.*, != 0.114.0; python_version >= '3.9'
 aiohttp
-openai >= 1.40.0 # Ensure modern openai package (ensure types module present)
+openai >= 1.45.0 # Ensure modern openai package (ensure types module present and max_completion_tokens field support)
 uvicorn[standard]
 pydantic >= 2.9 # Required for fastapi >= 0.113.0
 pillow # Required for image processing

@@ -68,11 +68,12 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
     }]

     # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=10,
-                                                            logprobs=True,
-                                                            top_logprobs=5)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=10,
+        logprobs=True,
+        top_logprobs=5)
     assert len(chat_completion.choices) == 1

     choice = chat_completion.choices[0]
@@ -91,7 +92,7 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
     )
     message = chat_completion.choices[0].message
     assert message.content is not None and len(message.content) >= 0
@@ -123,11 +124,12 @@ async def test_single_chat_session_audio_base64encoded(
     }]

     # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=10,
-                                                            logprobs=True,
-                                                            top_logprobs=5)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=10,
+        logprobs=True,
+        top_logprobs=5)
     assert len(chat_completion.choices) == 1

     choice = chat_completion.choices[0]
@@ -146,7 +148,7 @@ async def test_single_chat_session_audio_base64encoded(
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
     )
     message = chat_completion.choices[0].message
     assert message.content is not None and len(message.content) >= 0
@@ -178,7 +180,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
     )
     output = chat_completion.choices[0].message.content
@@ -188,7 +190,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
         stream=True,
     )
@@ -242,7 +244,7 @@ async def test_multi_audio_input(client: openai.AsyncOpenAI, model_name: str,
         await client.chat.completions.create(
             model=model_name,
             messages=messages,
-            max_tokens=10,
+            max_completion_tokens=10,
             temperature=0.0,
         )

@@ -65,11 +65,12 @@ async def test_no_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
         "content": "what is 1+1?"
     }]

-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=5,
-                                                            temperature=0.0,
-                                                            logprobs=False)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=5,
+        temperature=0.0,
+        logprobs=False)

     choice = chat_completion.choices[0]
     assert choice.logprobs is None
@@ -90,12 +91,13 @@ async def test_zero_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
         "content": "what is 1+1?"
     }]

-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=5,
-                                                            temperature=0.0,
-                                                            logprobs=True,
-                                                            top_logprobs=0)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=5,
+        temperature=0.0,
+        logprobs=True,
+        top_logprobs=0)

     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
@@ -117,12 +119,13 @@ async def test_some_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
         "content": "what is 1+1?"
     }]

-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=5,
-                                                            temperature=0.0,
-                                                            logprobs=True,
-                                                            top_logprobs=5)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=5,
+        temperature=0.0,
+        logprobs=True,
+        top_logprobs=5)

     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
@@ -149,7 +152,7 @@ async def test_too_many_chat_logprobs(client: openai.AsyncOpenAI,
     with pytest.raises((openai.BadRequestError, openai.APIError)):
         stream = await client.chat.completions.create(model=model_name,
                                                        messages=messages,
-                                                       max_tokens=10,
+                                                       max_completion_tokens=10,
                                                        logprobs=True,
                                                        top_logprobs=21,
                                                        stream=True)
@@ -159,16 +162,17 @@ async def test_too_many_chat_logprobs(client: openai.AsyncOpenAI,
     with pytest.raises(openai.BadRequestError):
         await client.chat.completions.create(model=model_name,
                                              messages=messages,
-                                             max_tokens=10,
+                                             max_completion_tokens=10,
                                              logprobs=True,
                                              top_logprobs=30,
                                              stream=False)

     # the server should still work afterwards
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=10,
-                                                            stream=False)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=10,
+        stream=False)
     message = chat_completion.choices[0].message
     assert message.content is not None and len(message.content) >= 0

@@ -271,11 +275,12 @@ async def test_single_chat_session(client: openai.AsyncOpenAI,
     }]

     # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=10,
-                                                            logprobs=True,
-                                                            top_logprobs=5)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=10,
+        logprobs=True,
+        top_logprobs=5)
     assert chat_completion.id is not None
     assert len(chat_completion.choices) == 1

@@ -294,7 +299,7 @@ async def test_single_chat_session(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
     )
     message = chat_completion.choices[0].message
     assert message.content is not None and len(message.content) >= 0
@@ -319,7 +324,7 @@ async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
     )
     output = chat_completion.choices[0].message.content
@@ -329,7 +334,7 @@ async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
     stream = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
         stream=True,
     )
@@ -369,7 +374,7 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
         stream=True,
         stream_options={"include_usage": False})
@@ -380,7 +385,7 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
     # "continuous_usage_stats": False}}
     stream = await client.chat.completions.create(model=model_name,
                                                    messages=messages,
-                                                   max_tokens=10,
+                                                   max_completion_tokens=10,
                                                    temperature=0.0,
                                                    stream=True,
                                                    stream_options={
@@ -409,7 +414,7 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
         await client.chat.completions.create(
             model=model_name,
             messages=messages,
-            max_tokens=10,
+            max_completion_tokens=10,
             temperature=0.0,
             stream=False,
             stream_options={"include_usage": None})
@@ -419,7 +424,7 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
         await client.chat.completions.create(
             model=model_name,
             messages=messages,
-            max_tokens=10,
+            max_completion_tokens=10,
             temperature=0.0,
             stream=False,
             stream_options={"include_usage": True})
@@ -429,7 +434,7 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         extra_body=dict(min_tokens=10),
         temperature=0.0,
         stream=True,
@@ -476,7 +481,7 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         extra_body=dict(guided_choice=sample_guided_choice,
                         guided_decoding_backend=guided_decoding_backend))
     choice1 = chat_completion.choices[0].message.content
@@ -490,7 +495,7 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         extra_body=dict(guided_choice=sample_guided_choice,
                         guided_decoding_backend=guided_decoding_backend))
     choice2 = chat_completion.choices[0].message.content
@@ -517,7 +522,7 @@ async def test_guided_json_chat(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=1000,
+        max_completion_tokens=1000,
         extra_body=dict(guided_json=sample_json_schema,
                         guided_decoding_backend=guided_decoding_backend))
     message = chat_completion.choices[0].message
@@ -535,7 +540,7 @@ async def test_guided_json_chat(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=1000,
+        max_completion_tokens=1000,
         extra_body=dict(guided_json=sample_json_schema,
                         guided_decoding_backend=guided_decoding_backend))
     message = chat_completion.choices[0].message
@@ -563,7 +568,7 @@ async def test_guided_regex_chat(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=20,
+        max_completion_tokens=20,
         extra_body=dict(guided_regex=sample_regex,
                         guided_decoding_backend=guided_decoding_backend))
     ip1 = chat_completion.choices[0].message.content
@@ -575,7 +580,7 @@ async def test_guided_regex_chat(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=20,
+        max_completion_tokens=20,
         extra_body=dict(guided_regex=sample_regex,
                         guided_decoding_backend=guided_decoding_backend))
     ip2 = chat_completion.choices[0].message.content
@@ -623,7 +628,7 @@ async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         logprobs=True,
         top_logprobs=5,
         extra_body=dict(guided_choice=sample_guided_choice,
@@ -660,7 +665,7 @@ async def test_named_tool_use(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=1000,
+        max_completion_tokens=1000,
         tools=[{
             "type": "function",
             "function": {
@@ -694,7 +699,7 @@ async def test_named_tool_use(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=messages,
-        max_tokens=1000,
+        max_completion_tokens=1000,
         tools=[{
             "type": "function",
             "function": {
@@ -750,7 +755,7 @@ async def test_required_tool_use_not_yet_supported(
         await client.chat.completions.create(
             model=MODEL_NAME,
             messages=messages,
-            max_tokens=1000,
+            max_completion_tokens=1000,
             tools=[{
                 "type": "function",
                 "function": {
@@ -765,7 +770,7 @@ async def test_required_tool_use_not_yet_supported(
         await client.chat.completions.create(
             model=MODEL_NAME,
             messages=messages,
-            max_tokens=1000,
+            max_completion_tokens=1000,
             tools=[{
                 "type": "function",
                 "function": {
@@ -796,7 +801,7 @@ async def test_inconsistent_tool_choice_and_tools(client: openai.AsyncOpenAI,
     with pytest.raises(openai.BadRequestError):
         await client.chat.completions.create(model=MODEL_NAME,
                                              messages=messages,
-                                             max_tokens=1000,
+                                             max_completion_tokens=1000,
                                              tool_choice={
                                                  "type": "function",
                                                  "function": {
@@ -809,7 +814,7 @@ async def test_inconsistent_tool_choice_and_tools(client: openai.AsyncOpenAI,
         await client.chat.completions.create(
             model=MODEL_NAME,
             messages=messages,
-            max_tokens=1000,
+            max_completion_tokens=1000,
             tools=[{
                 "type": "function",
                 "function": {

@@ -78,11 +78,12 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
     }]

     # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=10,
-                                                            logprobs=True,
-                                                            top_logprobs=5)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=10,
+        logprobs=True,
+        top_logprobs=5)
     assert len(chat_completion.choices) == 1

     choice = chat_completion.choices[0]
@@ -101,7 +102,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
     )
     message = chat_completion.choices[0].message
     assert message.content is not None and len(message.content) >= 0
@@ -134,7 +135,7 @@ async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
         model=model_name,
         messages=messages,
         n=2,
-        max_tokens=10,
+        max_completion_tokens=10,
         logprobs=True,
         top_logprobs=5,
         extra_body=dict(use_beam_search=True))
@@ -169,11 +170,12 @@ async def test_single_chat_session_image_base64encoded(
     }]

     # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=10,
-                                                            logprobs=True,
-                                                            top_logprobs=5)
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=10,
+        logprobs=True,
+        top_logprobs=5)
     assert len(chat_completion.choices) == 1

     choice = chat_completion.choices[0]
@@ -192,7 +194,7 @@ async def test_single_chat_session_image_base64encoded(
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
     )
     message = chat_completion.choices[0].message
     assert message.content is not None and len(message.content) >= 0
@@ -226,7 +228,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch(
         model=model_name,
         messages=messages,
         n=2,
-        max_tokens=10,
+        max_completion_tokens=10,
         extra_body=dict(use_beam_search=True))
     assert len(chat_completion.choices) == 2
     assert chat_completion.choices[
@@ -259,7 +261,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
     )
     output = chat_completion.choices[0].message.content
@@ -269,7 +271,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
         stream=True,
     )
@@ -320,7 +322,7 @@ async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
         await client.chat.completions.create(
             model=model_name,
             messages=messages,
-            max_tokens=10,
+            max_completion_tokens=10,
             temperature=0.0,
         )

@@ -337,7 +339,7 @@ async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_tokens=10,
+        max_completion_tokens=10,
         temperature=0.0,
     )
     message = chat_completion.choices[0].message

@@ -18,7 +18,7 @@ async def test_chat_completion_without_tools(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
         temperature=0,
-        max_tokens=150,
+        max_completion_tokens=150,
         model=model_name,
         logprobs=False)
     choice = chat_completion.choices[0]
@@ -38,7 +38,7 @@ async def test_chat_completion_without_tools(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
         temperature=0,
-        max_tokens=150,
+        max_completion_tokens=150,
         model=model_name,
         logprobs=False,
         stream=True,
@@ -86,7 +86,7 @@ async def test_chat_completion_with_tools(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
         temperature=0,
-        max_tokens=150,
+        max_completion_tokens=150,
         model=model_name,
         tools=[WEATHER_TOOL],
         logprobs=False)
@@ -107,7 +107,7 @@ async def test_chat_completion_with_tools(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
         temperature=0,
-        max_tokens=150,
+        max_completion_tokens=150,
         model=model_name,
         logprobs=False,
         tools=[WEATHER_TOOL],

@@ -26,7 +26,7 @@ async def test_parallel_tool_calls(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
         temperature=0,
-        max_tokens=200,
+        max_completion_tokens=200,
         model=model_name,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False)
@@ -63,7 +63,7 @@ async def test_parallel_tool_calls(client: openai.AsyncOpenAI,
         model=model_name,
         messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
         temperature=0,
-        max_tokens=200,
+        max_completion_tokens=200,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False,
         stream=True)
@@ -154,7 +154,7 @@ async def test_parallel_tool_calls_with_results(client: openai.AsyncOpenAI,
     chat_completion = await client.chat.completions.create(
         messages=MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
         temperature=0,
-        max_tokens=200,
+        max_completion_tokens=200,
         model=model_name,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False)
@@ -172,7 +172,7 @@ async def test_parallel_tool_calls_with_results(client: openai.AsyncOpenAI,
     stream = await client.chat.completions.create(
         messages=MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
         temperature=0,
-        max_tokens=200,
+        max_completion_tokens=200,
         model=model_name,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False,

@@ -17,7 +17,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
     chat_completion = await client.chat.completions.create(
         messages=MESSAGES_ASKING_FOR_TOOLS,
         temperature=0,
-        max_tokens=100,
+        max_completion_tokens=100,
         model=model_name,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False)
@@ -61,7 +61,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
         model=model_name,
         messages=MESSAGES_ASKING_FOR_TOOLS,
         temperature=0,
-        max_tokens=100,
+        max_completion_tokens=100,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False,
         stream=True)
@@ -142,7 +142,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
     chat_completion = await client.chat.completions.create(
         messages=MESSAGES_WITH_TOOL_RESPONSE,
         temperature=0,
-        max_tokens=100,
+        max_completion_tokens=100,
         model=model_name,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False)
@@ -159,7 +159,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
     stream = await client.chat.completions.create(
         messages=MESSAGES_WITH_TOOL_RESPONSE,
         temperature=0,
-        max_tokens=100,
+        max_completion_tokens=100,
         model=model_name,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
         logprobs=False,

@@ -159,7 +159,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
     logit_bias: Optional[Dict[str, float]] = None
     logprobs: Optional[bool] = False
     top_logprobs: Optional[int] = 0
-    max_tokens: Optional[int] = None
+    # TODO(#9845): remove max_tokens when field is removed from OpenAI API
+    max_tokens: Optional[int] = Field(
+        default=None,
+        deprecated=
+        'max_tokens is deprecated in favor of the max_completion_tokens field')
+    max_completion_tokens: Optional[int] = None
     n: Optional[int] = 1
     presence_penalty: Optional[float] = 0.0
     response_format: Optional[ResponseFormat] = None
@@ -295,7 +300,8 @@ class ChatCompletionRequest(OpenAIBaseModel):

     def to_beam_search_params(self,
                               default_max_tokens: int) -> BeamSearchParams:
-        max_tokens = self.max_tokens
+        # TODO(#9845): remove max_tokens when field is removed from OpenAI API
+        max_tokens = self.max_completion_tokens or self.max_tokens
         if max_tokens is None:
             max_tokens = default_max_tokens

@@ -311,7 +317,8 @@ class ChatCompletionRequest(OpenAIBaseModel):
             include_stop_str_in_output=self.include_stop_str_in_output)

     def to_sampling_params(self, default_max_tokens: int) -> SamplingParams:
-        max_tokens = self.max_tokens
+        # TODO(#9845): remove max_tokens when field is removed from OpenAI API
+        max_tokens = self.max_completion_tokens or self.max_tokens
        if max_tokens is None:
             max_tokens = default_max_tokens

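For readers skimming the protocol change: the request model now carries both fields, with the new one taking precedence. A self-contained sketch of that fallback, using made-up names (ToyChatRequest is not the real ChatCompletionRequest) and relying on pydantic >= 2.7 for `Field(deprecated=...)`, which the pydantic >= 2.9 pin above satisfies:

```python
from typing import Optional

from pydantic import BaseModel, Field


class ToyChatRequest(BaseModel):
    """Illustrative stand-in for ChatCompletionRequest; not the real class."""

    # Deprecated alias, kept so existing clients do not break.
    max_tokens: Optional[int] = Field(
        default=None,
        deprecated="max_tokens is deprecated in favor of max_completion_tokens")
    max_completion_tokens: Optional[int] = None

    def resolved_max_tokens(self, default_max_tokens: int) -> int:
        # New field wins; fall back to the deprecated field, then the default.
        return (self.max_completion_tokens or self.max_tokens
                or default_max_tokens)


print(ToyChatRequest(max_tokens=128).resolved_max_tokens(4096))            # 128
print(ToyChatRequest(max_completion_tokens=64).resolved_max_tokens(4096))  # 64
print(ToyChatRequest().resolved_max_tokens(4096))                          # 4096
```

Note that the `or` chain treats an explicit 0 the same as "unset", which is harmless here because a zero-token completion is never a meaningful request.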
@@ -263,20 +263,26 @@ class OpenAIServing:
             return TextTokensPrompt(prompt=input_text,
                                     prompt_token_ids=input_ids)

-        if request.max_tokens is None:
+        # chat completion endpoint supports max_completion_tokens
+        if isinstance(request, ChatCompletionRequest):
+            # TODO(#9845): remove max_tokens when field dropped from OpenAI API
+            max_tokens = request.max_completion_tokens or request.max_tokens
+        else:
+            max_tokens = request.max_tokens
+        if max_tokens is None:
             if token_num >= self.max_model_len:
                 raise ValueError(
                     f"This model's maximum context length is "
                     f"{self.max_model_len} tokens. However, you requested "
                     f"{token_num} tokens in the messages, "
                     f"Please reduce the length of the messages.")
-        elif token_num + request.max_tokens > self.max_model_len:
+        elif token_num + max_tokens > self.max_model_len:
             raise ValueError(
                 f"This model's maximum context length is "
                 f"{self.max_model_len} tokens. However, you requested "
-                f"{request.max_tokens + token_num} tokens "
+                f"{max_tokens + token_num} tokens "
                 f"({token_num} in the messages, "
-                f"{request.max_tokens} in the completion). "
+                f"{max_tokens} in the completion). "
                 f"Please reduce the length of the messages or completion.")

         return TextTokensPrompt(prompt=input_text, prompt_token_ids=input_ids)
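
The serving-side validation above then works off whichever limit the request resolved to. A condensed, standalone sketch of that budget check (the function and argument names are illustrative, not the actual OpenAIServing method):

```python
from typing import Optional


def check_completion_budget(token_num: int, max_model_len: int,
                            max_completion_tokens: Optional[int],
                            max_tokens: Optional[int]) -> None:
    """Toy version of the context-length check; raises if the request cannot fit."""
    # Chat requests may carry either field; the new one takes precedence.
    limit = max_completion_tokens or max_tokens
    if limit is None:
        if token_num >= max_model_len:
            raise ValueError("prompt alone already fills the context window")
    elif token_num + limit > max_model_len:
        raise ValueError("prompt plus requested completion exceeds the context window")


# Both of these fit within a 4096-token window and pass silently.
check_completion_budget(token_num=100, max_model_len=4096,
                        max_completion_tokens=200, max_tokens=None)
check_completion_budget(token_num=4000, max_model_len=4096,
                        max_completion_tokens=None, max_tokens=64)
```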