Add property-based testing for vLLM endpoints using an API defined by an OpenAPI 3.1 schema (#16721)

Signed-off-by: Tarun Kumar <takumar@redhat.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Tarun Kumar 2025-04-18 09:38:27 +05:30 committed by GitHub
parent 183dad7a85
commit e37073efd7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 134 additions and 6 deletions

View File

@ -118,7 +118,7 @@ steps:
- pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
- pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
- VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
- pytest -v -s entrypoints/test_chat_utils.py
- VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

View File

@ -37,6 +37,7 @@ lm-eval[api]==0.4.8 # required for model evaluation test
transformers==4.51.1
tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test.
# quantization
bitsandbytes>=0.45.3
buildkite-test-collector==0.1.9

View File

@ -20,21 +20,29 @@ aiosignal==1.3.1
annotated-types==0.7.0
# via pydantic
anyio==4.6.2.post1
# via httpx
# via
# httpx
# starlette
argcomplete==3.5.1
# via datamodel-code-generator
arrow==1.3.0
# via isoduration
attrs==24.2.0
# via
# aiohttp
# hypothesis
# jsonlines
# jsonschema
# pytest-subtests
# referencing
audioread==3.0.1
# via librosa
awscli==1.35.23
# via -r requirements/test.in
backoff==2.2.1
# via -r requirements/test.in
# via
# -r requirements/test.in
# schemathesis
bitsandbytes==0.45.3
# via -r requirements/test.in
black==24.10.0
@ -69,11 +77,13 @@ click==8.1.7
# jiwer
# nltk
# ray
# schemathesis
# typer
colorama==0.4.6
# via
# awscli
# sacrebleu
# schemathesis
# tqdm-multiprocess
contourpy==1.3.0
# via matplotlib
@ -138,6 +148,8 @@ filelock==3.16.1
# transformers
fonttools==4.54.1
# via matplotlib
fqdn==1.5.1
# via jsonschema
frozendict==2.4.6
# via einx
frozenlist==1.5.0
@ -156,8 +168,12 @@ genai-perf==0.0.8
# via -r requirements/test.in
genson==1.3.0
# via datamodel-code-generator
graphql-core==3.2.6
# via hypothesis-graphql
h11==0.14.0
# via httpcore
harfile==0.3.0
# via schemathesis
hf-xet==0.1.4
# via huggingface-hub
hiredis==3.0.0
@ -165,7 +181,9 @@ hiredis==3.0.0
httpcore==1.0.6
# via httpx
httpx==0.27.2
# via -r requirements/test.in
# via
# -r requirements/test.in
# schemathesis
huggingface-hub==0.30.1
# via
# -r requirements/test.in
@ -180,17 +198,29 @@ huggingface-hub==0.30.1
# vocos
humanize==4.11.0
# via runai-model-streamer
hypothesis==6.131.0
# via
# hypothesis-graphql
# hypothesis-jsonschema
# schemathesis
hypothesis-graphql==0.11.1
# via schemathesis
hypothesis-jsonschema==0.23.1
# via schemathesis
idna==3.10
# via
# anyio
# email-validator
# httpx
# jsonschema
# requests
# yarl
inflect==5.6.2
# via datamodel-code-generator
iniconfig==2.0.0
# via pytest
isoduration==20.11.0
# via jsonschema
isort==5.13.2
# via datamodel-code-generator
jinja2==3.1.6
@ -210,12 +240,18 @@ joblib==1.4.2
# scikit-learn
jsonlines==4.0.0
# via lm-eval
jsonpointer==3.0.0
# via jsonschema
jsonschema==4.23.0
# via
# hypothesis-jsonschema
# mistral-common
# ray
# schemathesis
jsonschema-specifications==2024.10.1
# via jsonschema
junit-xml==1.9
# via schemathesis
kaleido==0.2.1
# via genai-perf
kiwisolver==1.4.7
@ -239,7 +275,9 @@ mamba-ssm==2.2.4
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
# via
# jinja2
# werkzeug
matplotlib==3.9.2
# via -r requirements/test.in
mbstrdecoder==1.1.3
@ -449,6 +487,8 @@ pygments==2.18.0
# via rich
pyparsing==3.2.0
# via matplotlib
pyrate-limiter==3.7.0
# via schemathesis
pytablewriter==1.2.0
# via lm-eval
pytest==8.3.3
@ -461,7 +501,9 @@ pytest==8.3.3
# pytest-mock
# pytest-rerunfailures
# pytest-shard
# pytest-subtests
# pytest-timeout
# schemathesis
pytest-asyncio==0.24.0
# via -r requirements/test.in
pytest-forked==1.6.0
@ -472,10 +514,13 @@ pytest-rerunfailures==14.0
# via -r requirements/test.in
pytest-shard==0.1.2
# via -r requirements/test.in
pytest-subtests==0.14.1
# via schemathesis
pytest-timeout==2.3.1
# via -r requirements/test.in
python-dateutil==2.9.0.post0
# via
# arrow
# botocore
# matplotlib
# pandas
@ -497,6 +542,7 @@ pyyaml==6.0.2
# peft
# ray
# responses
# schemathesis
# timm
# transformers
# vocos
@ -527,10 +573,16 @@ requests==2.32.3
# pooch
# ray
# responses
# schemathesis
# starlette-testclient
# tiktoken
# transformers
responses==0.25.3
# via genai-perf
rfc3339-validator==0.1.4
# via jsonschema
rfc3987==1.3.8
# via jsonschema
rich==13.9.4
# via
# genai-perf
@ -559,6 +611,8 @@ safetensors==0.4.5
# peft
# timm
# transformers
schemathesis==3.39.15
# via -r requirements/test.in
scikit-learn==1.5.2
# via
# librosa
@ -584,12 +638,16 @@ shellingham==1.5.4
# via typer
six==1.16.0
# via
# junit-xml
# python-dateutil
# rfc3339-validator
# rouge-score
sniffio==1.3.1
# via
# anyio
# httpx
sortedcontainers==2.4.0
# via hypothesis
soundfile==0.12.1
# via
# -r requirements/test.in
@ -598,6 +656,12 @@ soxr==0.5.0.post1
# via librosa
sqlitedict==2.1.0
# via lm-eval
starlette==0.46.2
# via
# schemathesis
# starlette-testclient
starlette-testclient==0.4.1
# via schemathesis
statsmodels==0.14.4
# via genai-perf
sympy==1.13.1
@ -628,6 +692,10 @@ tokenizers==0.21.1
# via
# -r requirements/test.in
# transformers
tomli==2.2.1
# via schemathesis
tomli-w==1.2.0
# via schemathesis
torch==2.6.0
# via
# -r requirements/test.in
@ -693,6 +761,8 @@ typepy==1.3.2
# tabledata
typer==0.15.2
# via fastsafetensors
types-python-dateutil==2.9.0.20241206
# via arrow
typing-extensions==4.12.2
# via
# huggingface-hub
@ -705,6 +775,8 @@ typing-extensions==4.12.2
# typer
tzdata==2024.2
# via pandas
uri-template==1.3.0
# via jsonschema
urllib3==2.2.3
# via
# blobfile
@ -716,6 +788,10 @@ vector-quantize-pytorch==1.21.2
# via -r requirements/test.in
vocos==0.1.0
# via -r requirements/test.in
webcolors==24.11.1
# via jsonschema
werkzeug==3.1.3
# via schemathesis
word2number==1.1
# via lm-eval
xxhash==3.5.0
@ -723,6 +799,8 @@ xxhash==3.5.0
# datasets
# evaluate
yarl==1.17.1
# via aiohttp
# via
# aiohttp
# schemathesis
zstandard==0.23.0
# via lm-eval

View File

@ -0,0 +1,49 @@
# SPDX-License-Identifier: Apache-2.0
import pytest
import schemathesis
from schemathesis import GenerationConfig
from ...utils import RemoteOpenAIServer
# vLLM's /openapi.json is an OpenAPI 3.1 document; schemathesis gates 3.1
# support behind an experimental flag, so enable it before schema loading.
schemathesis.experimental.OPEN_API_3_1.enable()
# Small multimodal model — keeps the module-scoped test server cheap to start.
MODEL_NAME = "HuggingFaceTB/SmolVLM-256M-Instruct"
# Upper bound on images per prompt; mirrored in --limit-mm-per-prompt below.
MAXIMUM_IMAGES = 2
@pytest.fixture(scope="module")
def server():
    """Start a module-scoped vLLM OpenAI-compatible server for schema tests.

    Yields:
        RemoteOpenAIServer: a running server whose lifetime spans the module.
    """
    # Keep the server small and deterministic: short context, few concurrent
    # sequences, eager execution, and a cap on images per prompt.
    cli_args = [
        "--task", "generate",
        "--max-model-len", "2048",
        "--max-num-seqs", "5",
        "--enforce-eager",
        "--trust-remote-code",
        "--limit-mm-per-prompt", f"image={MAXIMUM_IMAGES}",
    ]
    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote:
        yield remote
@pytest.fixture(scope="module")
def get_schema(server):
    """Fetch the live server's OpenAPI schema for schemathesis generation."""
    schema_url = f"{server.url_root}/openapi.json"
    # Disallow NUL (\x00) bytes in generated strings during test case
    # generation — they would only produce noise failures.
    gen_config = GenerationConfig(allow_x00=False)
    return schemathesis.openapi.from_uri(schema_url,
                                         generation_config=gen_config)
# Lazily materialize the schema from the pytest fixture above; schemathesis
# uses it to generate one parametrized test case per API operation.
schema = schemathesis.from_pytest_fixture("get_schema")


@schema.parametrize()
@schema.override(headers={"Content-Type": "application/json"})
async def test_openapi_stateless(case):
    """Property-based check: each generated request must satisfy the schema."""
    # No need to verify the SSL certificate when targeting localhost.
    await case.call_and_validate(verify=False)