From 6cd40a5bfed24ef0ceca83b0450be6920d8ca6d4 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Wed, 8 Jan 2025 21:34:44 +0800
Subject: [PATCH] [Doc][4/N] Reorganize API Reference (#11843)

Signed-off-by: DarkLight1337
---
 .buildkite/test-pipeline.yaml                 |  2 +-
 Dockerfile                                    |  4 ++--
 .../{dev => api}/engine/async_llm_engine.md   |  0
 .../engine_index.md => api/engine/index.md}   |  0
 docs/source/{dev => api}/engine/llm_engine.md |  0
 .../multimodal/index.md}                      | 10 --------
 .../offline_inference/index.md}               |  0
 .../{dev => api}/offline_inference/llm.md     |  0
 .../offline_inference/llm_inputs.md           |  0
 docs/source/api/params.md                     | 22 ++++++++++++++++++
 .../dockerfile-stages-dependency.png          | Bin
 .../contributing/dockerfile/dockerfile.md     |  2 +-
 docs/source/design/arch_overview.md           |  2 +-
 .../multimodal/adding_multimodal_plugin.md    | 16 -------------
 docs/source/dev/pooling_params.md             |  6 -----
 docs/source/dev/sampling_params.md            |  6 -----
 docs/source/getting_started/quickstart.md     |  2 +-
 docs/source/index.md                          |  9 ++++---
 docs/source/serving/offline_inference.md      |  2 +-
 .../serving/openai_compatible_server.md       |  8 +++----
 vllm/multimodal/base.py                       |  3 ---
 vllm/multimodal/inputs.py                     |  6 -----
 vllm/multimodal/registry.py                   |  3 ---
 vllm/pooling_params.py                        |  2 +-
 24 files changed, 38 insertions(+), 67 deletions(-)
 rename docs/source/{dev => api}/engine/async_llm_engine.md (100%)
 rename docs/source/{dev/engine/engine_index.md => api/engine/index.md} (100%)
 rename docs/source/{dev => api}/engine/llm_engine.md (100%)
 rename docs/source/{design/multimodal/multimodal_index.md => api/multimodal/index.md} (84%)
 rename docs/source/{dev/offline_inference/offline_index.md => api/offline_inference/index.md} (100%)
 rename docs/source/{dev => api}/offline_inference/llm.md (100%)
 rename docs/source/{dev => api}/offline_inference/llm_inputs.md (100%)
 create mode 100644 docs/source/api/params.md
 rename docs/source/assets/{dev => contributing}/dockerfile-stages-dependency.png (100%)
 delete mode 100644 docs/source/design/multimodal/adding_multimodal_plugin.md
 delete mode 100644 docs/source/dev/pooling_params.md
 delete mode 100644 docs/source/dev/sampling_params.md

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index b7178b94..f883595f 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -38,7 +38,7 @@ steps:
   - pip install -r requirements-docs.txt
   - SPHINXOPTS="-W" make html
   # Check API reference (if it fails, you may have missing mock imports)
-  - grep "sig sig-object py" build/html/dev/sampling_params.html
+  - grep "sig sig-object py" build/html/api/params.html
 
 - label: Async Engine, Inputs, Utils, Worker Test # 24min
   fast_check: true
diff --git a/Dockerfile b/Dockerfile
index 808cf675..4542bc9c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,8 +2,8 @@
 # to run the OpenAI compatible server.
 
 # Please update any changes made here to
-# docs/source/dev/dockerfile/dockerfile.md and
-# docs/source/assets/dev/dockerfile-stages-dependency.png
+# docs/source/contributing/dockerfile/dockerfile.md and
+# docs/source/assets/contributing/dockerfile-stages-dependency.png
 
 ARG CUDA_VERSION=12.4.1
 #################### BASE BUILD IMAGE ####################
diff --git a/docs/source/dev/engine/async_llm_engine.md b/docs/source/api/engine/async_llm_engine.md
similarity index 100%
rename from docs/source/dev/engine/async_llm_engine.md
rename to docs/source/api/engine/async_llm_engine.md
diff --git a/docs/source/dev/engine/engine_index.md b/docs/source/api/engine/index.md
similarity index 100%
rename from docs/source/dev/engine/engine_index.md
rename to docs/source/api/engine/index.md
diff --git a/docs/source/dev/engine/llm_engine.md b/docs/source/api/engine/llm_engine.md
similarity index 100%
rename from docs/source/dev/engine/llm_engine.md
rename to docs/source/api/engine/llm_engine.md
diff --git a/docs/source/design/multimodal/multimodal_index.md b/docs/source/api/multimodal/index.md
similarity index 84%
rename from docs/source/design/multimodal/multimodal_index.md
rename to docs/source/api/multimodal/index.md
index e4f2171e..0046b73e 100644
--- a/docs/source/design/multimodal/multimodal_index.md
+++ b/docs/source/api/multimodal/index.md
@@ -11,18 +11,8 @@ vLLM provides experimental support for multi-modal models through the {mod}`vllm
 
 Multi-modal inputs can be passed alongside text and token prompts to [supported models](#supported-mm-models)
 via the `multi_modal_data` field in {class}`vllm.inputs.PromptType`.
 
-Currently, vLLM only has built-in support for image data. You can extend vLLM to process additional modalities
-by following [this guide](#adding-multimodal-plugin).
-
 Looking to add your own multi-modal model? Please follow the instructions listed [here](#enabling-multimodal-inputs).
 
-## Guides
-
-```{toctree}
-:maxdepth: 1
-
-adding_multimodal_plugin
-```
 
 ## Module Contents
diff --git a/docs/source/dev/offline_inference/offline_index.md b/docs/source/api/offline_inference/index.md
similarity index 100%
rename from docs/source/dev/offline_inference/offline_index.md
rename to docs/source/api/offline_inference/index.md
diff --git a/docs/source/dev/offline_inference/llm.md b/docs/source/api/offline_inference/llm.md
similarity index 100%
rename from docs/source/dev/offline_inference/llm.md
rename to docs/source/api/offline_inference/llm.md
diff --git a/docs/source/dev/offline_inference/llm_inputs.md b/docs/source/api/offline_inference/llm_inputs.md
similarity index 100%
rename from docs/source/dev/offline_inference/llm_inputs.md
rename to docs/source/api/offline_inference/llm_inputs.md
diff --git a/docs/source/api/params.md b/docs/source/api/params.md
new file mode 100644
index 00000000..a3b4d9cb
--- /dev/null
+++ b/docs/source/api/params.md
@@ -0,0 +1,22 @@
+# Optional Parameters
+
+Optional parameters for vLLM APIs.
+
+(sampling-params)=
+
+## Sampling Parameters
+
+```{eval-rst}
+.. autoclass:: vllm.SamplingParams
+    :members:
+```
+
+(pooling-params)=
+
+## Pooling Parameters
+
+```{eval-rst}
+.. autoclass:: vllm.PoolingParams
+    :members:
+```
+
diff --git a/docs/source/assets/dev/dockerfile-stages-dependency.png b/docs/source/assets/contributing/dockerfile-stages-dependency.png
similarity index 100%
rename from docs/source/assets/dev/dockerfile-stages-dependency.png
rename to docs/source/assets/contributing/dockerfile-stages-dependency.png
diff --git a/docs/source/contributing/dockerfile/dockerfile.md b/docs/source/contributing/dockerfile/dockerfile.md
index 38ea956b..cb142318 100644
--- a/docs/source/contributing/dockerfile/dockerfile.md
+++ b/docs/source/contributing/dockerfile/dockerfile.md
@@ -17,7 +17,7 @@ The edges of the build graph represent:
 
 - `RUN --mount=(.\*)from=...` dependencies (with a dotted line and an empty diamond arrow head)
 
-  > ```{figure} ../../assets/dev/dockerfile-stages-dependency.png
+  > ```{figure} /assets/contributing/dockerfile-stages-dependency.png
   > :align: center
   > :alt: query
   > :width: 100%
diff --git a/docs/source/design/arch_overview.md b/docs/source/design/arch_overview.md
index 5e0dd021..cec503ef 100644
--- a/docs/source/design/arch_overview.md
+++ b/docs/source/design/arch_overview.md
@@ -53,7 +53,7 @@ for output in outputs:
 ```
 
 More API details can be found in the {doc}`Offline Inference
-</dev/offline_inference/offline_index>` section of the API docs.
+</api/offline_inference/index>` section of the API docs.
 
 The code for the `LLM` class can be found in <gh-file:vllm/entrypoints/llm.py>.
diff --git a/docs/source/design/multimodal/adding_multimodal_plugin.md b/docs/source/design/multimodal/adding_multimodal_plugin.md
deleted file mode 100644
index bcccd284..00000000
--- a/docs/source/design/multimodal/adding_multimodal_plugin.md
+++ /dev/null
@@ -1,16 +0,0 @@
-(adding-multimodal-plugin)=
-
-# Adding a Multimodal Plugin
-
-This document teaches you how to add a new modality to vLLM.
-
-Each modality in vLLM is represented by a {class}`~vllm.multimodal.MultiModalPlugin` and registered to {data}`~vllm.multimodal.MULTIMODAL_REGISTRY`.
-For vLLM to recognize a new modality type, you have to create a new plugin and then pass it to {meth}`~vllm.multimodal.MultiModalRegistry.register_plugin`.
-
-The remainder of this document details how to define custom {class}`~vllm.multimodal.MultiModalPlugin` s.
-
-```{note}
-This article is a work in progress.
-```
-
-% TODO: Add more instructions on how to add new plugins once embeddings is in.
diff --git a/docs/source/dev/pooling_params.md b/docs/source/dev/pooling_params.md
deleted file mode 100644
index 74b2c574..00000000
--- a/docs/source/dev/pooling_params.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# Pooling Parameters
-
-```{eval-rst}
-.. autoclass:: vllm.PoolingParams
-    :members:
-```
diff --git a/docs/source/dev/sampling_params.md b/docs/source/dev/sampling_params.md
deleted file mode 100644
index bdc36af5..00000000
--- a/docs/source/dev/sampling_params.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# Sampling Parameters
-
-```{eval-rst}
-.. autoclass:: vllm.SamplingParams
-    :members:
-```
diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md
index 6b56918c..2808e1b3 100644
--- a/docs/source/getting_started/quickstart.md
+++ b/docs/source/getting_started/quickstart.md
@@ -42,7 +42,7 @@ The first line of this example imports the classes {class}`~vllm.LLM` and {class
 from vllm import LLM, SamplingParams
 ```
 
-The next section defines a list of input prompts and sampling parameters for text generation. The [sampling temperature](https://arxiv.org/html/2402.05201v1) is set to `0.8` and the [nucleus sampling probability](https://en.wikipedia.org/wiki/Top-p_sampling) is set to `0.95`. You can find more information about the sampling parameters [here](https://docs.vllm.ai/en/stable/dev/sampling_params.html).
+The next section defines a list of input prompts and sampling parameters for text generation. The [sampling temperature](https://arxiv.org/html/2402.05201v1) is set to `0.8` and the [nucleus sampling probability](https://en.wikipedia.org/wiki/Top-p_sampling) is set to `0.95`. You can find more information about the sampling parameters [here](#sampling-params).
 
 ```python
 prompts = [
diff --git a/docs/source/index.md b/docs/source/index.md
index 11d3e24a..6747a7fc 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -137,10 +137,10 @@ community/sponsors
 :caption: API Reference
 :maxdepth: 2
 
-dev/sampling_params
-dev/pooling_params
-dev/offline_inference/offline_index
-dev/engine/engine_index
+api/offline_inference/index
+api/engine/index
+api/multimodal/index
+api/params
 ```
 
 % Design Documents: Details about vLLM internals
@@ -154,7 +154,6 @@ design/huggingface_integration
 design/plugin_system
 design/kernel/paged_attention
 design/input_processing/model_inputs_index
-design/multimodal/multimodal_index
 design/automatic_prefix_caching
 design/multiprocessing
 ```
diff --git a/docs/source/serving/offline_inference.md b/docs/source/serving/offline_inference.md
index 83178f78..79092ab2 100644
--- a/docs/source/serving/offline_inference.md
+++ b/docs/source/serving/offline_inference.md
@@ -23,7 +23,7 @@ The available APIs depend on the type of model that is being run:
 Please refer to the above pages for more details about each API.
 
 ```{seealso}
-[API Reference](/dev/offline_inference/offline_index)
+[API Reference](/api/offline_inference/index)
 ```
 
 ## Configuration Options
diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md
index 022dd3ae..ec5a3675 100644
--- a/docs/source/serving/openai_compatible_server.md
+++ b/docs/source/serving/openai_compatible_server.md
@@ -195,7 +195,7 @@ Code example:
 
 #### Extra parameters
 
-The following [sampling parameters (click through to see documentation)](../dev/sampling_params.md) are supported.
+The following [sampling parameters](#sampling-params) are supported.
 
 ```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
 :language: python
@@ -226,7 +226,7 @@ Code example:
 
 #### Extra parameters
 
-The following [sampling parameters (click through to see documentation)](../dev/sampling_params.md) are supported.
+The following [sampling parameters](#sampling-params) are supported.
 
 ```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
 :language: python
@@ -259,7 +259,7 @@ Code example:
 
 #### Extra parameters
 
-The following [pooling parameters (click through to see documentation)](../dev/pooling_params.md) are supported.
+The following [pooling parameters](#pooling-params) are supported.
 
 ```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
 :language: python
@@ -447,7 +447,7 @@ Response:
 
 #### Extra parameters
 
-The following [pooling parameters (click through to see documentation)](../dev/pooling_params.md) are supported.
+The following [pooling parameters](#pooling-params) are supported.
 
 ```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
 :language: python
diff --git a/vllm/multimodal/base.py b/vllm/multimodal/base.py
index 7f4029e7..4941fbac 100644
--- a/vllm/multimodal/base.py
+++ b/vllm/multimodal/base.py
@@ -49,9 +49,6 @@ class MultiModalPlugin(ABC):
     process the same data differently). This registry is in turn used by
     :class:`~MultiModalRegistry` which acts at a higher level
     (i.e., the modality of the data).
-
-    See also:
-        :ref:`adding-multimodal-plugin`
     """
 
     def __init__(self) -> None:
diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py
index 8fdcc4b5..d5424618 100644
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -99,12 +99,6 @@ class MultiModalDataBuiltins(TypedDict, total=False):
 
 MultiModalDataDict: TypeAlias = Mapping[str, ModalityData[Any]]
 """
 A dictionary containing an entry for each modality type to input.
-
-Note:
-    This dictionary also accepts modality keys defined outside
-    :class:`MultiModalDataBuiltins` as long as a customized plugin
-    is registered through the :class:`~vllm.multimodal.MULTIMODAL_REGISTRY`.
-    Read more on that :ref:`here <adding-multimodal-plugin>`.
 """
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index 5f01eac4..9eceefb0 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -125,9 +125,6 @@ class MultiModalRegistry:
     def register_plugin(self, plugin: MultiModalPlugin) -> None:
         """
         Register a multi-modal plugin so it can be recognized by vLLM.
-
-        See also:
-            :ref:`adding-multimodal-plugin`
         """
         data_type_key = plugin.get_data_key()
 
diff --git a/vllm/pooling_params.py b/vllm/pooling_params.py
index 2635c0bc..b24b7e91 100644
--- a/vllm/pooling_params.py
+++ b/vllm/pooling_params.py
@@ -7,7 +7,7 @@ class PoolingParams(
     msgspec.Struct,
     omit_defaults=True,  # type: ignore[call-arg]
     array_like=True):  # type: ignore[call-arg]
-    """Pooling parameters for embeddings API.
+    """API parameters for pooling models. This is currently a placeholder.
 
     Attributes:
         additional_data: Any additional data needed for pooling.
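
Usage sketch for the consolidated parameters page (not part of the patch itself): both classes documented by the new `docs/source/api/params.md` are importable from the top-level `vllm` package, which is exactly what its `autoclass:: vllm.SamplingParams` and `autoclass:: vllm.PoolingParams` directives rely on. The model name and prompt below are illustrative.

```python
from vllm import LLM, PoolingParams, SamplingParams

# Sampling parameters, using the same values as the quickstart text
# touched by this patch (temperature 0.8, nucleus sampling 0.95).
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# PoolingParams is currently a placeholder, per the docstring change to
# vllm/pooling_params.py above; it only carries optional additional_data.
pooling_params = PoolingParams()

# Illustrative model name; any generative model supported by vLLM works here.
llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(["Hello, my name is"], sampling_params)
for output in outputs:
    print(output.outputs[0].text)
```

A side benefit of the new `(sampling-params)=` and `(pooling-params)=` targets is that the quickstart and OpenAI-compatible-server pages now link by anchor rather than by hard-coded `dev/...` URLs, so those links survive any future page moves.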