[Misc] Add SPDX-License-Identifier headers to python source files (#12628)

- **Add SPDX license headers to python source files**
- **Check for SPDX headers using pre-commit**

commit 9d7ef44c3cfb72ca4c32e1c677d99259d10d4745
Author: Russell Bryant <rbryant@redhat.com>
Date: Fri Jan 31 14:18:24 2025 -0500

    Add SPDX license headers to python source files

    This commit adds SPDX license headers to python source files as
    recommended to the project by the Linux Foundation. These headers
    provide a concise way that is both human and machine readable for
    communicating license information for each source file. It helps
    avoid any ambiguity about the license of the code and can also be
    easily used by tools to help manage license compliance.

    The Linux Foundation runs license scans against the codebase to help
    ensure we are in compliance with the licenses of the code we use,
    including dependencies. Having these headers in place helps that tool
    do its job.

    More information can be found on the SPDX site:

    - https://spdx.dev/learn/handling-license-info/

    Signed-off-by: Russell Bryant <rbryant@redhat.com>

commit 5a1cf1cb3b80759131c73f6a9dddebccac039dea
Author: Russell Bryant <rbryant@redhat.com>
Date: Fri Jan 31 14:36:32 2025 -0500

    Check for SPDX headers using pre-commit

    Signed-off-by: Russell Bryant <rbryant@redhat.com>

---------

Signed-off-by: Russell Bryant <rbryant@redhat.com>
2025-02-02 14:58:18 -05:00 · 2025-02-02 14:58:18 -05:00 · e489ad7a21
commit e489ad7a21
parent f256ebe4df
1012 changed files with 1884 additions and 2 deletions
--- a/.buildkite/check-wheel-size.py
+++ b/.buildkite/check-wheel-size.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import os
 import sys
 import zipfile
--- a/.buildkite/generate_index.py
+++ b/.buildkite/generate_index.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import os
--- a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
+++ b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 LM eval harness on model to compare vs HF baseline computed offline.
 Configs are found in configs/$MODEL.yaml
--- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import json
 import os
 from pathlib import Path
--- a/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
+++ b/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 from transformers import AutoTokenizer
--- a/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import json
 from pathlib import Path
--- a/.buildkite/nightly-benchmarks/scripts/get-lmdeploy-modelname.py
+++ b/.buildkite/nightly-benchmarks/scripts/get-lmdeploy-modelname.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from lmdeploy.serve.openai.api_client import APIClient
 api_client = APIClient("http://localhost:8000")
--- a/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
+++ b/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import datetime
 import json
 import os
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -97,10 +97,14 @@ repos:
    language: system
    verbose: true
    stages: [commit-msg]
  - id: check-spdx-header
    name: Check SPDX headers
    entry: python tools/check_spdx_header.py
    language: python
    types: [python]
  - id: suggestion
    name: Suggestion
    entry: bash -c 'echo "To bypass pre-commit hooks, add --no-verify to git commit."'
    language: system
    verbose: true
    pass_filenames: false
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import json
 import os
 import sys
--- a/benchmarks/benchmark_guided.py
+++ b/benchmarks/benchmark_guided.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """Benchmark guided decoding throughput."""
 import argparse
 import dataclasses
--- a/benchmarks/benchmark_latency.py
+++ b/benchmarks/benchmark_latency.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """Benchmark the latency of processing a single batch of requests."""
 import argparse
 import dataclasses
--- a/benchmarks/benchmark_long_document_qa_throughput.py
+++ b/benchmarks/benchmark_long_document_qa_throughput.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Offline benchmark to test the long document QA throughput.
--- a/benchmarks/benchmark_prefix_caching.py
+++ b/benchmarks/benchmark_prefix_caching.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Benchmark the efficiency of prefix caching.
--- a/benchmarks/benchmark_prioritization.py
+++ b/benchmarks/benchmark_prioritization.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """Benchmark offline prioritization."""
 import argparse
 import dataclasses
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 r"""Benchmark online serving throughput.
 On the server side, run one of the following commands:
--- a/benchmarks/benchmark_serving_guided.py
+++ b/benchmarks/benchmark_serving_guided.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 r"""Benchmark online serving throughput with guided decoding.
 On the server side, run one of the following commands:
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """Benchmark offline inference throughput."""
 import argparse
 import dataclasses
--- a/benchmarks/cutlass_benchmarks/sparse_benchmarks.py
+++ b/benchmarks/cutlass_benchmarks/sparse_benchmarks.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import copy
 import itertools
--- a/benchmarks/cutlass_benchmarks/utils.py
+++ b/benchmarks/cutlass_benchmarks/utils.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # Cutlass bench utils
 from typing import Iterable, Tuple
--- a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
+++ b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import copy
 import itertools
--- a/benchmarks/cutlass_benchmarks/weight_shapes.py
+++ b/benchmarks/cutlass_benchmarks/weight_shapes.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # Weight Shapes are in the format
 # ([K, N], TP_SPLIT_DIM)
 # Example:
--- a/benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py
+++ b/benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import os
 import aiohttp
--- a/benchmarks/disagg_benchmarks/round_robin_proxy.py
+++ b/benchmarks/disagg_benchmarks/round_robin_proxy.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import asyncio
 import itertools
--- a/benchmarks/disagg_benchmarks/visualize_benchmark_results.py
+++ b/benchmarks/disagg_benchmarks/visualize_benchmark_results.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import json
 import matplotlib.pyplot as plt
--- a/benchmarks/fused_kernels/layernorm_rms_benchmarks.py
+++ b/benchmarks/fused_kernels/layernorm_rms_benchmarks.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import pickle as pkl
 import time
 from dataclasses import dataclass
--- a/benchmarks/kernels/benchmark_aqlm.py
+++ b/benchmarks/kernels/benchmark_aqlm.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import os
 import sys
 from typing import Optional
--- a/benchmarks/kernels/benchmark_layernorm.py
+++ b/benchmarks/kernels/benchmark_layernorm.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import time
 import torch
--- a/benchmarks/kernels/benchmark_lora.py
+++ b/benchmarks/kernels/benchmark_lora.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import copy
 import json
--- a/benchmarks/kernels/benchmark_machete.py
+++ b/benchmarks/kernels/benchmark_machete.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import copy
 import itertools
--- a/benchmarks/kernels/benchmark_marlin.py
+++ b/benchmarks/kernels/benchmark_marlin.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from typing import List
 import torch
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import time
 from datetime import datetime
--- a/benchmarks/kernels/benchmark_paged_attention.py
+++ b/benchmarks/kernels/benchmark_paged_attention.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import random
 import time
 from typing import List, Optional
--- a/benchmarks/kernels/benchmark_quant.py
+++ b/benchmarks/kernels/benchmark_quant.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import time
 import torch
--- a/benchmarks/kernels/benchmark_rmsnorm.py
+++ b/benchmarks/kernels/benchmark_rmsnorm.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import itertools
 from typing import Optional, Tuple, Union
--- a/benchmarks/kernels/benchmark_rope.py
+++ b/benchmarks/kernels/benchmark_rope.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from itertools import accumulate
 from typing import List, Optional
--- a/benchmarks/kernels/benchmark_shapes.py
+++ b/benchmarks/kernels/benchmark_shapes.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 WEIGHT_SHAPES = {
    "ideal": [[4 * 256 * 32, 256 * 32]],
    "mistralai/Mistral-7B-v0.1/TP1": [
--- a/benchmarks/kernels/graph_machete_bench.py
+++ b/benchmarks/kernels/graph_machete_bench.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import math
 import pickle
 import re
--- a/benchmarks/kernels/utils.py
+++ b/benchmarks/kernels/utils.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import dataclasses
 from typing import Any, Callable, Iterable, Optional
--- a/benchmarks/kernels/weight_shapes.py
+++ b/benchmarks/kernels/weight_shapes.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # Weight Shapes are in the format
 # ([K, N], TP_SPLIT_DIM)
 # Example:
--- a/benchmarks/overheads/benchmark_hashing.py
+++ b/benchmarks/overheads/benchmark_hashing.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import cProfile
 import pstats
--- a/cmake/hipify.py
+++ b/cmake/hipify.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 #!/usr/bin/env python3
 #
--- a/collect_env.py
+++ b/collect_env.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # ruff: noqa
 # code borrowed from https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py
--- a/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
+++ b/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import enum
 from typing import Dict, Union
--- a/csrc/quantization/machete/generate.py
+++ b/csrc/quantization/machete/generate.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import itertools
 import math
 import os
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # Configuration file for the Sphinx documentation builder.
 #
 # This file only contains a selection of the most common options. For a full
--- a/docs/source/generate_examples.py
+++ b/docs/source/generate_examples.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import itertools
 import re
 from dataclasses import dataclass, field
--- a/examples/offline_inference/aqlm_example.py
+++ b/examples/offline_inference/aqlm_example.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 from vllm.utils import FlexibleArgumentParser
--- a/examples/offline_inference/arctic.py
+++ b/examples/offline_inference/arctic.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 # Sample prompts.
--- a/examples/offline_inference/audio_language.py
+++ b/examples/offline_inference/audio_language.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 This example shows how to use vLLM for running offline inference 
 with the correct prompt format on audio language models.
--- a/examples/offline_inference/basic.py
+++ b/examples/offline_inference/basic.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 # Sample prompts.
--- a/examples/offline_inference/basic_with_model_default_sampling.py
+++ b/examples/offline_inference/basic_with_model_default_sampling.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM
 # Sample prompts.
--- a/examples/offline_inference/chat.py
+++ b/examples/offline_inference/chat.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
--- a/examples/offline_inference/chat_with_tools.py
+++ b/examples/offline_inference/chat_with_tools.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # ruff: noqa
 import json
 import random
--- a/examples/offline_inference/classification.py
+++ b/examples/offline_inference/classification.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM
 # Sample prompts.
--- a/examples/offline_inference/cli.py
+++ b/examples/offline_inference/cli.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from dataclasses import asdict
 from vllm import LLM, SamplingParams
--- a/examples/offline_inference/cpu_offload.py
+++ b/examples/offline_inference/cpu_offload.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 # Sample prompts.
--- a/examples/offline_inference/distributed.py
+++ b/examples/offline_inference/distributed.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 This example shows how to use Ray Data for running offline batch inference
 distributively on a multi-nodes cluster.
--- a/examples/offline_inference/embedding.py
+++ b/examples/offline_inference/embedding.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM
 # Sample prompts.
--- a/examples/offline_inference/encoder_decoder.py
+++ b/examples/offline_inference/encoder_decoder.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 '''
 Demonstrate prompting of text-to-text
 encoder/decoder models, specifically BART
--- a/examples/offline_inference/florence2_inference.py
+++ b/examples/offline_inference/florence2_inference.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 '''
 Demonstrate prompting of text-to-text
 encoder/decoder models, specifically Florence-2
--- a/examples/offline_inference/gguf_inference.py
+++ b/examples/offline_inference/gguf_inference.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from huggingface_hub import hf_hub_download
 from vllm import LLM, SamplingParams
--- a/examples/offline_inference/llm_engine_example.py
+++ b/examples/offline_inference/llm_engine_example.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 from typing import List, Tuple
--- a/examples/offline_inference/lora_with_quantization_inference.py
+++ b/examples/offline_inference/lora_with_quantization_inference.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 This example shows how to use LoRA with different quantization techniques
 for offline inference.
--- a/examples/offline_inference/mlpspeculator.py
+++ b/examples/offline_inference/mlpspeculator.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import gc
 import time
 from typing import List
--- a/examples/offline_inference/multilora_inference.py
+++ b/examples/offline_inference/multilora_inference.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 This example shows how to use the multi-LoRA functionality
 for offline inference.
--- a/examples/offline_inference/neuron.py
+++ b/examples/offline_inference/neuron.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 # Sample prompts.
--- a/examples/offline_inference/neuron_int8_quantization.py
+++ b/examples/offline_inference/neuron_int8_quantization.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import os
 from vllm import LLM, SamplingParams
--- a/examples/offline_inference/pixtral.py
+++ b/examples/offline_inference/pixtral.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # ruff: noqa
 import argparse
--- a/examples/offline_inference/prefix_caching.py
+++ b/examples/offline_inference/prefix_caching.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 from vllm.distributed import cleanup_dist_env_and_memory
--- a/examples/offline_inference/profiling.py
+++ b/examples/offline_inference/profiling.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import inspect
 import json
 import os
--- a/examples/offline_inference/profiling_tpu/profiling.py
+++ b/examples/offline_inference/profiling_tpu/profiling.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import dataclasses
 import os
--- a/examples/offline_inference/rlhf.py
+++ b/examples/offline_inference/rlhf.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 a simple demonstration of RLHF with vLLM, inspired by
 the OpenRLHF framework https://github.com/OpenRLHF/OpenRLHF .
--- a/examples/offline_inference/save_sharded_state.py
+++ b/examples/offline_inference/save_sharded_state.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Saves each worker's model state dict directly to a checkpoint, which enables a
 fast load path for large tensor-parallel models where each worker only needs to
--- a/examples/offline_inference/scoring.py
+++ b/examples/offline_inference/scoring.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM
 # Sample prompts.
--- a/examples/offline_inference/simple_profiling.py
+++ b/examples/offline_inference/simple_profiling.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import os
 import time
--- a/examples/offline_inference/structured_outputs.py
+++ b/examples/offline_inference/structured_outputs.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from enum import Enum
 from pydantic import BaseModel
--- a/examples/offline_inference/torchrun_example.py
+++ b/examples/offline_inference/torchrun_example.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 experimental support for tensor-parallel inference with torchrun,
 see https://github.com/vllm-project/vllm/issues/11400 for
--- a/examples/offline_inference/tpu.py
+++ b/examples/offline_inference/tpu.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from vllm import LLM, SamplingParams
 prompts = [
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 This example shows how to use vLLM for running offline inference with
 the correct prompt format on vision language models for text generation.
--- a/examples/offline_inference/vision_language_embedding.py
+++ b/examples/offline_inference/vision_language_embedding.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 This example shows how to use vLLM for running offline inference with
 the correct prompt format on vision language models for multimodal embedding.
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 This example shows how to use vLLM for running offline inference with
 multi-image input on vision language models for text generation,
--- a/examples/offline_inference/whisper.py
+++ b/examples/offline_inference/whisper.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import time
 from vllm import LLM, SamplingParams
--- a/examples/online_serving/api_client.py
+++ b/examples/online_serving/api_client.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """Example Python client for `vllm.entrypoints.api_server`
 NOTE: The API server is used only for demonstration and simple performance
 benchmarks. It is not intended for production use.
--- a/examples/online_serving/cohere_rerank_client.py
+++ b/examples/online_serving/cohere_rerank_client.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Example of using the OpenAI entrypoint's rerank API which is compatible with
 the Cohere SDK: https://github.com/cohere-ai/cohere-python
--- a/examples/online_serving/gradio_openai_chatbot_webserver.py
+++ b/examples/online_serving/gradio_openai_chatbot_webserver.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import gradio as gr
--- a/examples/online_serving/gradio_webserver.py
+++ b/examples/online_serving/gradio_webserver.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import json
--- a/examples/online_serving/jinaai_rerank_client.py
+++ b/examples/online_serving/jinaai_rerank_client.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Example of using the OpenAI entrypoint's rerank API which is compatible with
 Jina and Cohere https://jina.ai/reranker
--- a/examples/online_serving/openai_chat_completion_client.py
+++ b/examples/online_serving/openai_chat_completion_client.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from openai import OpenAI
 # Modify OpenAI's API key and API base to use vLLM's API server.
--- a/examples/online_serving/openai_chat_completion_client_for_multimodal.py
+++ b/examples/online_serving/openai_chat_completion_client_for_multimodal.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """An example showing how to use vLLM to serve multimodal models 
 and run online serving with OpenAI client.
--- a/examples/online_serving/openai_chat_completion_client_with_tools.py
+++ b/examples/online_serving/openai_chat_completion_client_with_tools.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Set up this example by starting a vLLM OpenAI-compatible server with tool call
 options enabled. For example:
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from enum import Enum
 from openai import OpenAI
--- a/examples/online_serving/openai_chat_completion_with_reasoning.py
+++ b/examples/online_serving/openai_chat_completion_with_reasoning.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 An example shows how to generate chat completions from reasoning models
 like DeepSeekR1.
--- a/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
+++ b/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 An example shows how to generate chat completions from reasoning models
 like DeepSeekR1.
--- a/examples/online_serving/openai_chat_embedding_client_for_multimodal.py
+++ b/examples/online_serving/openai_chat_embedding_client_for_multimodal.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import base64
 import io
--- a/examples/online_serving/openai_completion_client.py
+++ b/examples/online_serving/openai_completion_client.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from openai import OpenAI
 # Modify OpenAI's API key and API base to use vLLM's API server.
--- a/examples/online_serving/openai_cross_encoder_score.py
+++ b/examples/online_serving/openai_cross_encoder_score.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Example online usage of Score API.
--- a/examples/online_serving/openai_embedding_client.py
+++ b/examples/online_serving/openai_embedding_client.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 from openai import OpenAI
 # Modify OpenAI's API key and API base to use vLLM's API server.
--- a/examples/online_serving/openai_pooling_client.py
+++ b/examples/online_serving/openai_pooling_client.py
@ -1,3 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 Example online usage of Pooling API.
--- a/examples/online_serving/opentelemetry/dummy_client.py
+++ b/examples/online_serving/opentelemetry/dummy_client.py
@ -1,3 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 import requests
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
    OTLPSpanExporter)
--- a/Show More
+++ b/Show More