# Build requirements (PEP 517/518).
[build-system]
# Should be mirrored in requirements/build.txt
requires = [
    "cmake>=3.26",
    "ninja",
    "packaging",
    "setuptools>=61",
    "setuptools-scm>=8.0",
    "torch == 2.6.0",
    "wheel",
    "jinja2",
]
build-backend = "setuptools.build_meta"

# Core project metadata (PEP 621).
[project]
name = "vllm"
authors = [{ name = "vLLM Team" }]
license = { file = "LICENSE" }
readme = "README.md"
description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
classifiers = [
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "License :: OSI Approved :: Apache Software License",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Information Analysis",
]
requires-python = ">=3.9,<3.13"
# These fields are not declared statically here; the build backend supplies them.
dynamic = ["version", "dependencies", "optional-dependencies"]

[project.urls]
Homepage = "https://github.com/vllm-project/vllm"
Documentation = "https://vllm.readthedocs.io/en/latest/"
Slack = "http://slack.vllm.ai/"

[project.scripts]
vllm = "vllm.entrypoints.cli.main:main"

[tool.setuptools_scm]
# no extra settings needed, presence enables setuptools-scm

[tool.setuptools.packages.find]
where = ["."]
exclude = ["benchmarks", "csrc", "docs", "examples", "tests*"]
namespaces = false

[tool.yapfignore]
ignore_patterns = [
    "build/**",
]

[tool.ruff]
# Allow lines to be as long as 80.
line-length = 80
exclude = [
    # External file, leaving license intact
    "examples/other/fp8/quantizer/quantize.py",
]

[tool.ruff.lint]
select = [
    # pycodestyle
    "E",
    # Pyflakes
    "F",
    # pyupgrade
    "UP",
    # flake8-bugbear
    "B",
    # flake8-simplify
    "SIM",
    # isort
    # "I",
    # flake8-logging-format
    "G",
]
ignore = [
    # star imports
    "F405",
    "F403",
    # lambda expression assignment
    "E731",
    # Loop control variable not used within loop body
    "B007",
    # f-string format
    "UP032",
    # Can remove once 3.10+ is the minimum Python version
    "UP007",
]

[tool.ruff.lint.per-file-ignores]
"vllm/third_party/**" = ["ALL"]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
# Python 3.8 typing. TODO: Remove these excludes after v1.0.0
"vllm/adapter_commons/**/*.py" = ["UP006", "UP035"]
"vllm/attention/**/*.py" = ["UP006", "UP035"]
"vllm/compilation/**/*.py" = ["UP006", "UP035"]
"vllm/core/**/*.py" = ["UP006", "UP035"]
"vllm/device_allocator/**/*.py" = ["UP006", "UP035"]
"vllm/distributed/**/*.py" = ["UP006", "UP035"]
"vllm/engine/**/*.py" = ["UP006", "UP035"]
"vllm/executor/**/*.py" = ["UP006", "UP035"]
"vllm/lora/**/*.py" = ["UP006", "UP035"]
"vllm/model_executor/**/*.py" = ["UP006", "UP035"]
"vllm/platforms/**/*.py" = ["UP006", "UP035"]
"vllm/plugins/**/*.py" = ["UP006", "UP035"]
"vllm/profiler/**/*.py" = ["UP006", "UP035"]
"vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
"vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
"vllm/transformers_utils/**/*.py" = ["UP006", "UP035"]
"vllm/triton_utils/**/*.py" = ["UP006", "UP035"]
"vllm/vllm_flash_attn/**/*.py" = ["UP006", "UP035"]
"vllm/worker/**/*.py" = ["UP006", "UP035"]
"vllm/utils.py" = ["UP006", "UP035"]

[tool.mypy]
ignore_missing_imports = true
check_untyped_defs = true
follow_imports = "silent"

# After fixing type errors resulting from follow_imports: "skip" -> "silent",
# move the directory here and remove it from tools/mypy.sh
files = [
    "vllm/*.py",
    "vllm/adapter_commons",
    "vllm/assets",
    "vllm/entrypoints",
    "vllm/core",
    "vllm/inputs",
    "vllm/logging_utils",
    "vllm/multimodal",
    "vllm/platforms",
    "vllm/transformers_utils",
    "vllm/triton_utils",
    "vllm/usage",
]
# TODO(woosuk): Include the code from Megatron and HuggingFace.
exclude = [
    "vllm/model_executor/parallel_utils/|vllm/model_executor/models/",
    # Ignore triton kernels in ops.
    'vllm/attention/ops/.*\.py$',
]

[tool.codespell]
ignore-words-list = "dout, te, indicies, subtile, ElementE"
skip = "tests/models/fixtures/*,tests/prompts/*,benchmarks/sonnet.txt,tests/lora/data/*,build/*,vllm/third_party/*"

[tool.isort]
use_parentheses = true
skip_gitignore = true

[tool.pytest.ini_options]
markers = [
    "skip_global_cleanup",
    "core_model: enable this model test in each PR instead of only nightly",
    "cpu_model: enable this model test in CPU tests",
    "quant_model: run this model test under Quantized category",
    "split: run this test as part of a split",
    "distributed: run this test only in distributed GPU tests",
    "skip_v1: do not run this test with v1",
    "optional: optional tests that are automatically skipped, include --optional to run them",
]

[tool.pymarkdown]
plugins.md004.style = "sublist"  # ul-style
plugins.md013.enabled = false  # line-length
plugins.md041.enabled = false  # first-line-h1
plugins.md033.enabled = false  # inline-html
plugins.md024.allow_different_nesting = true  # no-duplicate-headers