Move linting to pre-commit (#11975)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-01-20 06:58:01 +00:00 · 2025-01-20 06:58:01 +00:00 · 3ea7b94523
commit 3ea7b94523
parent 51ef828f10
26 changed files with 724 additions and 1286 deletions
--- a/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
+++ b/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
@ -43,7 +43,7 @@ main() {
    


-    # The figures should be genereated by a separate process outside the CI/CD pipeline
+    # The figures should be generated by a separate process outside the CI/CD pipeline

    # # generate figures
    # python3 -m pip install tabulate pandas matplotlib
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@ -1,40 +0,0 @@
-name: Lint GitHub Actions workflows
-on:
-  push:
-    branches:
-      - "main"
-    paths:
-      - '.github/workflows/*.ya?ml'
-      - '.github/workflows/actionlint.*'
-      - '.github/workflows/matchers/actionlint.json'
-  pull_request:
-    branches:
-      - "main"
-    paths:
-      - '.github/workflows/*.ya?ml'
-      - '.github/workflows/actionlint.*'
-      - '.github/workflows/matchers/actionlint.json'
-
-env:
-  LC_ALL: en_US.UTF-8
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  contents: read
-
-jobs:
-  actionlint:
-    runs-on: ubuntu-latest
-    steps:
-      - name: "Checkout"
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-
-      - name: "Run actionlint"
-        run: |
-          echo "::add-matcher::.github/workflows/matchers/actionlint.json"
-          tools/actionlint.sh -color
--- a/.github/workflows/clang-format.yml
+++ b/.github/workflows/clang-format.yml
@ -1,53 +0,0 @@
-name: clang-format
-
-on:
-  # Trigger the workflow on push or pull request,
-  # but only for the main branch
-  push:
-    branches:
-      - main
-    paths:
-      - '**/*.h'
-      - '**/*.cpp'
-      - '**/*.cu'
-      - '**/*.cuh'
-      - '.github/workflows/clang-format.yml'
-  pull_request:
-    branches:
-      - main
-    paths:
-      - '**/*.h'
-      - '**/*.cpp'
-      - '**/*.cu'
-      - '**/*.cuh'
-      - '.github/workflows/clang-format.yml'
-
-jobs:
-  clang-format:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.11"]
-    steps:
-    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install clang-format==18.1.5
-    - name: Running clang-format
-      run: |
-        EXCLUDES=(
-            'csrc/moe/topk_softmax_kernels.cu'
-            'csrc/quantization/gguf/ggml-common.h'
-            'csrc/quantization/gguf/dequantize.cuh'
-            'csrc/quantization/gguf/vecdotq.cuh'
-            'csrc/quantization/gguf/mmq.cuh'
-            'csrc/quantization/gguf/mmvq.cuh'
-        )
-        find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
-            | grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
-            | xargs clang-format --dry-run --Werror
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@ -1,45 +0,0 @@
-name: codespell
-
-on:
-  # Trigger the workflow on push or pull request,
-  # but only for the main branch
-  push:
-    branches:
-      - main
-    paths:
-      - "**/*.py"
-      - "**/*.md"
-      - "**/*.rst"
-      - pyproject.toml
-      - requirements-lint.txt
-      - .github/workflows/codespell.yml
-  pull_request:
-    branches:
-      - main
-    paths:
-      - "**/*.py"
-      - "**/*.md"
-      - "**/*.rst"
-      - pyproject.toml
-      - requirements-lint.txt
-      - .github/workflows/codespell.yml
-
-jobs:
-  codespell:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.12"]
-    steps:
-    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install -r requirements-lint.txt
-    - name: Spelling check with codespell
-      run: |
-        codespell --toml pyproject.toml
--- a/.github/workflows/doc-lint.yml
+++ b/.github/workflows/doc-lint.yml
@ -1,32 +0,0 @@
-name: Lint documentation
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - "docs/**"
-  pull_request:
-    branches:
-      - main
-    paths:
-      - "docs/**"
-
-jobs:
-  doc-lint:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.12"]
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements-lint.txt
-      - name: Linting docs
-        run: tools/doc-lint.sh
--- a/.github/workflows/dummy.yml
+++ b/.github/workflows/dummy.yml
@ -0,0 +1,20 @@
+name: dummy-checks
+
+on:
+  pull_request:
+
+jobs:
+  mypy:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.12"]
+    steps:
+      - run: echo "This is a dummy step that always passes"
+  ruff:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.12"]
+    steps:
+      - run: echo "This is a dummy step that always passes"
--- a/.github/workflows/matchers/ruff.json
+++ b/.github/workflows/matchers/ruff.json
@ -1,17 +0,0 @@
-{
-    "problemMatcher": [
-      {
-        "owner": "ruff",
-        "pattern": [
-          {
-            "regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$",
-            "file": 1,
-            "line": 2,
-            "column": 3,
-            "code": 4,
-            "message": 5
-          }
-        ]
-      }
-    ]
-  }
--- a/.github/workflows/mypy.yaml
+++ b/.github/workflows/mypy.yaml
@ -1,51 +0,0 @@
-name: mypy
-
-on:
-  # Trigger the workflow on push or pull request,
-  # but only for the main branch
-  push:
-    branches:
-      - main
-    paths:
-      - '**/*.py'
-      - '.github/workflows/mypy.yaml'
-      - 'tools/mypy.sh'
-      - 'pyproject.toml'
-  pull_request:
-    branches:
-      - main
-    # This workflow is only relevant when one of the following files changes.
-    # However, we have github configured to expect and require this workflow
-    # to run and pass before github with auto-merge a pull request. Until github
-    # allows more flexible auto-merge policy, we can just run this on every PR.
-    # It doesn't take that long to run, anyway.
-    #paths:
-    #  - '**/*.py'
-    #  - '.github/workflows/mypy.yaml'
-    #  - 'tools/mypy.sh'
-    #  - 'pyproject.toml'
-
-jobs:
-  mypy:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-    steps:
-    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install mypy==1.11.1
-        pip install types-setuptools
-        pip install types-PyYAML
-        pip install types-requests
-        pip install types-setuptools
-    - name: Mypy
-      run: |
-        echo "::add-matcher::.github/workflows/matchers/mypy.json"
-        tools/mypy.sh 1 ${{ matrix.python-version }}
--- a/.github/workflows/png-lint.yml
+++ b/.github/workflows/png-lint.yml
@ -1,37 +0,0 @@
-name: Lint PNG exports from excalidraw
-on:
-  push:
-    branches:
-      - "main"
-    paths:
-      - '*.excalidraw.png'
-      - '.github/workflows/png-lint.yml'
-  pull_request:
-    branches:
-      - "main"
-    paths:
-      - '*.excalidraw.png'
-      - '.github/workflows/png-lint.yml'
-
-env:
-  LC_ALL: en_US.UTF-8
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  contents: read
-
-jobs:
-  actionlint:
-    runs-on: ubuntu-latest
-    steps:
-      - name: "Checkout"
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-
-      - name: "Run png-lint.sh to check excalidraw exported images"
-        run: |
-          tools/png-lint.sh
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@ -0,0 +1,17 @@
+name: pre-commit
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+    - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+      with:
+        python-version: "3.12"
+    - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+    - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@ -1,52 +0,0 @@
-name: ruff
-
-on:
-  # Trigger the workflow on push or pull request,
-  # but only for the main branch
-  push:
-    branches:
-      - main
-    paths:
-      - "**/*.py"
-      - pyproject.toml
-      - requirements-lint.txt
-      - .github/workflows/matchers/ruff.json
-      - .github/workflows/ruff.yml
-  pull_request:
-    branches:
-      - main
-    # This workflow is only relevant when one of the following files changes.
-    # However, we have github configured to expect and require this workflow
-    # to run and pass before github with auto-merge a pull request. Until github
-    # allows more flexible auto-merge policy, we can just run this on every PR.
-    # It doesn't take that long to run, anyway.
-    #paths:
-    #  - "**/*.py"
-    #  - pyproject.toml
-    #  - requirements-lint.txt
-    #  - .github/workflows/matchers/ruff.json
-    #  - .github/workflows/ruff.yml
-
-jobs:
-  ruff:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.12"]
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements-lint.txt
-      - name: Analysing the code with ruff
-        run: |
-          echo "::add-matcher::.github/workflows/matchers/ruff.json"
-          ruff check --output-format github .
-      - name: Run isort
-        run: |
-          isort . --check-only
--- a/.github/workflows/shellcheck.yml
+++ b/.github/workflows/shellcheck.yml
@ -1,37 +0,0 @@
-name: Lint shell scripts
-on:
-  push:
-    branches:
-      - "main"
-    paths:
-      - '**/*.sh'
-      - '.github/workflows/shellcheck.yml'
-  pull_request:
-    branches:
-      - "main"
-    paths:
-      - '**/*.sh'
-      - '.github/workflows/shellcheck.yml'
-
-env:
-  LC_ALL: en_US.UTF-8
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  contents: read
-
-jobs:
-  shellcheck:
-    runs-on: ubuntu-latest
-    steps:
-      - name: "Checkout"
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-
-      - name: "Check shell scripts"
-        run: |
-          tools/shellcheck.sh
--- a/.github/workflows/yapf.yml
+++ b/.github/workflows/yapf.yml
@ -1,38 +0,0 @@
-name: yapf
-
-on:
-  # Trigger the workflow on push or pull request,
-  # but only for the main branch
-  push:
-    branches:
-      - main
-    paths:
-      - "**/*.py"
-      - .github/workflows/yapf.yml
-  pull_request:
-    branches:
-      - main
-    paths:
-      - "**/*.py"
-      - .github/workflows/yapf.yml
-
-jobs:
-  yapf:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.12"]
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install yapf==0.32.0
-          pip install toml==0.10.2
-      - name: Running yapf
-        run: |
-          yapf --diff --recursive .
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,73 @@
+repos:
+- repo: https://github.com/google/yapf
+  rev: v0.32.0
+  hooks:
+  - id: yapf
+    args: [--in-place, --verbose]
+    additional_dependencies: [toml] # TODO: Remove when yapf is upgraded
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.6.5
+  hooks:
+  - id: ruff
+    args: [--output-format, github]
+- repo: https://github.com/codespell-project/codespell
+  rev: v2.3.0
+  hooks:
+  - id: codespell
+    exclude: 'benchmarks/sonnet.txt|(build|tests/(lora/data|models/fixtures|prompts))/.*'
+- repo: https://github.com/PyCQA/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
+- repo: https://github.com/pre-commit/mirrors-clang-format
+  rev: v18.1.5
+  hooks:
+  - id: clang-format
+    exclude: 'csrc/(moe/topk_softmax_kernels.cu|quantization/gguf/(ggml-common.h|dequantize.cuh|vecdotq.cuh|mmq.cuh|mmvq.cuh))'
+    types_or: [c++, cuda]
+    args: [--style=file, --verbose]
+- repo: https://github.com/jackdewinter/pymarkdown
+  rev: v0.9.27
+  hooks:
+  - id: pymarkdown
+    files: docs/.*
+- repo: local
+  hooks:
+  - id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
+    name: Run mypy for Python 3.9
+    entry: tools/mypy.sh 1 "3.9"
+    language: python
+    types: [python]
+    additional_dependencies: &mypy_deps [mypy==1.11.1, types-setuptools, types-PyYAML, types-requests]
+  - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
+    name: Run mypy for Python 3.10
+    entry: tools/mypy.sh 1 "3.10"
+    language: python
+    types: [python]
+    additional_dependencies: *mypy_deps
+  - id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
+    name: Run mypy for Python 3.11
+    entry: tools/mypy.sh 1 "3.11"
+    language: python
+    types: [python]
+    additional_dependencies: *mypy_deps
+  - id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
+    name: Run mypy for Python 3.12
+    entry: tools/mypy.sh 1 "3.12"
+    language: python
+    types: [python]
+    additional_dependencies: *mypy_deps
+  - id: shellcheck
+    name: Lint shell scripts
+    entry: tools/shellcheck.sh
+    language: script
+    types: [shell]
+  - id: png-lint
+    name: Lint PNG exports from excalidraw
+    entry: tools/png-lint.sh
+    language: script
+    types: [png]
+- repo: https://github.com/rhysd/actionlint
+  rev: v1.7.6
+  hooks:
+  - id: actionlint
--- a/csrc/cpu/cpu_types_arm.hpp
+++ b/csrc/cpu/cpu_types_arm.hpp
@ -24,7 +24,8 @@ namespace vec_op {
 #else
  #define CPU_KERNEL_GUARD_IN(NAME) \
    std::cout << #NAME << " invoked." << std::endl;
-#define CPU_KERNEL_GUARD_OUT(NAME) std::cout << #NAME << " exit." << std::endl;
+  #define CPU_KERNEL_GUARD_OUT(NAME) \
+    std::cout << #NAME << " exit." << std::endl;
 #endif

 #define FORCE_INLINE __attribute__((always_inline)) inline
@ -34,7 +35,7 @@ namespace {
 constexpr void unroll_loop_item(std::integer_sequence<T, indexes...>, F&& f) {
  (f(std::integral_constant<T, indexes>{}), ...);
 };
-}; 
+};  // namespace

 template <typename T, T count, typename F,
          typename = std::enable_if_t<std::is_invocable_v<F, T>>>
@ -42,7 +43,8 @@ constexpr void unroll_loop(F &&f) {
  unroll_loop_item(std::make_integer_sequence<T, count>{}, std::forward<F>(f));
 }

-template <typename T> struct Vec {
+template <typename T>
+struct Vec {
  constexpr static int get_elem_num() { return T::VEC_ELEM_NUM; };
 };

@ -59,9 +61,7 @@ struct FP16Vec8 : public Vec<FP16Vec8> {

  explicit FP16Vec8(const FP32Vec8&);

-  void save(void *ptr) const {
-    vst1q_f16(static_cast<__fp16 *>(ptr), reg);
-  }
+  void save(void* ptr) const { vst1q_f16(static_cast<__fp16*>(ptr), reg); }
 };

 struct FP16Vec16 : public Vec<FP16Vec16> {
@ -106,8 +106,7 @@ struct FP16Vec16 : public Vec<FP16Vec16> {
    if (remainder > 0) {
      float16x8_t temp = reg.val[full_blocks];
      __fp16* fp16_ptr = reinterpret_cast<__fp16*>(ptr);
-            switch (remainder)
-            {
+      switch (remainder) {
        case 1:
          fp16_ptr[full_blocks * 8 + 0] = vgetq_lane_f16(temp, 0);
          break;
@ -158,7 +157,6 @@ struct FP16Vec16 : public Vec<FP16Vec16> {
  }
 };

-
 #ifdef ARM_BF16_SUPPORT
 struct BF16Vec8 : public Vec<BF16Vec8> {
  constexpr static int VEC_ELEM_NUM = 8;
@ -172,7 +170,8 @@ struct BF16Vec8 : public Vec<BF16Vec8> {

  explicit BF16Vec8(const FP32Vec8&);

-  explicit BF16Vec8(float32x4x2_t v) : reg(vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.val[0]), v.val[1])) {};  
+  explicit BF16Vec8(float32x4x2_t v)
+      : reg(vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.val[0]), v.val[1])) {};

  void save(void* ptr) const { *reinterpret_cast<bfloat16x8_t*>(ptr) = reg; }
 };
@ -189,10 +188,9 @@ struct BF16Vec16 : public Vec<BF16Vec16> {

  explicit BF16Vec16(const FP32Vec16&);

-  explicit BF16Vec16(float32x4x4_t v) : reg({
-    vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.val[0]), v.val[1]),
-    vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.val[2]), v.val[3])
-  }){};
+  explicit BF16Vec16(float32x4x4_t v)
+      : reg({vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.val[0]), v.val[1]),
+             vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.val[2]), v.val[3])}) {};

  void save(void* ptr) const { *reinterpret_cast<bfloat16x8x2_t*>(ptr) = reg; };
 };
@ -207,12 +205,8 @@ struct BF16Vec32 : public Vec<BF16Vec32> {

  explicit BF16Vec32(bfloat16x8x4_t data) : reg(data) {};

-  explicit BF16Vec32(const BF16Vec8 &vec8_data) : reg({
-    vec8_data.reg,
-    vec8_data.reg,
-    vec8_data.reg,
-    vec8_data.reg
-  }) {};
+  explicit BF16Vec32(const BF16Vec8& vec8_data)
+      : reg({vec8_data.reg, vec8_data.reg, vec8_data.reg, vec8_data.reg}) {};

  void save(void* ptr) const { *reinterpret_cast<bfloat16x8x4_t*>(ptr) = reg; };
 };
@ -252,7 +246,8 @@ struct FP32Vec8 : public Vec<FP32Vec8> {

  explicit FP32Vec8() : reg({vmovq_n_f32(0.0), vmovq_n_f32(0.0)}) {};

-  explicit FP32Vec8(const float *ptr) : reg({vld1q_f32(ptr), vld1q_f32(ptr + 4)}) {};
+  explicit FP32Vec8(const float* ptr)
+      : reg({vld1q_f32(ptr), vld1q_f32(ptr + 4)}) {};

  explicit FP32Vec8(float32x4x2_t data) : reg(data) {};

@ -263,13 +258,16 @@ struct FP32Vec8 : public Vec<FP32Vec8> {
    reg.val[1] = vcvt_f32_f16(vget_high_f16(v.reg));
  };

-  explicit FP32Vec8(float16x8_t v) : reg({vcvt_f32_f16(vget_low_f16(v)), vcvt_f32_f16(vget_high_f16(v))}) {};
+  explicit FP32Vec8(float16x8_t v)
+      : reg({vcvt_f32_f16(vget_low_f16(v)), vcvt_f32_f16(vget_high_f16(v))}) {};

 #ifdef ARM_BF16_SUPPORT

-  explicit FP32Vec8(bfloat16x8_t v) : reg({vcvtq_low_f32_bf16(v), vcvtq_high_f32_bf16(v)}) {};
+  explicit FP32Vec8(bfloat16x8_t v)
+      : reg({vcvtq_low_f32_bf16(v), vcvtq_high_f32_bf16(v)}) {};

-  explicit FP32Vec8(const BF16Vec8 &v) : reg({vcvtq_low_f32_bf16(v.reg), vcvtq_high_f32_bf16(v.reg)}) {};
+  explicit FP32Vec8(const BF16Vec8& v)
+      : reg({vcvtq_low_f32_bf16(v.reg), vcvtq_high_f32_bf16(v.reg)}) {};

 #endif

@ -277,7 +275,8 @@ struct FP32Vec8 : public Vec<FP32Vec8> {
    AliasReg ar;
    ar.reg = reg;
    float answer = 0;
-    unroll_loop<int, VEC_ELEM_NUM>([&answer, &ar](int i) { answer += ar.values[i]; });
+    unroll_loop<int, VEC_ELEM_NUM>(
+        [&answer, &ar](int i) { answer += ar.values[i]; });

    return answer;
  }
@ -324,10 +323,14 @@ struct FP32Vec8 : public Vec<FP32Vec8> {
    AliasReg ar;
    ar.reg = reg;

-    float32x2_t er_vec0 = {static_cast<float32_t>(erf(ar.values[0])), static_cast<float32_t>(erf(ar.values[1]))};
-    float32x2_t er_vec1 = {static_cast<float32_t>(erf(ar.values[2])), static_cast<float32_t>(erf(ar.values[3]))};
-    float32x2_t er_vec2 = {static_cast<float32_t>(erf(ar.values[4])), static_cast<float32_t>(erf(ar.values[5]))};
-    float32x2_t er_vec3 = {static_cast<float32_t>(erf(ar.values[6])), static_cast<float32_t>(erf(ar.values[7]))};
+    float32x2_t er_vec0 = {static_cast<float32_t>(erf(ar.values[0])),
+                           static_cast<float32_t>(erf(ar.values[1]))};
+    float32x2_t er_vec1 = {static_cast<float32_t>(erf(ar.values[2])),
+                           static_cast<float32_t>(erf(ar.values[3]))};
+    float32x2_t er_vec2 = {static_cast<float32_t>(erf(ar.values[4])),
+                           static_cast<float32_t>(erf(ar.values[5]))};
+    float32x2_t er_vec3 = {static_cast<float32_t>(erf(ar.values[6])),
+                           static_cast<float32_t>(erf(ar.values[7]))};

    float32x4_t result0 = vcombine_f32(er_vec0, er_vec1);
    float32x4_t result1 = vcombine_f32(er_vec2, er_vec3);
@ -340,19 +343,23 @@ struct FP32Vec8 : public Vec<FP32Vec8> {
  }

  FP32Vec8 operator*(const FP32Vec8& b) const {
-    return FP32Vec8(float32x4x2_t({vmulq_f32(reg.val[0], b.reg.val[0]), vmulq_f32(reg.val[1], b.reg.val[1])}));
+    return FP32Vec8(float32x4x2_t({vmulq_f32(reg.val[0], b.reg.val[0]),
+                                   vmulq_f32(reg.val[1], b.reg.val[1])}));
  }

  FP32Vec8 operator+(const FP32Vec8& b) const {
-    return FP32Vec8(float32x4x2_t({vaddq_f32(reg.val[0], b.reg.val[0]), vaddq_f32(reg.val[1], b.reg.val[1])}));
+    return FP32Vec8(float32x4x2_t({vaddq_f32(reg.val[0], b.reg.val[0]),
+                                   vaddq_f32(reg.val[1], b.reg.val[1])}));
  }

  FP32Vec8 operator-(const FP32Vec8& b) const {
-    return FP32Vec8(float32x4x2_t({vsubq_f32(reg.val[0], b.reg.val[0]), vsubq_f32(reg.val[1], b.reg.val[1])}));
+    return FP32Vec8(float32x4x2_t({vsubq_f32(reg.val[0], b.reg.val[0]),
+                                   vsubq_f32(reg.val[1], b.reg.val[1])}));
  }

  FP32Vec8 operator/(const FP32Vec8& b) const {
-    return FP32Vec8(float32x4x2_t({vdivq_f32(reg.val[0], b.reg.val[0]), vdivq_f32(reg.val[1], b.reg.val[1])}));
+    return FP32Vec8(float32x4x2_t({vdivq_f32(reg.val[0], b.reg.val[0]),
+                                   vdivq_f32(reg.val[1], b.reg.val[1])}));
  }

  void save(float* ptr) const {
@ -370,11 +377,16 @@ struct FP32Vec16 : public Vec<FP32Vec16> {

  float32x4x4_t reg;

-  explicit FP32Vec16(float v) : reg({vmovq_n_f32(v), vmovq_n_f32(v), vmovq_n_f32(v), vmovq_n_f32(v)}) {}
+  explicit FP32Vec16(float v)
+      : reg({vmovq_n_f32(v), vmovq_n_f32(v), vmovq_n_f32(v), vmovq_n_f32(v)}) {}

-  explicit FP32Vec16() : reg({vmovq_n_f32(0.0), vmovq_n_f32(0.0), vmovq_n_f32(0.0), vmovq_n_f32(0.0)}) {}
+  explicit FP32Vec16()
+      : reg({vmovq_n_f32(0.0), vmovq_n_f32(0.0), vmovq_n_f32(0.0),
+             vmovq_n_f32(0.0)}) {}

-  explicit FP32Vec16(const float *ptr) : reg({vld1q_f32(ptr), vld1q_f32(ptr + 4), vld1q_f32(ptr + 8), vld1q_f32(ptr + 12)}) {}
+  explicit FP32Vec16(const float* ptr)
+      : reg({vld1q_f32(ptr), vld1q_f32(ptr + 4), vld1q_f32(ptr + 8),
+             vld1q_f32(ptr + 12)}) {}

  explicit FP32Vec16(float32x4x4_t data) : reg(data) {}

@ -390,12 +402,9 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  explicit FP32Vec16(const FP16Vec8& v) : FP32Vec16(FP32Vec8(v.reg)) {}

 #ifdef ARM_BF16_SUPPORT
-  explicit FP32Vec16(bfloat16x8x2_t v) : reg({
-    vcvtq_low_f32_bf16(v.val[0]),
-    vcvtq_high_f32_bf16(v.val[0]),
-    vcvtq_low_f32_bf16(v.val[1]),
-    vcvtq_high_f32_bf16(v.val[1])
-  }) {};
+  explicit FP32Vec16(bfloat16x8x2_t v)
+      : reg({vcvtq_low_f32_bf16(v.val[0]), vcvtq_high_f32_bf16(v.val[0]),
+             vcvtq_low_f32_bf16(v.val[1]), vcvtq_high_f32_bf16(v.val[1])}) {};
 #endif

  explicit FP32Vec16(const FP32Vec4& data) {
@ -406,12 +415,11 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  };

 #ifdef ARM_BF16_SUPPORT
-  explicit FP32Vec16(const BF16Vec16 &v) : reg({
-    vcvtq_low_f32_bf16(v.reg.val[0]),
+  explicit FP32Vec16(const BF16Vec16& v)
+      : reg({vcvtq_low_f32_bf16(v.reg.val[0]),
             vcvtq_high_f32_bf16(v.reg.val[0]),
             vcvtq_low_f32_bf16(v.reg.val[1]),
-    vcvtq_high_f32_bf16(v.reg.val[1])
-  }) {};
+             vcvtq_high_f32_bf16(v.reg.val[1])}) {};

  explicit FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {};
 #endif
@ -424,49 +432,45 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  };

  FP32Vec16 operator+(const FP32Vec16& b) const {
-    return FP32Vec16(float32x4x4_t({
-        vaddq_f32(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(float32x4x4_t({vaddq_f32(reg.val[0], b.reg.val[0]),
                                    vaddq_f32(reg.val[1], b.reg.val[1]),
                                    vaddq_f32(reg.val[2], b.reg.val[2]),
                                    vaddq_f32(reg.val[3], b.reg.val[3])}));
  };

  FP32Vec16 operator*(const FP32Vec16& b) const {
-    return FP32Vec16(float32x4x4_t({
-        vmulq_f32(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(float32x4x4_t({vmulq_f32(reg.val[0], b.reg.val[0]),
                                    vmulq_f32(reg.val[1], b.reg.val[1]),
                                    vmulq_f32(reg.val[2], b.reg.val[2]),
                                    vmulq_f32(reg.val[3], b.reg.val[3])}));
  };

  FP32Vec16 operator-(const FP32Vec16& b) const {
-    return FP32Vec16(float32x4x4_t({
-        vsubq_f32(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(float32x4x4_t({vsubq_f32(reg.val[0], b.reg.val[0]),
                                    vsubq_f32(reg.val[1], b.reg.val[1]),
                                    vsubq_f32(reg.val[2], b.reg.val[2]),
-        vsubq_f32(reg.val[3], b.reg.val[3])
-    }));
+                                    vsubq_f32(reg.val[3], b.reg.val[3])}));
  };

  FP32Vec16 operator/(const FP32Vec16& b) const {
-    return FP32Vec16(float32x4x4_t({
-        vdivq_f32(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(float32x4x4_t({vdivq_f32(reg.val[0], b.reg.val[0]),
                                    vdivq_f32(reg.val[1], b.reg.val[1]),
                                    vdivq_f32(reg.val[2], b.reg.val[2]),
-        vdivq_f32(reg.val[3], b.reg.val[3])
-    }));
+                                    vdivq_f32(reg.val[3], b.reg.val[3])}));
  };

  float reduce_sum() const {
    AliasReg ar;
    ar.reg = reg;
    float answer = 0;
-    unroll_loop<int, VEC_ELEM_NUM>([&answer, &ar](int i) { answer += ar.values[i]; });
+    unroll_loop<int, VEC_ELEM_NUM>(
+        [&answer, &ar](int i) { answer += ar.values[i]; });

    return answer;
  };

-  template <int group_size> float reduce_sub_sum(int idx) {
+  template <int group_size>
+  float reduce_sub_sum(int idx) {
    static_assert(VEC_ELEM_NUM % group_size == 0);

    AliasReg ar;
@ -487,21 +491,38 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  };
 };

-template <typename T> struct VecType { using vec_type = void; };
+template <typename T>
+struct VecType {
+  using vec_type = void;
+};

-template <typename T> using vec_t = typename VecType<T>::vec_type;
+template <typename T>
+using vec_t = typename VecType<T>::vec_type;

-template <> struct VecType<float> { using vec_type = FP32Vec8; };
+template <>
+struct VecType<float> {
+  using vec_type = FP32Vec8;
+};

-template <> struct VecType<c10::Half> { using vec_type = FP16Vec8; };
+template <>
+struct VecType<c10::Half> {
+  using vec_type = FP16Vec8;
+};

 #ifdef ARM_BF16_SUPPORT
-template <> struct VecType<c10::BFloat16> { using vec_type = BF16Vec8; };
+template <>
+struct VecType<c10::BFloat16> {
+  using vec_type = BF16Vec8;
+};
 #endif

-template <typename T> void storeFP32(float v, T *ptr) { *ptr = v; }
+template <typename T>
+void storeFP32(float v, T* ptr) {
+  *ptr = v;
+}

-template <> inline void storeFP32<c10::Half>(float v, c10::Half *ptr) {
+template <>
+inline void storeFP32<c10::Half>(float v, c10::Half* ptr) {
  *reinterpret_cast<__fp16*>(ptr) = v;
 }

@ -523,7 +544,6 @@ inline FP16Vec8 :: FP16Vec8(const FP32Vec8 &v) {
 };

 inline void fma(FP32Vec16& acc, FP32Vec16& a, FP32Vec16& b) {
-
  acc.reg.val[0] = vfmaq_f32(acc.reg.val[0], a.reg.val[0], b.reg.val[0]);
  acc.reg.val[1] = vfmaq_f32(acc.reg.val[1], a.reg.val[1], b.reg.val[1]);
  acc.reg.val[2] = vfmaq_f32(acc.reg.val[2], a.reg.val[2], b.reg.val[2]);
@ -532,7 +552,6 @@ inline void fma(FP32Vec16 &acc, FP32Vec16 &a, FP32Vec16 &b) {

 #ifdef ARM_BF16_SUPPORT
 inline void fma(FP32Vec16& acc, BF16Vec32& a, BF16Vec32& b) {
-
  float32x4_t a0_low = vcvt_f32_bf16(vget_low_bf16(a.reg.val[0]));
  float32x4_t a0_high = vcvt_f32_bf16(vget_high_bf16(a.reg.val[0]));
  float32x4_t a1_low = vcvt_f32_bf16(vget_low_bf16(a.reg.val[1]));
@ -551,17 +570,17 @@ inline void fma(FP32Vec16 &acc, BF16Vec32 &a, BF16Vec32 &b) {
 #endif

 #ifdef ARM_BF16_SUPPORT
-inline BF16Vec8::BF16Vec8(const FP32Vec8 &v) : reg(vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.reg.val[0]), v.reg.val[1])) {};
+inline BF16Vec8::BF16Vec8(const FP32Vec8& v)
+    : reg(vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.reg.val[0]), v.reg.val[1])) {
+      };

-inline BF16Vec16::BF16Vec16(const FP32Vec16 &v) : reg({
-    vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.reg.val[0]), v.reg.val[1]),
-    vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.reg.val[2]), v.reg.val[3])
-  }){};
+inline BF16Vec16::BF16Vec16(const FP32Vec16& v)
+    : reg({vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.reg.val[0]), v.reg.val[1]),
+           vcvtq_high_bf16_f32(vcvtq_low_bf16_f32(v.reg.val[2]),
+                               v.reg.val[3])}) {};
 #endif

-inline void prefetch(const void *addr) {
-    __builtin_prefetch(addr, 0, 1);
-};
+inline void prefetch(const void* addr) { __builtin_prefetch(addr, 0, 1); };

 #ifdef ARM_BF16_SUPPORT
 template <>
@ -569,4 +588,4 @@ inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16 *ptr) {
  *reinterpret_cast<__bf16*>(ptr) = vcvth_bf16_f32(v);
 };
 #endif
-};
+};  // namespace vec_op
--- a/csrc/cpu/cpu_types_vsx.hpp
+++ b/csrc/cpu/cpu_types_vsx.hpp
@ -22,7 +22,8 @@ namespace vec_op {
 #else
  #define CPU_KERNEL_GUARD_IN(NAME) \
    std::cout << #NAME << " invoked." << std::endl;
-#define CPU_KERNEL_GUARD_OUT(NAME) std::cout << #NAME << " exit." << std::endl;
+  #define CPU_KERNEL_GUARD_OUT(NAME) \
+    std::cout << #NAME << " exit." << std::endl;
 #endif

 #define FORCE_INLINE __attribute__((always_inline)) inline
@ -40,7 +41,8 @@ constexpr void unroll_loop(F &&f) {
  unroll_loop_item(std::make_integer_sequence<T, count>{}, std::forward<F>(f));
 }

-template <typename T> struct Vec {
+template <typename T>
+struct Vec {
  constexpr static int get_elem_num() { return T::VEC_ELEM_NUM; }
 };

@ -73,7 +75,9 @@ struct BF16Vec8 : public Vec<BF16Vec8> {

  explicit BF16Vec8(const FP32Vec8&);

-  void save(void *ptr) const { *reinterpret_cast<__vector signed short *>(ptr) = reg; }
+  void save(void* ptr) const {
+    *reinterpret_cast<__vector signed short*>(ptr) = reg;
+  }
 };

 struct BF16Vec16 : public Vec<BF16Vec16> {
@ -107,12 +111,8 @@ struct BF16Vec32 : public Vec<BF16Vec32> {

  explicit BF16Vec32(ss16x8x4_t data) : reg(data) {}

-  explicit BF16Vec32(const BF16Vec8 &vec8_data) : reg({
-    vec8_data.reg,
-    vec8_data.reg,
-    vec8_data.reg,
-    vec8_data.reg
-  }) {}
+  explicit BF16Vec32(const BF16Vec8& vec8_data)
+      : reg({vec8_data.reg, vec8_data.reg, vec8_data.reg, vec8_data.reg}) {}

  void save(void* ptr) const { *reinterpret_cast<ss16x8x4_t*>(ptr) = reg; }
 };
@ -177,7 +177,8 @@ struct FP32Vec8 : public Vec<FP32Vec8> {
    AliasReg ar;
    ar.reg = reg;
    float result = 0;
-    unroll_loop<int, VEC_ELEM_NUM>([&result, &ar](int i) { result += ar.values[i]; });
+    unroll_loop<int, VEC_ELEM_NUM>(
+        [&result, &ar](int i) { result += ar.values[i]; });

    return result;
  }
@ -231,19 +232,23 @@ struct FP32Vec8 : public Vec<FP32Vec8> {
  }

  FP32Vec8 operator*(const FP32Vec8& b) const {
-    return FP32Vec8({vec_mul(reg.val[0], b.reg.val[0]), vec_mul(reg.val[1], b.reg.val[1])});
+    return FP32Vec8(
+        {vec_mul(reg.val[0], b.reg.val[0]), vec_mul(reg.val[1], b.reg.val[1])});
  }

  FP32Vec8 operator+(const FP32Vec8& b) const {
-    return FP32Vec8({vec_add(reg.val[0], b.reg.val[0]), vec_add(reg.val[1], b.reg.val[1])});
+    return FP32Vec8(
+        {vec_add(reg.val[0], b.reg.val[0]), vec_add(reg.val[1], b.reg.val[1])});
  }

  FP32Vec8 operator-(const FP32Vec8& b) const {
-    return FP32Vec8({vec_sub(reg.val[0], b.reg.val[0]), vec_sub(reg.val[1], b.reg.val[1])});
+    return FP32Vec8(
+        {vec_sub(reg.val[0], b.reg.val[0]), vec_sub(reg.val[1], b.reg.val[1])});
  }

  FP32Vec8 operator/(const FP32Vec8& b) const {
-    return FP32Vec8({vec_div(reg.val[0], b.reg.val[0]), vec_div(reg.val[1], b.reg.val[1])});
+    return FP32Vec8(
+        {vec_div(reg.val[0], b.reg.val[0]), vec_div(reg.val[1], b.reg.val[1])});
  }

  void save(float* ptr) const {
@ -315,32 +320,28 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  explicit FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {}

  FP32Vec16 operator*(const FP32Vec16& b) const {
-    return FP32Vec16(f32x4x4_t({
-        vec_mul(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(f32x4x4_t({vec_mul(reg.val[0], b.reg.val[0]),
                                vec_mul(reg.val[1], b.reg.val[1]),
                                vec_mul(reg.val[2], b.reg.val[2]),
                                vec_mul(reg.val[3], b.reg.val[3])}));
  }

  FP32Vec16 operator+(const FP32Vec16& b) const {
-    return FP32Vec16(f32x4x4_t({
-        vec_add(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(f32x4x4_t({vec_add(reg.val[0], b.reg.val[0]),
                                vec_add(reg.val[1], b.reg.val[1]),
                                vec_add(reg.val[2], b.reg.val[2]),
                                vec_add(reg.val[3], b.reg.val[3])}));
  }

  FP32Vec16 operator-(const FP32Vec16& b) const {
-    return FP32Vec16(f32x4x4_t({
-        vec_sub(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(f32x4x4_t({vec_sub(reg.val[0], b.reg.val[0]),
                                vec_sub(reg.val[1], b.reg.val[1]),
                                vec_sub(reg.val[2], b.reg.val[2]),
                                vec_sub(reg.val[3], b.reg.val[3])}));
  }

  FP32Vec16 operator/(const FP32Vec16& b) const {
-    return FP32Vec16(f32x4x4_t({
-        vec_div(reg.val[0], b.reg.val[0]),
+    return FP32Vec16(f32x4x4_t({vec_div(reg.val[0], b.reg.val[0]),
                                vec_div(reg.val[1], b.reg.val[1]),
                                vec_div(reg.val[2], b.reg.val[2]),
                                vec_div(reg.val[3], b.reg.val[3])}));
@ -350,12 +351,14 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
    AliasReg ar;
    ar.reg = reg;
    float result = 0;
-    unroll_loop<int, VEC_ELEM_NUM>([&result, &ar](int i) { result += ar.values[i]; });
+    unroll_loop<int, VEC_ELEM_NUM>(
+        [&result, &ar](int i) { result += ar.values[i]; });

    return result;
  }

-  template <int group_size> float reduce_sub_sum(int idx) {
+  template <int group_size>
+  float reduce_sub_sum(int idx) {
    static_assert(VEC_ELEM_NUM % group_size == 0);

    AliasReg ar;
@ -376,21 +379,35 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  }
 };

-template <typename T> struct VecType { using vec_type = void; };
+template <typename T>
+struct VecType {
+  using vec_type = void;
+};

-template <typename T> using vec_t = typename VecType<T>::vec_type;
+template <typename T>
+using vec_t = typename VecType<T>::vec_type;

-template <> struct VecType<float> { using vec_type = FP32Vec8; };
+template <>
+struct VecType<float> {
+  using vec_type = FP32Vec8;
+};

-template <> struct VecType<c10::BFloat16> { using vec_type = BF16Vec8; };
+template <>
+struct VecType<c10::BFloat16> {
+  using vec_type = BF16Vec8;
+};

-template <typename T> void storeFP32(float v, T *ptr) { *ptr = v; }
+template <typename T>
+void storeFP32(float v, T* ptr) {
+  *ptr = v;
+}

 inline void fma(FP32Vec16& acc, FP32Vec16& a, FP32Vec16& b) {
  acc = acc + a * b;
 }

-template <> inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16 *ptr) {
+template <>
+inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16* ptr) {
  c10::BFloat16 __attribute__((__may_alias__))* v_ptr =
      reinterpret_cast<c10::BFloat16*>(&v);
  *ptr = *(v_ptr + 1);
@ -400,10 +417,13 @@ template <> inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16 *ptr) {
  #define __VEC_CLASS_FP_NAN (1 << 6)
 #endif

-const static __vector unsigned char omask = { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
+const static __vector unsigned char omask = {0,  1,  4,  5,  8,  9,  12, 13,
+                                             16, 17, 20, 21, 24, 25, 28, 29};
 #ifndef _ARCH_PWR10
-const static __vector unsigned int bias = { 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff };
-const static __vector unsigned int nan  = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 };
+const static __vector unsigned int bias = {0x00007fff, 0x00007fff, 0x00007fff,
+                                           0x00007fff};
+const static __vector unsigned int nan = {0x7fc00000, 0x7fc00000, 0x7fc00000,
+                                          0x7fc00000};
 const static __vector unsigned int sh16 = {16, 16, 16, 16};
 const static __vector unsigned int one = {1, 1, 1, 1};
 #endif
@ -411,8 +431,10 @@ const static __vector unsigned int one  = { 1, 1, 1, 1 };
 inline BF16Vec8::BF16Vec8(const FP32Vec8& v) {
 #ifdef _ARCH_PWR10
  __vector signed short ret[2];
-  ret[0] = (__vector signed short)__builtin_vsx_xvcvspbf16((__vector unsigned char)v.reg.val[0]);
-  ret[1] = (__vector signed short)__builtin_vsx_xvcvspbf16((__vector unsigned char)v.reg.val[1]);
+  ret[0] = (__vector signed short)__builtin_vsx_xvcvspbf16(
+      (__vector unsigned char)v.reg.val[0]);
+  ret[1] = (__vector signed short)__builtin_vsx_xvcvspbf16(
+      (__vector unsigned char)v.reg.val[1]);
  reg = vec_perm(ret[0], ret[1], omask);
 #elif defined(_ARCH_PWR9)
  __vector unsigned int inp0 = (__vector unsigned int)(v.reg.val[0]);
@ -425,8 +447,10 @@ inline BF16Vec8::BF16Vec8(const FP32Vec8 &v) {
  __vector unsigned int rnd1 = vec_add(lsb1, bias);
  inp0 = vec_add(inp0, rnd0);
  inp1 = vec_add(inp1, rnd1);
-  __vector __bool int sel0 = vec_test_data_class(v.reg.val[0], __VEC_CLASS_FP_NAN);
-  __vector __bool int sel1 = vec_test_data_class(v.reg.val[1], __VEC_CLASS_FP_NAN);
+  __vector __bool int sel0 =
+      vec_test_data_class(v.reg.val[0], __VEC_CLASS_FP_NAN);
+  __vector __bool int sel1 =
+      vec_test_data_class(v.reg.val[1], __VEC_CLASS_FP_NAN);
  inp0 = vec_sel(inp0, nan, sel0);
  inp1 = vec_sel(inp1, nan, sel1);
  inp0 = vec_sr(inp0, sh16);
@ -438,10 +462,14 @@ inline BF16Vec8::BF16Vec8(const FP32Vec8 &v) {
 inline BF16Vec16::BF16Vec16(const FP32Vec16& v) {
 #ifdef _ARCH_PWR10
  __vector signed short ret[4];
-  ret[0] = (__vector signed short)__builtin_vsx_xvcvspbf16((__vector unsigned char)v.reg.val[0]);
-  ret[1] = (__vector signed short)__builtin_vsx_xvcvspbf16((__vector unsigned char)v.reg.val[1]);
-  ret[2] = (__vector signed short)__builtin_vsx_xvcvspbf16((__vector unsigned char)v.reg.val[2]);
-  ret[3] = (__vector signed short)__builtin_vsx_xvcvspbf16((__vector unsigned char)v.reg.val[3]);
+  ret[0] = (__vector signed short)__builtin_vsx_xvcvspbf16(
+      (__vector unsigned char)v.reg.val[0]);
+  ret[1] = (__vector signed short)__builtin_vsx_xvcvspbf16(
+      (__vector unsigned char)v.reg.val[1]);
+  ret[2] = (__vector signed short)__builtin_vsx_xvcvspbf16(
+      (__vector unsigned char)v.reg.val[2]);
+  ret[3] = (__vector signed short)__builtin_vsx_xvcvspbf16(
+      (__vector unsigned char)v.reg.val[3]);
  reg.val[0] = vec_perm(ret[0], ret[1], omask);
  reg.val[1] = vec_perm(ret[2], ret[3], omask);
 #elif defined(_ARCH_PWR9)
@ -465,10 +493,14 @@ inline BF16Vec16::BF16Vec16(const FP32Vec16 &v) {
  inp1 = vec_add(inp1, rnd1);
  inp2 = vec_add(inp2, rnd2);
  inp3 = vec_add(inp3, rnd3);
-  __vector __bool int sel0 = vec_test_data_class(v.reg.val[0], __VEC_CLASS_FP_NAN);
-  __vector __bool int sel1 = vec_test_data_class(v.reg.val[1], __VEC_CLASS_FP_NAN);
-  __vector __bool int sel2 = vec_test_data_class(v.reg.val[2], __VEC_CLASS_FP_NAN);
-  __vector __bool int sel3 = vec_test_data_class(v.reg.val[3], __VEC_CLASS_FP_NAN);
+  __vector __bool int sel0 =
+      vec_test_data_class(v.reg.val[0], __VEC_CLASS_FP_NAN);
+  __vector __bool int sel1 =
+      vec_test_data_class(v.reg.val[1], __VEC_CLASS_FP_NAN);
+  __vector __bool int sel2 =
+      vec_test_data_class(v.reg.val[2], __VEC_CLASS_FP_NAN);
+  __vector __bool int sel3 =
+      vec_test_data_class(v.reg.val[3], __VEC_CLASS_FP_NAN);
  inp0 = vec_sel(inp0, nan, sel0);
  inp1 = vec_sel(inp1, nan, sel1);
  inp2 = vec_sel(inp2, nan, sel2);
--- a/csrc/cpu/cpu_types_x86.hpp
+++ b/csrc/cpu/cpu_types_x86.hpp
@ -43,7 +43,8 @@ constexpr void unroll_loop(F &&f) {
  unroll_loop_item(std::make_integer_sequence<T, count>{}, std::forward<F>(f));
 }

-template <typename T> struct Vec {
+template <typename T>
+struct Vec {
  constexpr static int get_elem_num() { return T::VEC_ELEM_NUM; }
 };

@ -145,8 +146,8 @@ struct BF16Vec32 : public Vec<BF16Vec32> {
      : reg_low(_mm256_loadu_si256((__m256i const*)ptr)),
        reg_high(_mm256_loadu_si256((__m256i const*)ptr + 1)) {}

-  explicit BF16Vec32(__m256i low, __m256i high) : reg_low(low),
-                                                  reg_high(high) {}
+  explicit BF16Vec32(__m256i low, __m256i high)
+      : reg_low(low), reg_high(high) {}

  explicit BF16Vec32(BF16Vec8& vec8_data)
      : reg_low((__m256i)_mm256_inserti32x4(
@ -212,7 +213,8 @@ struct FP32Vec8 : public Vec<FP32Vec8> {
    AliasReg ar;
    ar.reg = reg;
    float result = 0;
-    unroll_loop<int, VEC_ELEM_NUM>([&result, &ar](int i) { result += ar.values[i]; });
+    unroll_loop<int, VEC_ELEM_NUM>(
+        [&result, &ar](int i) { result += ar.values[i]; });

    return result;
  }
@ -273,11 +275,10 @@ struct INT32Vec16: public Vec<INT32Vec16> {

  __m512i reg;

-  explicit INT32Vec16(const void* data_ptr) : reg(_mm512_loadu_epi32(data_ptr)) {}
+  explicit INT32Vec16(const void* data_ptr)
+      : reg(_mm512_loadu_epi32(data_ptr)) {}

-  void save(int32_t* ptr) const {
-    _mm512_storeu_epi32(ptr, reg);
-  }
+  void save(int32_t* ptr) const { _mm512_storeu_epi32(ptr, reg); }

  void save(int32_t* ptr, const int elem_num) const {
    constexpr uint32_t M = 0xFFFFFFFF;
@ -328,7 +329,8 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  explicit FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {}

  explicit FP32Vec16(const INT32Vec16& v)
-      : reg(_mm512_cvt_roundepi32_ps(v.reg, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)) {}
+      : reg(_mm512_cvt_roundepi32_ps(
+            v.reg, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)) {}

  FP32Vec16 operator*(const FP32Vec16& b) const {
    return FP32Vec16(_mm512_mul_ps(reg, b.reg));
@ -370,9 +372,7 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
    return FP32Vec16(_mm512_mask_min_ps(reg, mask, reg, b.reg));
  }

-  FP32Vec16 abs() const {
-    return FP32Vec16(_mm512_abs_ps(reg));
-  } 
+  FP32Vec16 abs() const { return FP32Vec16(_mm512_abs_ps(reg)); }

  float reduce_sum() const { return _mm512_reduce_add_ps(reg); }

@ -380,7 +380,8 @@ struct FP32Vec16 : public Vec<FP32Vec16> {

  float reduce_min() const { return _mm512_reduce_min_ps(reg); }

-  template <int group_size> float reduce_sub_sum(int idx) {
+  template <int group_size>
+  float reduce_sub_sum(int idx) {
    static_assert(VEC_ELEM_NUM % group_size == 0);
    constexpr uint32_t base_mask = (0xFFFF >> (16 - group_size));
    __mmask16 mask = _cvtu32_mask16(base_mask << (idx * group_size));
@ -407,27 +408,25 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
  __m256 reg_low;
  __m256 reg_high;

-  explicit FP32Vec16(float v) : reg_low(_mm256_set1_ps(v)),
-                                reg_high(_mm256_set1_ps(v)) {}
+  explicit FP32Vec16(float v)
+      : reg_low(_mm256_set1_ps(v)), reg_high(_mm256_set1_ps(v)) {}

-  explicit FP32Vec16() : reg_low(_mm256_set1_ps(0.0)),
-                         reg_high(_mm256_set1_ps(0.0)) {}
+  explicit FP32Vec16()
+      : reg_low(_mm256_set1_ps(0.0)), reg_high(_mm256_set1_ps(0.0)) {}

-  explicit FP32Vec16(const float *ptr) : reg_low(_mm256_loadu_ps(ptr)),
-                                         reg_high(_mm256_loadu_ps(ptr + 8)) {}
+  explicit FP32Vec16(const float* ptr)
+      : reg_low(_mm256_loadu_ps(ptr)), reg_high(_mm256_loadu_ps(ptr + 8)) {}

  explicit FP32Vec16(__m256 low, __m256 high) : reg_low(low), reg_high(high) {}

-  explicit FP32Vec16(const FP32Vec16 &data) : reg_low(data.reg_low),
-                                              reg_high(data.reg_high) {}
+  explicit FP32Vec16(const FP32Vec16& data)
+      : reg_low(data.reg_low), reg_high(data.reg_high) {}

  explicit FP32Vec16(const FP32Vec4& data)
      : reg_low((__m256)_mm256_inserti128_si256(
-                _mm256_castsi128_si256((__m128i)data.reg),
-                                       (__m128i)data.reg, 1)),
+            _mm256_castsi128_si256((__m128i)data.reg), (__m128i)data.reg, 1)),
        reg_high((__m256)_mm256_inserti128_si256(
-                 _mm256_castsi128_si256((__m128i)data.reg),
-                                       (__m128i)data.reg, 1)) {}
+            _mm256_castsi128_si256((__m128i)data.reg), (__m128i)data.reg, 1)) {}

  explicit FP32Vec16(const FP32Vec8& data)
      : reg_low(data.reg), reg_high(data.reg) {}
@ -484,7 +483,8 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
    return low.reduce_sum() + high.reduce_sum();
  }

-  template <int group_size> float reduce_sub_sum(int idx) {
+  template <int group_size>
+  float reduce_sub_sum(int idx) {
    float sum = 0.0;
    static_assert(VEC_ELEM_NUM % group_size == 0);
    constexpr uint32_t base_mask = (0xFFFF >> (16 - group_size));
@ -524,13 +524,11 @@ struct INT8Vec16: public Vec<INT8Vec16> {

  __m128i reg;

-  explicit INT8Vec16(const FP32Vec16& vec) : reg(
-    _mm512_cvtepi32_epi8(_mm512_cvt_roundps_epi32(vec.reg, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))
-  ) {}
+  explicit INT8Vec16(const FP32Vec16& vec)
+      : reg(_mm512_cvtepi32_epi8(_mm512_cvt_roundps_epi32(
+            vec.reg, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))) {}

-  void save(int8_t* ptr) const {
-    _mm_storeu_epi8(ptr, reg);
-  }
+  void save(int8_t* ptr) const { _mm_storeu_epi8(ptr, reg); }

  void save(int8_t* ptr, const int elem_num) const {
    constexpr uint32_t M = 0xFFFFFFFF;
@ -540,23 +538,40 @@ struct INT8Vec16: public Vec<INT8Vec16> {
 };
 #endif

-template <typename T> struct VecType { using vec_type = void; };
+template <typename T>
+struct VecType {
+  using vec_type = void;
+};

-template <typename T> using vec_t = typename VecType<T>::vec_type;
+template <typename T>
+using vec_t = typename VecType<T>::vec_type;

-template <> struct VecType<float> { using vec_type = FP32Vec8; };
+template <>
+struct VecType<float> {
+  using vec_type = FP32Vec8;
+};

-template <> struct VecType<c10::Half> { using vec_type = FP16Vec8; };
+template <>
+struct VecType<c10::Half> {
+  using vec_type = FP16Vec8;
+};

-template <> struct VecType<c10::BFloat16> { using vec_type = BF16Vec8; };
+template <>
+struct VecType<c10::BFloat16> {
+  using vec_type = BF16Vec8;
+};

-template <typename T> void storeFP32(float v, T *ptr) { *ptr = v; }
+template <typename T>
+void storeFP32(float v, T* ptr) {
+  *ptr = v;
+}

 inline void fma(FP32Vec16& acc, FP32Vec16& a, FP32Vec16& b) {
  acc = acc + a * b;
 }

-template <> inline void storeFP32<c10::Half>(float v, c10::Half *ptr) {
+template <>
+inline void storeFP32<c10::Half>(float v, c10::Half* ptr) {
  *reinterpret_cast<unsigned short*>(ptr) =
      _cvtss_sh(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
@ -571,11 +586,14 @@ inline FP16Vec16::FP16Vec16(const FP32Vec16 &v)
                          _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)) {}
 #else
 inline FP16Vec16::FP16Vec16(const FP32Vec16& v)
-    : reg(_mm256_insertf128_si256(_mm256_castsi128_si256(FP16Vec8(FP32Vec8(v.reg_low)).reg), FP16Vec8(FP32Vec8(v.reg_low)).reg, 1)) {}
+    : reg(_mm256_insertf128_si256(
+          _mm256_castsi128_si256(FP16Vec8(FP32Vec8(v.reg_low)).reg),
+          FP16Vec8(FP32Vec8(v.reg_low)).reg, 1)) {}
 #endif

 #ifdef __AVX512BF16__
-template <> inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16 *ptr) {
+template <>
+inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16* ptr) {
  *reinterpret_cast<__bfloat16*>(ptr) = _mm_cvtness_sbh(v);
 }

@ -589,7 +607,8 @@ inline void fma(FP32Vec16 &acc, BF16Vec32 &a, BF16Vec32 &b) {
  acc.reg = _mm512_dpbf16_ps(acc.reg, (__m512bh)a.reg, (__m512bh)b.reg);
 }
 #else
-template <> inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16 *ptr) {
+template <>
+inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16* ptr) {
  c10::BFloat16 __attribute__((__may_alias__))* v_ptr =
      reinterpret_cast<c10::BFloat16*>(&v);
  *ptr = *(v_ptr + 1);
@ -612,7 +631,7 @@ __m128i FP32Vec8_to_BF16Vec8_avx2(__m256 a) {
  ai = _mm256_permute4x64_epi64(ai, 0b00111001);
  return _mm256_extracti128_si256(ai, 0);
 }
-}
+}  // namespace

 inline BF16Vec8::BF16Vec8(const FP32Vec8& v)
    : reg(FP32Vec8_to_BF16Vec8_avx2(v.reg)) {}
--- a/csrc/cutlass_extensions/common.hpp
+++ b/csrc/cutlass_extensions/common.hpp
@ -27,8 +27,7 @@
 inline int get_cuda_max_shared_memory_per_block_opt_in(int const device) {
  int max_shared_mem_per_block_opt_in = 0;
  cudaDeviceGetAttribute(&max_shared_mem_per_block_opt_in,
-                        cudaDevAttrMaxSharedMemoryPerBlockOptin,
-                        device);
+                         cudaDevAttrMaxSharedMemoryPerBlockOptin, device);
  return max_shared_mem_per_block_opt_in;
 }

--- a/docs/source/contributing/overview.md
+++ b/docs/source/contributing/overview.md
@ -25,10 +25,12 @@ Check out the [building from source](#build-from-source) documentation for detai
 ```bash
 pip install -r requirements-dev.txt

-# linting and formatting
-bash format.sh
-# Static type checking
-mypy
+# Linting, formatting and static type checking
+pre-commit install
+
+# You can manually run pre-commit with
+pre-commit run --all-files
+
 # Unit tests
 pytest tests/
 ```
@ -88,7 +90,8 @@ If the PR spans more than one category, please include all relevant prefixes.
 The PR needs to meet the following code quality standards:

 - We adhere to [Google Python style guide](https://google.github.io/styleguide/pyguide.html) and [Google C++ style guide](https://google.github.io/styleguide/cppguide.html).
- Pass all linter checks. Please use <gh-file:format.sh> to format your code.
+- Pass all linter checks. Please use `pre-commit` to format your code. See
+  <https://pre-commit.com/#usage> if `pre-commit` is new to you.
 - The code needs to be well-documented to ensure future contributors can easily
  understand the code.
 - Include sufficient tests to ensure the project stays correct and robust. This
--- a/format.sh
+++ b/format.sh
@ -1,321 +0,0 @@
-#!/usr/bin/env bash
-# YAPF formatter, adapted from ray and skypilot.
-#
-# Usage:
-#    # Do work and commit your work.
-
-#    # Format files that differ from origin/main.
-#    bash format.sh
-
-#    # Commit changed files with message 'Run yapf and ruff'
-#
-#
-# YAPF + Clang formatter (if installed). This script formats all changed files from the last mergebase.
-# You are encouraged to run this locally before pushing changes for review.
-
-# Cause the script to exit if a single command fails
-set -eo pipefail
-
-# this stops git rev-parse from failing if we run this from the .git directory
-builtin cd "$(dirname "${BASH_SOURCE:-$0}")"
-ROOT="$(git rev-parse --show-toplevel)"
-builtin cd "$ROOT" || exit 1
-
-check_command() {
-    if ! command -v "$1" &> /dev/null; then
-        echo "❓❓$1 is not installed, please run \`pip install -r requirements-lint.txt\`"
-        exit 1
-    fi
-}
-
-check_command yapf
-check_command ruff
-check_command mypy
-check_command codespell
-check_command isort
-check_command clang-format
-
-YAPF_VERSION=$(yapf --version | awk '{print $2}')
-RUFF_VERSION=$(ruff --version | awk '{print $2}')
-MYPY_VERSION=$(mypy --version | awk '{print $2}')
-CODESPELL_VERSION=$(codespell --version)
-ISORT_VERSION=$(isort --vn)
-CLANGFORMAT_VERSION=$(clang-format --version | awk '{print $3}')
-PYMARKDOWNLNT_VERSION=$(pymarkdownlnt version | awk '{print $1}')
-
-# # params: tool name, tool version, required version
-tool_version_check() {
-    expected=$(grep "$1" requirements-lint.txt | cut -d'=' -f3)
-    if [[ "$2" != "$expected" ]]; then
-        echo "❓❓Wrong $1 version installed: $expected is required, not $2."
-        exit 1
-    fi
-}
-
-tool_version_check "yapf" "$YAPF_VERSION"
-tool_version_check "ruff" "$RUFF_VERSION"
-tool_version_check "mypy" "$MYPY_VERSION"
-tool_version_check "isort" "$ISORT_VERSION"
-tool_version_check "codespell" "$CODESPELL_VERSION"
-tool_version_check "clang-format" "$CLANGFORMAT_VERSION"
-tool_version_check "pymarkdownlnt" "$PYMARKDOWNLNT_VERSION"
-
-YAPF_FLAGS=(
-    '--recursive'
-    '--parallel'
-)
-
-YAPF_EXCLUDES=(
-    '--exclude' 'build/**'
-)
-
-# Format specified files
-format() {
-    yapf --in-place "${YAPF_FLAGS[@]}" "$@"
-}
-
-# Format files that differ from main branch. Ignores dirs that are not slated
-# for autoformat yet.
-format_changed() {
-    # The `if` guard ensures that the list of filenames is not empty, which
-    # could cause yapf to receive 0 positional arguments, making it hang
-    # waiting for STDIN.
-    #
-    # `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that
-    # exist on both branches.
-    MERGEBASE="$(git merge-base origin/main HEAD)"
-
-    if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
-        git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs -P 5 \
-             yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
-    fi
-
-}
-
-# Format all files
-format_all() {
-    yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" .
-}
-
-## This flag formats individual files. --files *must* be the first command line
-## arg to use this option.
-if [[ "$1" == '--files' ]]; then
-   format "${@:2}"
-   # If `--all` is passed, then any further arguments are ignored and the
-   # entire python directory is formatted.
-elif [[ "$1" == '--all' ]]; then
-   format_all
-else
-   # Format only the files that changed in last commit.
-   format_changed
-fi
-echo 'vLLM yapf: Done'
-
-# Run mypy
-echo 'vLLM mypy:'
-tools/mypy.sh
-echo 'vLLM mypy: Done'
-
-
-# If git diff returns a file that is in the skip list, the file may be checked anyway:
-# https://github.com/codespell-project/codespell/issues/1915
-# Avoiding the "./" prefix and using "/**" globs for directories appears to solve the problem
-CODESPELL_EXCLUDES=(
-    '--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**'
-)
-
-# check spelling of specified files
-spell_check() {
-    codespell "$@"
-}
-
-spell_check_all(){
-  codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}"
-}
-
-# Spelling check of files that differ from main branch.
-spell_check_changed() {
-    # The `if` guard ensures that the list of filenames is not empty, which
-    # could cause ruff to receive 0 positional arguments, making it hang
-    # waiting for STDIN.
-    #
-    # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
-    # exist on both branches.
-    MERGEBASE="$(git merge-base origin/main HEAD)"
-    if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
-        git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-            codespell "${CODESPELL_EXCLUDES[@]}"
-    fi
-}
-
-# Run Codespell
-## This flag runs spell check of individual files. --files *must* be the first command line
-## arg to use this option.
-if [[ "$1" == '--files' ]]; then
-   spell_check "${@:2}"
-   # If `--all` is passed, then any further arguments are ignored and the
-   # entire python directory is linted.
-elif [[ "$1" == '--all' ]]; then
-   spell_check_all
-else
-   # Check spelling only of the files that changed in last commit.
-   spell_check_changed
-fi
-echo 'vLLM codespell: Done'
-
-
-# Lint specified files
-lint() {
-    ruff check "$@"
-}
-
-# Lint files that differ from main branch. Ignores dirs that are not slated
-# for autolint yet.
-lint_changed() {
-    # The `if` guard ensures that the list of filenames is not empty, which
-    # could cause ruff to receive 0 positional arguments, making it hang
-    # waiting for STDIN.
-    #
-    # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
-    # exist on both branches.
-    MERGEBASE="$(git merge-base origin/main HEAD)"
-
-    if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
-        git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-             ruff check
-    fi
-
-}
-
-# Run Ruff
-### This flag lints individual files. --files *must* be the first command line
-### arg to use this option.
-if [[ "$1" == '--files' ]]; then
-   lint "${@:2}"
-   # If `--all` is passed, then any further arguments are ignored and the
-   # entire python directory is linted.
-elif [[ "$1" == '--all' ]]; then
-   lint vllm tests
-else
-   # Format only the files that changed in last commit.
-   lint_changed
-fi
-echo 'vLLM ruff: Done'
-
-# check spelling of specified files
-isort_check() {
-    isort "$@"
-}
-
-isort_check_all(){
-  isort .
-}
-
-# Spelling  check of files that differ from main branch.
-isort_check_changed() {
-    # The `if` guard ensures that the list of filenames is not empty, which
-    # could cause ruff to receive 0 positional arguments, making it hang
-    # waiting for STDIN.
-    #
-    # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
-    # exist on both branches.
-    MERGEBASE="$(git merge-base origin/main HEAD)"
-
-    if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
-        git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-             isort
-    fi
-}
-
-# Run Isort
-# This flag runs spell check of individual files. --files *must* be the first command line
-# arg to use this option.
-if [[ "$1" == '--files' ]]; then
-   isort_check "${@:2}"
-   # If `--all` is passed, then any further arguments are ignored and the
-   # entire python directory is linted.
-elif [[ "$1" == '--all' ]]; then
-   isort_check_all
-else
-   # Check spelling only of the files that changed in last commit.
-   isort_check_changed
-fi
-echo 'vLLM isort: Done'
-
-# Clang-format section
-# Exclude some files for formatting because they are vendored
-# NOTE: Keep up to date with .github/workflows/clang-format.yml
-CLANG_FORMAT_EXCLUDES=(
-    'csrc/moe/topk_softmax_kernels.cu'
-    'csrc/quantization/gguf/ggml-common.h'
-    'csrc/quantization/gguf/dequantize.cuh'
-    'csrc/quantization/gguf/vecdotq.cuh'
-    'csrc/quantization/gguf/mmq.cuh'
-    'csrc/quantization/gguf/mmvq.cuh'
-)
-
-# Format specified files with clang-format
-clang_format() {
-    clang-format -i "$@"
-}
-
-# Format files that differ from main branch with clang-format.
-clang_format_changed() {
-    # The `if` guard ensures that the list of filenames is not empty, which
-    # could cause clang-format to receive 0 positional arguments, making it hang
-    # waiting for STDIN.
-    #
-    # `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that
-    # exist on both branches.
-    MERGEBASE="$(git merge-base origin/main HEAD)"
-
-    # Get the list of changed files, excluding the specified ones
-    changed_files=$(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.h' '*.cpp' '*.cu' '*.cuh' | (grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") || echo -e))
-    if [ -n "$changed_files" ]; then
-        echo "$changed_files" | xargs -P 5 clang-format -i
-    fi
-}
-
-# Format all files with clang-format
-clang_format_all() {
-    find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
-        | grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") \
-        | xargs clang-format -i
-}
-
-# Run clang-format
-if [[ "$1" == '--files' ]]; then
-   clang_format "${@:2}"
-elif [[ "$1" == '--all' ]]; then
-   clang_format_all
-else
-   clang_format_changed
-fi
-echo 'vLLM clang-format: Done'
-
-echo 'vLLM actionlint:'
-tools/actionlint.sh -color
-echo 'vLLM actionlint: Done'
-
-echo 'vLLM shellcheck:'
-tools/shellcheck.sh
-echo 'vLLM shellcheck: Done'
-
-echo 'excalidraw png check:'
-tools/png-lint.sh
-echo 'excalidraw png check: Done'
-
-if ! git diff --quiet &>/dev/null; then
-    echo 
-    echo "🔍🔍There are files changed by the format checker or by you that are not added and committed:"
-    git --no-pager diff --name-only
-    echo "🔍🔍Format checker passed, but please add, commit and push all the files above to include changes made by the format checker."
-
-    exit 1
-else
-    echo "✨🎉 Format check passed! Congratulations! 🎉✨"
-fi
-
-echo 'vLLM doc-lint:'
-tools/doc-lint.sh
-echo 'vLLM doc-lint: Done'
--- a/pyproject.toml
+++ b/pyproject.toml
@ -15,6 +15,11 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools_scm]
 # version_file = "vllm/_version.py" # currently handled by `setup.py:get_version()`

+[tool.yapfignore]
+ignore_patterns = [
+    "build/**",
+]
+
 [tool.ruff]
 # Allow lines to be as long as 80.
 line-length = 80
@ -52,6 +57,9 @@ ignore = [
    "B007",
    # f-string format
    "UP032",
+    # Python 3.8 typing
+    "UP006", "UP035",
+
 ]

 [tool.mypy]
--- a/requirements-lint.txt
+++ b/requirements-lint.txt
@ -1,15 +1,2 @@
 # formatting
-yapf==0.32.0
-toml==0.10.2
-tomli==2.0.2
-ruff==0.6.5
-codespell==2.3.0
-isort==5.13.2
-clang-format==18.1.5
-pymarkdownlnt==0.9.26
-
-# type checking
-mypy==1.11.1
-types-PyYAML
-types-requests
-types-setuptools
+pre-commit==4.0.1
--- a/tools/actionlint.sh
+++ b/tools/actionlint.sh
@ -1,13 +0,0 @@
-#!/bin/bash
-
-if command -v actionlint &> /dev/null; then
-    actionlint "$@"
-    exit 0
-elif [ -x ./actionlint ]; then
-    ./actionlint "$@"
-    exit 0
-fi
-
-# download a binary to the current directory - v1.7.3
-bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/aa0a7be8e566b096e64a5df8ff290ec24fa58fbc/scripts/download-actionlint.bash)
-./actionlint "$@"
--- a/tools/doc-lint.sh
+++ b/tools/doc-lint.sh
@ -1,3 +0,0 @@
-#!/bin/bash
-
-pymarkdownlnt scan docs -r