[CI/Build] respect the common environment variable MAX_JOBS (#3600)

youkaichao 2024-03-24 17:04:00 -07:00 committed by GitHub
parent 8b268a46a7
commit 42bc386129
2 changed files with 22 additions and 6 deletions

@@ -60,6 +60,15 @@ You can also build and install vLLM from source:
     $ cd vllm
     $ pip install -e .  # This may take 5-10 minutes.
 
+.. tip::
+    To avoid your system being overloaded, you can limit the number of compilation jobs
+    to be run simultaneously, via the environment variable `MAX_JOBS`. For example:
+
+    .. code-block:: console
+
+        $ export MAX_JOBS=6
+        $ pip install -e .
+
 .. tip::
     If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image.
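As a side note (not part of this commit), a minimal sketch of what the Docker route in the second tip might look like; the image tag and mount path below are illustrative:

.. code-block:: console

    $ # Start a shell in the NVIDIA PyTorch container with the vLLM checkout mounted
    $ docker run --gpus all -it --rm -v "$(pwd)":/workspace/vllm nvcr.io/nvidia/pytorch:23.10-py3
    $ cd /workspace/vllm
    $ MAX_JOBS=6 pip install -e .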

@@ -1,6 +1,7 @@
 import io
 import os
 import re
+import logging
 import subprocess
 import sys
 from typing import List
@@ -13,6 +14,7 @@ import torch
 from torch.utils.cpp_extension import CUDA_HOME
 
 ROOT_DIR = os.path.dirname(__file__)
+logger = logging.getLogger(__name__)
 
 # vLLM only supports Linux platform
 assert sys.platform.startswith(
@@ -54,12 +56,17 @@ class cmake_build_ext(build_ext):
     # Determine number of compilation jobs and optionally nvcc compile threads.
     #
     def compute_num_jobs(self):
-        try:
-            # os.sched_getaffinity() isn't universally available, so fall back
-            # to os.cpu_count() if we get an error here.
-            num_jobs = len(os.sched_getaffinity(0))
-        except AttributeError:
-            num_jobs = os.cpu_count()
+        num_jobs = os.environ.get("MAX_JOBS", None)
+        if num_jobs is not None:
+            num_jobs = int(num_jobs)
+            logger.info(f"Using MAX_JOBS={num_jobs} as the number of jobs.")
+        else:
+            try:
+                # os.sched_getaffinity() isn't universally available, so fall
+                # back to os.cpu_count() if we get an error here.
+                num_jobs = len(os.sched_getaffinity(0))
+            except AttributeError:
+                num_jobs = os.cpu_count()
 
         nvcc_threads = None
         if _is_cuda():
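For reference, a standalone Python sketch of the job-count selection added above (illustrative only, not part of the commit); it mirrors the method's logic so you can check what a given MAX_JOBS setting resolves to on your machine:

.. code-block:: python

    import os

    def compute_num_jobs() -> int:
        # MAX_JOBS, when set, takes precedence over the detected CPU count.
        num_jobs = os.environ.get("MAX_JOBS", None)
        if num_jobs is not None:
            return int(num_jobs)
        try:
            # os.sched_getaffinity() isn't universally available, so fall
            # back to os.cpu_count() if it is missing.
            return len(os.sched_getaffinity(0))
        except AttributeError:
            return os.cpu_count() or 1

    print(f"Would run {compute_num_jobs()} parallel compilation jobs.")

Running this sketch with ``MAX_JOBS=6`` set in the environment would report 6 jobs; without it, the detected CPU count is used.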