[CI/Build] respect the common environment variable MAX_JOBS (#3600)

2024-03-24 17:04:00 -07:00 · 2024-03-24 17:04:00 -07:00 · 42bc386129
commit 42bc386129
parent 8b268a46a7
2 changed files with 22 additions and 6 deletions
--- a/docs/source/getting_started/installation.rst
+++ b/docs/source/getting_started/installation.rst
@ -60,6 +60,15 @@ You can also build and install vLLM from source:
    $ cd vllm
    $ pip install -e .  # This may take 5-10 minutes.
 .. tip::
    To avoid overloading your system, you can limit the number of compilation jobs
    that run simultaneously via the environment variable ``MAX_JOBS``. For example:
    .. code-block:: console
        $ export MAX_JOBS=6
        $ pip install -e .
 .. tip::
    If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image.
--- a/setup.py
+++ b/setup.py
@ -1,6 +1,7 @@
 import io
 import os
 import re
 import logging
 import subprocess
 import sys
 from typing import List
@ -13,6 +14,7 @@ import torch
 from torch.utils.cpp_extension import CUDA_HOME
 ROOT_DIR = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 # vLLM only supports Linux platform
 assert sys.platform.startswith(
@ -54,12 +56,17 @@ class cmake_build_ext(build_ext):
    # Determine number of compilation jobs and optionally nvcc compile threads.
    #
    def compute_num_jobs(self):
-        try:
+        num_jobs = os.environ.get("MAX_JOBS", None)
-            # os.sched_getaffinity() isn't universally available, so fall back
+        if num_jobs is not None:
-            # to os.cpu_count() if we get an error here.
+            num_jobs = int(num_jobs)
-            num_jobs = len(os.sched_getaffinity(0))
+            logger.info(f"Using MAX_JOBS={num_jobs} as the number of jobs.")
-        except AttributeError:
+        else:
-            num_jobs = os.cpu_count()
+            try:
                # os.sched_getaffinity() isn't universally available, so fall
                #  back to os.cpu_count() if we get an error here.
                num_jobs = len(os.sched_getaffinity(0))
            except AttributeError:
                num_jobs = os.cpu_count()
        nvcc_threads = None
        if _is_cuda():