[CI/Build] respect the common environment variable MAX_JOBS (#3600)

youkaichao 2024-03-24 17:04:00 -07:00 committed by GitHub
parent 8b268a46a7
commit 42bc386129
2 changed files with 22 additions and 6 deletions

@@ -60,6 +60,15 @@ You can also build and install vLLM from source:
     $ cd vllm
     $ pip install -e .  # This may take 5-10 minutes.
 
+.. tip::
+    To avoid your system being overloaded, you can limit the number of compilation jobs
+    to be run simultaneously, via the environment variable `MAX_JOBS`. For example:
+
+    .. code-block:: console
+
+        $ export MAX_JOBS=6
+        $ pip install -e .
+
 .. tip::
     If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image.
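As a side note (not part of this commit), a minimal sketch of what the Docker route in the second tip might look like; the image tag and mount path below are illustrative:

.. code-block:: console

    $ # Start a shell in the NVIDIA PyTorch container with the vLLM checkout mounted
    $ docker run --gpus all -it --rm -v "$(pwd)":/workspace/vllm nvcr.io/nvidia/pytorch:23.10-py3
    $ cd /workspace/vllm
    $ MAX_JOBS=6 pip install -e .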

@@ -1,6 +1,7 @@
 import io
 import os
 import re
+import logging
 import subprocess
 import sys
 from typing import List
@@ -13,6 +14,7 @@ import torch
 from torch.utils.cpp_extension import CUDA_HOME
 
 ROOT_DIR = os.path.dirname(__file__)
+logger = logging.getLogger(__name__)
 
 # vLLM only supports Linux platform
 assert sys.platform.startswith(
@@ -54,12 +56,17 @@ class cmake_build_ext(build_ext):
     # Determine number of compilation jobs and optionally nvcc compile threads.
     #
     def compute_num_jobs(self):
-        try:
-            # os.sched_getaffinity() isn't universally available, so fall back
-            # to os.cpu_count() if we get an error here.
-            num_jobs = len(os.sched_getaffinity(0))
-        except AttributeError:
-            num_jobs = os.cpu_count()
+        num_jobs = os.environ.get("MAX_JOBS", None)
+        if num_jobs is not None:
+            num_jobs = int(num_jobs)
+            logger.info(f"Using MAX_JOBS={num_jobs} as the number of jobs.")
+        else:
+            try:
+                # os.sched_getaffinity() isn't universally available, so fall
+                # back to os.cpu_count() if we get an error here.
+                num_jobs = len(os.sched_getaffinity(0))
+            except AttributeError:
+                num_jobs = os.cpu_count()
 
         nvcc_threads = None
         if _is_cuda():
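For reference, a standalone Python sketch of the job-count selection added above (illustrative only, not part of the commit); it mirrors the method's logic so you can check what a given MAX_JOBS setting resolves to on your machine:

.. code-block:: python

    import os

    def compute_num_jobs() -> int:
        # MAX_JOBS, when set, takes precedence over the detected CPU count.
        num_jobs = os.environ.get("MAX_JOBS", None)
        if num_jobs is not None:
            return int(num_jobs)
        try:
            # os.sched_getaffinity() isn't universally available, so fall
            # back to os.cpu_count() if it is missing.
            return len(os.sched_getaffinity(0))
        except AttributeError:
            return os.cpu_count() or 1

    print(f"Would run {compute_num_jobs()} parallel compilation jobs.")

Running this sketch with ``MAX_JOBS=6`` set in the environment would report 6 jobs; without it, the detected CPU count is used.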