From 42bc386129f6890aa1654c31aa17a415f7642a5e Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 24 Mar 2024 17:04:00 -0700 Subject: [PATCH] [CI/Build] respect the common environment variable MAX_JOBS (#3600) --- docs/source/getting_started/installation.rst | 9 +++++++++ setup.py | 19 +++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst index 77b0ae65..3355a894 100644 --- a/docs/source/getting_started/installation.rst +++ b/docs/source/getting_started/installation.rst @@ -60,6 +60,15 @@ You can also build and install vLLM from source: $ cd vllm $ pip install -e . # This may take 5-10 minutes. +.. tip:: + To avoid your system being overloaded, you can limit the number of compilation jobs + to be run simultaneously, via the environment variable ``MAX_JOBS``. For example: + + .. code-block:: console + + $ export MAX_JOBS=6 + $ pip install -e . + .. tip:: If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image. diff --git a/setup.py b/setup.py index 47cac599..27106b1f 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import io import os import re +import logging import subprocess import sys from typing import List @@ -13,6 +14,7 @@ import torch from torch.utils.cpp_extension import CUDA_HOME ROOT_DIR = os.path.dirname(__file__) +logger = logging.getLogger(__name__) # vLLM only supports Linux platform assert sys.platform.startswith( @@ -54,12 +56,17 @@ class cmake_build_ext(build_ext): # Determine number of compilation jobs and optionally nvcc compile threads. # def compute_num_jobs(self): - try: - # os.sched_getaffinity() isn't universally available, so fall back - # to os.cpu_count() if we get an error here. 
- num_jobs = len(os.sched_getaffinity(0)) - except AttributeError: - num_jobs = os.cpu_count() + num_jobs = os.environ.get("MAX_JOBS", None) + if num_jobs is not None: + num_jobs = int(num_jobs) + logger.info(f"Using MAX_JOBS={num_jobs} as the number of jobs.") + else: + try: + # os.sched_getaffinity() isn't universally available, so fall + # back to os.cpu_count() if we get an error here. + num_jobs = len(os.sched_getaffinity(0)) + except AttributeError: + num_jobs = os.cpu_count() nvcc_threads = None if _is_cuda():