[CI/Build] respect the common environment variable MAX_JOBS (#3600)

This commit is contained in:
youkaichao 2024-03-24 17:04:00 -07:00 committed by GitHub
parent 8b268a46a7
commit 42bc386129
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 22 additions and 6 deletions

View File

@ -60,6 +60,15 @@ You can also build and install vLLM from source:
$ cd vllm $ cd vllm
$ pip install -e . # This may take 5-10 minutes. $ pip install -e . # This may take 5-10 minutes.
.. tip::
To avoid overloading your system, you can limit the number of compilation jobs
that run simultaneously by setting the ``MAX_JOBS`` environment variable. For example:
.. code-block:: console
$ export MAX_JOBS=6
$ pip install -e .
.. tip:: .. tip::
If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image. If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image.

View File

@ -1,6 +1,7 @@
import io import io
import os import os
import re import re
import logging
import subprocess import subprocess
import sys import sys
from typing import List from typing import List
@ -13,6 +14,7 @@ import torch
from torch.utils.cpp_extension import CUDA_HOME from torch.utils.cpp_extension import CUDA_HOME
ROOT_DIR = os.path.dirname(__file__) ROOT_DIR = os.path.dirname(__file__)
logger = logging.getLogger(__name__)
# vLLM only supports Linux platform # vLLM only supports Linux platform
assert sys.platform.startswith( assert sys.platform.startswith(
@ -54,12 +56,17 @@ class cmake_build_ext(build_ext):
# Determine number of compilation jobs and optionally nvcc compile threads. # Determine number of compilation jobs and optionally nvcc compile threads.
# #
def compute_num_jobs(self): def compute_num_jobs(self):
try: num_jobs = os.environ.get("MAX_JOBS", None)
# os.sched_getaffinity() isn't universally available, so fall back if num_jobs is not None:
# to os.cpu_count() if we get an error here. num_jobs = int(num_jobs)
num_jobs = len(os.sched_getaffinity(0)) logger.info(f"Using MAX_JOBS={num_jobs} as the number of jobs.")
except AttributeError: else:
num_jobs = os.cpu_count() try:
# os.sched_getaffinity() isn't universally available, so fall
# back to os.cpu_count() if we get an error here.
num_jobs = len(os.sched_getaffinity(0))
except AttributeError:
num_jobs = os.cpu_count()
nvcc_threads = None nvcc_threads = None
if _is_cuda(): if _is_cuda():