[CI/Build] respect the common environment variable MAX_JOBS (#3600)
This commit is contained in:
parent
8b268a46a7
commit
42bc386129
@ -60,6 +60,15 @@ You can also build and install vLLM from source:
|
|||||||
$ cd vllm
|
$ cd vllm
|
||||||
$ pip install -e . # This may take 5-10 minutes.
|
$ pip install -e . # This may take 5-10 minutes.
|
||||||
|
|
||||||
|
.. tip::
|
||||||
|
To avoid your system being overloaded, you can limit the number of compilation jobs
|
||||||
|
to be run simultaneously, via the environment variable ``MAX_JOBS``. For example:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ export MAX_JOBS=6
|
||||||
|
$ pip install -e .
|
||||||
|
|
||||||
.. tip::
|
.. tip::
|
||||||
If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image.
|
If you have trouble building vLLM, we recommend using the NVIDIA PyTorch Docker image.
|
||||||
|
|
||||||
|
19
setup.py
19
setup.py
@ -1,6 +1,7 @@
|
|||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from typing import List
|
from typing import List
|
||||||
@ -13,6 +14,7 @@ import torch
|
|||||||
from torch.utils.cpp_extension import CUDA_HOME
|
from torch.utils.cpp_extension import CUDA_HOME
|
||||||
|
|
||||||
ROOT_DIR = os.path.dirname(__file__)
|
ROOT_DIR = os.path.dirname(__file__)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# vLLM only supports Linux platform
|
# vLLM only supports Linux platform
|
||||||
assert sys.platform.startswith(
|
assert sys.platform.startswith(
|
||||||
@ -54,12 +56,17 @@ class cmake_build_ext(build_ext):
|
|||||||
# Determine number of compilation jobs and optionally nvcc compile threads.
|
# Determine number of compilation jobs and optionally nvcc compile threads.
|
||||||
#
|
#
|
||||||
def compute_num_jobs(self):
|
def compute_num_jobs(self):
|
||||||
try:
|
num_jobs = os.environ.get("MAX_JOBS", None)
|
||||||
# os.sched_getaffinity() isn't universally available, so fall back
|
if num_jobs is not None:
|
||||||
# to os.cpu_count() if we get an error here.
|
num_jobs = int(num_jobs)
|
||||||
num_jobs = len(os.sched_getaffinity(0))
|
logger.info(f"Using MAX_JOBS={num_jobs} as the number of jobs.")
|
||||||
except AttributeError:
|
else:
|
||||||
num_jobs = os.cpu_count()
|
try:
|
||||||
|
# os.sched_getaffinity() isn't universally available, so fall
|
||||||
|
# back to os.cpu_count() if we get an error here.
|
||||||
|
num_jobs = len(os.sched_getaffinity(0))
|
||||||
|
except AttributeError:
|
||||||
|
num_jobs = os.cpu_count()
|
||||||
|
|
||||||
nvcc_threads = None
|
nvcc_threads = None
|
||||||
if _is_cuda():
|
if _is_cuda():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user