[ROCm] enable cupy in order to enable cudagraph mode for AMD GPUs (#3123)
Co-authored-by: lcskrishna <lollachaitanya@gmail.com>
This commit is contained in:
parent
9a4548bae7
commit
05af6da8d9
@ -23,6 +23,9 @@ RUN echo "FA_BRANCH is $FA_BRANCH"
|
||||
# In that case, we need to use the python reference attention implementation in vllm
|
||||
ARG BUILD_FA="1"
|
||||
|
||||
# whether to build cupy on rocm
|
||||
ARG BUILD_CUPY="1"
|
||||
|
||||
# Install some basic utilities
|
||||
RUN apt-get update && apt-get install python3 python3-pip -y
|
||||
|
||||
@ -70,16 +73,33 @@ RUN if [ "$BUILD_FA" = "1" ]; then \
|
||||
&& cd ..; \
|
||||
fi
|
||||
|
||||
COPY ./ /app/vllm
|
||||
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
RUN python3 -m pip install xformers==0.0.23 --no-deps
|
||||
|
||||
# Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt.
|
||||
# Manually removed it so that later steps of numpy upgrade can continue
|
||||
RUN if [ "$BASE_IMAGE" = "rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1" ]; then \
|
||||
rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/; fi
|
||||
|
||||
# build cupy
|
||||
RUN if [ "$BUILD_CUPY" = "1" ]; then \
|
||||
mkdir -p libs \
|
||||
&& cd libs \
|
||||
&& git clone -b hipgraph_enablement --recursive https://github.com/ROCm/cupy.git \
|
||||
&& cd cupy \
|
||||
&& pip install mpi4py-mpich \
|
||||
&& pip install scipy==1.9.3 \
|
||||
&& pip install cython==0.29.* \
|
||||
&& env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py \
|
||||
&& export CUPY_INSTALL_USE_HIP=1 \
|
||||
&& export ROCM_HOME=/opt/rocm \
|
||||
&& export HCC_AMDGPU_TARGET="gfx90a,gfx942,gfx1100" \
|
||||
&& pip install . \
|
||||
&& cd ..; \
|
||||
fi
|
||||
|
||||
COPY ./ /app/vllm
|
||||
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
RUN python3 -m pip install xformers==0.0.23 --no-deps
|
||||
|
||||
RUN cd /app \
|
||||
&& cd vllm \
|
||||
&& pip install -U -r requirements-rocm.txt \
|
||||
|
@ -19,7 +19,6 @@ from vllm.sequence import SamplerOutput, SequenceGroupMetadata
|
||||
from vllm.worker.cache_engine import CacheEngine
|
||||
from vllm.worker.model_runner import ModelRunner
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.utils import is_hip
|
||||
|
||||
|
||||
class Worker:
|
||||
@ -267,8 +266,7 @@ def init_distributed_environment(
|
||||
"cupy.distributed is already initialized but the cupy world "
|
||||
"size does not match parallel_config.world_size "
|
||||
f"({cupy_world_size} vs. {parallel_config.world_size}).")
|
||||
elif (parallel_config.world_size > 1 and cupy_port is not None
|
||||
and not is_hip()):
|
||||
elif (parallel_config.world_size > 1 and cupy_port is not None):
|
||||
# NOTE(woosuk): We don't initialize CuPy process group when world size
|
||||
# is 1.
|
||||
# TODO(woosuk): Support multi-node connection.
|
||||
|
Loading…
x
Reference in New Issue
Block a user