From 943e72ca56974b4d8b5a141182e717d2abd3a819 Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Mon, 20 May 2024 13:29:28 -0500 Subject: [PATCH] [Build/CI] Enabling AMD Entrypoints Test (#4834) Co-authored-by: Alexey Kondratiev --- .buildkite/test-pipeline.yaml | 3 ++- Dockerfile.rocm | 8 ++++++-- requirements-rocm.txt | 3 ++- tests/spec_decode/e2e/conftest.py | 8 ++++++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 6f5c46e2..def8a460 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -60,7 +60,8 @@ steps: command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py - label: Entrypoints Test - #mirror_hardwares: [amd] + mirror_hardwares: [amd] + commands: # these tests have to be separated, because each one will allocate all posible GPU memory - pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py diff --git a/Dockerfile.rocm b/Dockerfile.rocm index eefad79e..9bfe8446 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -92,19 +92,23 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \ WORKDIR /vllm-workspace COPY . . +#RUN python3 -m pip install pynvml # to be removed eventually RUN python3 -m pip install --upgrade pip numba # make sure punica kernels are built (for LoRA) ENV VLLM_INSTALL_PUNICA_KERNELS=1 +# Workaround for ray >= 2.10.0 +ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 + +ENV VLLM_NCCL_SO_PATH=/opt/rocm/lib/librccl.so RUN --mount=type=cache,target=/root/.cache/pip \ pip install -U -r requirements-rocm.txt \ && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \ && python3 setup.py install \ && cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \ + && cp build/lib.linux-x86_64-cpython-39/vllm/_punica_C.cpython-39-x86_64-linux-gnu.so vllm/ \ && cd .. -RUN python3 -m pip install --upgrade pip -RUN python3 -m pip install --no-cache-dir ray[all]==2.9.3 CMD ["/bin/bash"] diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 903845b6..cc42839a 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -2,4 +2,5 @@ -r requirements-common.txt # Dependencies for AMD GPUs -ray == 2.9.3 +ray >= 2.10.0 +pytest-asyncio diff --git a/tests/spec_decode/e2e/conftest.py b/tests/spec_decode/e2e/conftest.py index da8b9271..7c5840ba 100644 --- a/tests/spec_decode/e2e/conftest.py +++ b/tests/spec_decode/e2e/conftest.py @@ -6,8 +6,12 @@ from typing import Dict, List, Optional, Tuple, Union import pytest import ray import torch -from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, - nvmlInit) + +from vllm.utils import is_hip + +if (not is_hip()): + from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, + nvmlInit) from vllm import LLM from vllm.engine.arg_utils import AsyncEngineArgs