[ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI groups (#13970)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore 2025-02-27 12:31:47 -08:00 committed by GitHub
parent a2dd48c386
commit 38acae6e97
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 23 additions and 1 deletion

View File

@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then
--ignore=kernels/test_moe.py \ --ignore=kernels/test_moe.py \
--ignore=kernels/test_prefix_prefill.py \ --ignore=kernels/test_prefix_prefill.py \
--ignore=kernels/test_rand.py \ --ignore=kernels/test_rand.py \
--ignore=kernels/test_sampler.py" --ignore=kernels/test_sampler.py \
--ignore=kernels/test_cascade_flash_attn.py \
--ignore=kernels/test_mamba_mixer2.py"
fi fi
#ignore certain Entrypoints tests #ignore certain Entrypoints tests

View File

@ -7,6 +7,7 @@ import pytest
from tests.kernels.utils import override_backend_env_variable from tests.kernels.utils import override_backend_env_variable
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.platforms import current_platform
from .conftest import get_text_from_llm_generator from .conftest import get_text_from_llm_generator
@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
Additionally, we compare the results of the v1 and v2 managers. Additionally, we compare the results of the v1 and v2 managers.
""" """
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
sampling_params = SamplingParams( sampling_params = SamplingParams(
@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
The results with and without chunked prefill are not the same due to The results with and without chunked prefill are not the same due to
numerical instabilities. numerical instabilities.
""" """
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
sampling_params = SamplingParams( sampling_params = SamplingParams(

View File

@ -12,6 +12,7 @@ from tests.kernels.utils import override_backend_env_variable
from vllm import SamplingParams, TokensPrompt from vllm import SamplingParams, TokensPrompt
from vllm.core.scheduler import Scheduler from vllm.core.scheduler import Scheduler
from vllm.engine.llm_engine import LLMEngine from vllm.engine.llm_engine import LLMEngine
from vllm.platforms import current_platform
from ..models.utils import check_outputs_equal from ..models.utils import check_outputs_equal
@ -53,6 +54,10 @@ def test_mixed_requests(
and the others don't. The cached position determines where and the others don't. The cached position determines where
the sequence is at among the batch of prefills. the sequence is at among the batch of prefills.
""" """
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
with hf_runner(model, dtype=dtype) as hf_model: with hf_runner(model, dtype=dtype) as hf_model:
@ -103,6 +108,11 @@ def test_unstable_prompt_sequence(
backend: str, backend: str,
monkeypatch, monkeypatch,
) -> None: ) -> None:
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
with vllm_runner( with vllm_runner(