[ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI groups (#13970)
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
a2dd48c386
commit
38acae6e97
@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then
|
|||||||
--ignore=kernels/test_moe.py \
|
--ignore=kernels/test_moe.py \
|
||||||
--ignore=kernels/test_prefix_prefill.py \
|
--ignore=kernels/test_prefix_prefill.py \
|
||||||
--ignore=kernels/test_rand.py \
|
--ignore=kernels/test_rand.py \
|
||||||
--ignore=kernels/test_sampler.py"
|
--ignore=kernels/test_sampler.py \
|
||||||
|
--ignore=kernels/test_cascade_flash_attn.py \
|
||||||
|
--ignore=kernels/test_mamba_mixer2.py"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
#ignore certain Entrypoints tests
|
#ignore certain Entrypoints tests
|
||||||
|
@ -7,6 +7,7 @@ import pytest
|
|||||||
|
|
||||||
from tests.kernels.utils import override_backend_env_variable
|
from tests.kernels.utils import override_backend_env_variable
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
from .conftest import get_text_from_llm_generator
|
from .conftest import get_text_from_llm_generator
|
||||||
|
|
||||||
@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
|
|||||||
|
|
||||||
Additionally, we compare the results of the v1 and v2 managers.
|
Additionally, we compare the results of the v1 and v2 managers.
|
||||||
"""
|
"""
|
||||||
|
if backend == "FLASHINFER" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Flashinfer does not support ROCm/HIP.")
|
||||||
|
if backend == "XFORMERS" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Xformers does not support ROCm/HIP.")
|
||||||
|
|
||||||
override_backend_env_variable(monkeypatch, backend)
|
override_backend_env_variable(monkeypatch, backend)
|
||||||
|
|
||||||
sampling_params = SamplingParams(
|
sampling_params = SamplingParams(
|
||||||
@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
|
|||||||
The results with and without chunked prefill are not the same due to
|
The results with and without chunked prefill are not the same due to
|
||||||
numerical instabilities.
|
numerical instabilities.
|
||||||
"""
|
"""
|
||||||
|
if backend == "FLASHINFER" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Flashinfer does not support ROCm/HIP.")
|
||||||
|
if backend == "XFORMERS" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Xformers does not support ROCm/HIP.")
|
||||||
override_backend_env_variable(monkeypatch, backend)
|
override_backend_env_variable(monkeypatch, backend)
|
||||||
|
|
||||||
sampling_params = SamplingParams(
|
sampling_params = SamplingParams(
|
||||||
|
@ -12,6 +12,7 @@ from tests.kernels.utils import override_backend_env_variable
|
|||||||
from vllm import SamplingParams, TokensPrompt
|
from vllm import SamplingParams, TokensPrompt
|
||||||
from vllm.core.scheduler import Scheduler
|
from vllm.core.scheduler import Scheduler
|
||||||
from vllm.engine.llm_engine import LLMEngine
|
from vllm.engine.llm_engine import LLMEngine
|
||||||
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
from ..models.utils import check_outputs_equal
|
from ..models.utils import check_outputs_equal
|
||||||
|
|
||||||
@ -53,6 +54,10 @@ def test_mixed_requests(
|
|||||||
and the others don't. The cached position determines where
|
and the others don't. The cached position determines where
|
||||||
the sequence is at among the batch of prefills.
|
the sequence is at among the batch of prefills.
|
||||||
"""
|
"""
|
||||||
|
if backend == "FLASHINFER" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Flashinfer does not support ROCm/HIP.")
|
||||||
|
if backend == "XFORMERS" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Xformers does not support ROCm/HIP.")
|
||||||
override_backend_env_variable(monkeypatch, backend)
|
override_backend_env_variable(monkeypatch, backend)
|
||||||
|
|
||||||
with hf_runner(model, dtype=dtype) as hf_model:
|
with hf_runner(model, dtype=dtype) as hf_model:
|
||||||
@ -103,6 +108,11 @@ def test_unstable_prompt_sequence(
|
|||||||
backend: str,
|
backend: str,
|
||||||
monkeypatch,
|
monkeypatch,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
||||||
|
if backend == "FLASHINFER" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Flashinfer does not support ROCm/HIP.")
|
||||||
|
if backend == "XFORMERS" and current_platform.is_rocm():
|
||||||
|
pytest.skip("Xformers does not support ROCm/HIP.")
|
||||||
override_backend_env_variable(monkeypatch, backend)
|
override_backend_env_variable(monkeypatch, backend)
|
||||||
|
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user