From 95aca283b456c3aae09a37a7769cac07681fc585 Mon Sep 17 00:00:00 2001 From: Divakar Verma <137818590+divakar-amd@users.noreply.github.com> Date: Wed, 16 Apr 2025 21:52:11 -0500 Subject: [PATCH] [rocm][V0] fix selection logic for custom PA in V0 (#16426) Signed-off-by: Divakar Verma --- vllm/platforms/rocm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index d18b7c26..ca652831 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -109,8 +109,11 @@ def use_rocm_custom_paged_attention(qtype: torch.dtype, head_size: int, ON_MI250_MI300 = any(arch in GPU_ARCH for arch in ["gfx90a", "gfx942"]) # rocm custom page attention not support on navi (gfx1*) + # custom paged attn always supported on V0. On V1, requires sliding window + # disabled due to observed numerical discrepancy. return (ON_MI250_MI300 and not ON_NAVI - and (sliding_window == 0 or sliding_window == (-1, -1)) + and (not envs.VLLM_USE_V1 or sliding_window == 0 + or sliding_window == (-1, -1)) and (qtype == torch.half or qtype == torch.bfloat16) and (head_size == 64 or head_size == 128) and (block_size == 16 or block_size == 32)