[ROCm][Hardware][AMD] Adding Navi21 to fallback to naive attention if Triton is not used (#4658)
parent 86b45ae065
commit c0724fc915
@@ -231,8 +231,9 @@ class ROCmFlashAttentionImpl(AttentionImpl):
             self.attn_func = triton_attention
             logger.debug("Using Triton FA in ROCmBackend")
         else:
-            # if not using triton, navi3x not use flash-attn either
-            if torch.cuda.get_device_capability()[0] == 11:
+            # if not using triton, navi3x/navi21/navi10 do not use flash-attn
+            # either
+            if torch.cuda.get_device_capability()[0] != 9:
                 self.use_naive_attn = True
             else:
                 try:
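For context, the guard keys off the GFX major version that ROCm builds of PyTorch report through torch.cuda.get_device_capability(): gfx9 parts (CDNA, e.g. MI200/MI300) keep the flash-attn path, while everything else, which now includes Navi21/Navi10 (gfx10) as well as Navi3x (gfx11), falls back to naive attention when Triton is not used. Below is a minimal sketch of that selection logic; the helper name choose_rocm_attention_backend and the returned labels are illustrative only and not part of the diff.

import torch


def choose_rocm_attention_backend(use_triton_flash_attn: bool) -> str:
    # Hypothetical helper mirroring the selection logic in the diff above.
    # On ROCm, torch.cuda.get_device_capability() reports the GFX
    # major/minor version, e.g. gfx90a -> (9, 0), gfx1030 / Navi21 -> (10, 3),
    # gfx1100 / Navi31 -> (11, 0).
    if use_triton_flash_attn:
        return "triton"
    if torch.cuda.get_device_capability()[0] != 9:
        # Navi10/Navi21 (gfx10) and Navi3x (gfx11): no flash-attn build
        # is available, so fall back to naive attention.
        return "naive"
    # gfx9 (CDNA: MI100/MI200/MI300) can use the flash-attn path.
    return "flash-attn"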