[Kernel] Update vllm-flash-attn version to reduce CPU overheads (#10742)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Parent: 5fc5ce0fe4
Commit: 8c1e77fb58
@@ -522,7 +522,7 @@ else()
     FetchContent_Declare(
             vllm-flash-attn
             GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
-            GIT_TAG d886f88165702b3c7e7744502772cd98b06be9e1
+            GIT_TAG fdf6d72b48aea41f4ae6a89139a453dae554abc8
             GIT_PROGRESS TRUE
             # Don't share the vllm-flash-attn build between build types
             BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
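For context, the block being edited uses CMake's FetchContent module to vendor flash-attention at configure time. Below is a minimal, self-contained sketch of the same pattern outside the vLLM build; the project name fetchcontent_demo is hypothetical, and everything else mirrors the declaration in the diff above. Pinning GIT_TAG to a full commit SHA (rather than a branch or tag name) keeps the dependency reproducible, which is why this version bump is a one-line hash change.

# Minimal sketch (not the vLLM build): pin a Git dependency to an exact
# commit with FetchContent. Requires CMake >= 3.14 for
# FetchContent_MakeAvailable.
cmake_minimum_required(VERSION 3.14)
project(fetchcontent_demo LANGUAGES NONE)  # hypothetical project name

include(FetchContent)

FetchContent_Declare(
  vllm-flash-attn
  GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
  # A full commit SHA cannot move the way a branch name can, so the
  # build stays reproducible; this is the hash bumped by the diff above.
  GIT_TAG        fdf6d72b48aea41f4ae6a89139a453dae554abc8
  GIT_PROGRESS   TRUE
  # Give each build type its own binary dir so Debug and Release
  # configurations do not share (and clobber) one build tree.
  BINARY_DIR     ${CMAKE_BINARY_DIR}/vllm-flash-attn
)

# Clones the repository at configure time (if not already present) and
# adds its targets to this build. Configuring the real flash-attention
# project additionally requires a CUDA toolchain.
FetchContent_MakeAvailable(vllm-flash-attn)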