[Build] Disable sm_90a in cu11 (#5141)

This commit is contained in:
Simon Mo 2024-05-30 16:37:16 -05:00 committed by GitHub
parent 87a658c812
commit 45a1a69b98
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -177,7 +177,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
include(FetchContent) include(FetchContent)
SET(CUTLASS_ENABLE_HEADERS_ONLY=ON) SET(CUTLASS_ENABLE_HEADERS_ONLY=ON)
FetchContent_Declare( FetchContent_Declare(
cutlass cutlass
GIT_REPOSITORY https://github.com/nvidia/cutlass.git GIT_REPOSITORY https://github.com/nvidia/cutlass.git
# CUTLASS 3.5.0 # CUTLASS 3.5.0
GIT_TAG 7d49e6c7e2f8896c47f586706e67e1fb215529dc GIT_TAG 7d49e6c7e2f8896c47f586706e67e1fb215529dc
@@ -200,11 +200,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
# The CUTLASS kernels for Hopper require sm90a to be enabled. # The CUTLASS kernels for Hopper require sm90a to be enabled.
# This is done via the below gencode option, BUT that creates kernels for both sm90 and sm90a. # This is done via the below gencode option, BUT that creates kernels for both sm90 and sm90a.
# That adds an extra 17MB to compiled binary, so instead we selectively enable it. # That adds an extra 17MB to compiled binary, so instead we selectively enable it.
set_source_files_properties( if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 11)
"csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu" set_source_files_properties(
PROPERTIES "csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu"
COMPILE_FLAGS PROPERTIES
"-gencode arch=compute_90a,code=sm_90a") COMPILE_FLAGS
"-gencode arch=compute_90a,code=sm_90a")
endif()
endif() endif()