[BugFix/Build] Fix sparse kernels not getting built on hopper (#14572)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
07964e2f30
commit
07b4b7a37f
@ -422,7 +422,8 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
|||||||
# 2:4 Sparse Kernels
|
# 2:4 Sparse Kernels
|
||||||
|
|
||||||
# The 2:4 sparse kernels cutlass_scaled_sparse_mm and cutlass_compressor
|
# The 2:4 sparse kernels cutlass_scaled_sparse_mm and cutlass_compressor
|
||||||
# require CUDA 12.2 or later (and only work on Hopper and Blackwell).
|
# require CUDA 12.2 or later (and only work on Hopper).
|
||||||
|
cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a;" "${CUDA_ARCHS}")
|
||||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.2 AND SCALED_MM_ARCHS)
|
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.2 AND SCALED_MM_ARCHS)
|
||||||
set(SRCS "csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu")
|
set(SRCS "csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu")
|
||||||
set_gencode_flags_for_srcs(
|
set_gencode_flags_for_srcs(
|
||||||
|
@ -58,7 +58,9 @@ void cutlass_scaled_sparse_mm(torch::Tensor& c, torch::Tensor const& a,
|
|||||||
|
|
||||||
// Guard against compilation issues for sm90 kernels
|
// Guard against compilation issues for sm90 kernels
|
||||||
#if defined ENABLE_SPARSE_SCALED_MM_C3X && ENABLE_SPARSE_SCALED_MM_C3X
|
#if defined ENABLE_SPARSE_SCALED_MM_C3X && ENABLE_SPARSE_SCALED_MM_C3X
|
||||||
if (version_num >= 90) {
|
// We build for 9.0a which is not forward compatible, so restrict this to
|
||||||
|
// Hopper only
|
||||||
|
if (version_num == 90) {
|
||||||
cutlass_scaled_sparse_mm_sm90(c, a, bt_nzs, bt_meta, a_scales, b_scales,
|
cutlass_scaled_sparse_mm_sm90(c, a, bt_nzs, bt_meta, a_scales, b_scales,
|
||||||
bias);
|
bias);
|
||||||
return;
|
return;
|
||||||
@ -82,7 +84,9 @@ std::vector<torch::Tensor> cutlass_sparse_compress(torch::Tensor const& a) {
|
|||||||
|
|
||||||
// Guard against compilation issues for sm90 kernels
|
// Guard against compilation issues for sm90 kernels
|
||||||
#if defined ENABLE_SPARSE_SCALED_MM_C3X && ENABLE_SPARSE_SCALED_MM_C3X
|
#if defined ENABLE_SPARSE_SCALED_MM_C3X && ENABLE_SPARSE_SCALED_MM_C3X
|
||||||
if (version_num >= 90) {
|
// We build for 9.0a which is not forward compatible, so restrict this to
|
||||||
|
// Hopper only
|
||||||
|
if (version_num == 90) {
|
||||||
std::vector<torch::Tensor> result_tensors;
|
std::vector<torch::Tensor> result_tensors;
|
||||||
|
|
||||||
auto [a_meta, a_nzs] = cutlass_sparse_compress_sm90(a);
|
auto [a_meta, a_nzs] = cutlass_sparse_compress_sm90(a);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user