[Bugfix] Fix spurious "No compiled cutlass_scaled_mm ..." for W8A8 on Turing (#9487)

This commit is contained in:
Lucas Wilkinson 2024-10-22 18:41:13 -04:00 committed by GitHub
parent cb6fdaa0a0
commit d1e8240875
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 5 deletions

View File

@ -252,7 +252,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}")
else()
message(STATUS "Not building Marlin kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()
#
@ -432,7 +432,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}")
else()
message(STATUS "Not building Marlin MOE kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()
endif()

View File

@ -137,9 +137,11 @@ void cutlass_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
return;
}
// Turing
TORCH_CHECK(version_num >= 75);
cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
if (version_num >= 75) {
// Turing
cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
return;
}
#endif
TORCH_CHECK_NOT_IMPLEMENTED(