[Bugfix] Fix spurious "No compiled cutlass_scaled_mm ..." for W8A8 on Turing (#9487)
This commit is contained in:
parent
cb6fdaa0a0
commit
d1e8240875
@ -252,7 +252,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}")
|
||||
else()
|
||||
message(STATUS "Not building Marlin kernels as no compatible archs found"
|
||||
"in CUDA target architectures")
|
||||
" in CUDA target architectures")
|
||||
endif()
|
||||
|
||||
#
|
||||
@ -432,7 +432,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}")
|
||||
else()
|
||||
message(STATUS "Not building Marlin MOE kernels as no compatible archs found"
|
||||
"in CUDA target architectures")
|
||||
" in CUDA target architectures")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -137,9 +137,11 @@ void cutlass_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
|
||||
return;
|
||||
}
|
||||
|
||||
// Turing
|
||||
TORCH_CHECK(version_num >= 75);
|
||||
cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
|
||||
if (version_num >= 75) {
|
||||
// Turing
|
||||
cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(
|
||||
|
Loading…
x
Reference in New Issue
Block a user