[NVIDIA] Fix an issue to use current stream for the nvfp4 quant (#13632)
This commit is contained in:
parent
981f3c831e
commit
27a09dc52c
@@ -348,10 +348,7 @@ void scaled_fp4_quant_sm100a(torch::Tensor const& output,
|
||||
auto sf_out = static_cast<int32_t*>(output_sf.data_ptr());
|
||||
auto output_ptr = static_cast<int64_t*>(output.data_ptr());
|
||||
at::cuda::CUDAGuard device_guard{(char)input.get_device()};
|
||||
-auto stream = at::cuda::getStreamFromPool(false, input.get_device());
|
||||
-if (stream == nullptr) {
|
||||
-std::cerr << "Warning: Null CUDA stream" << std::endl;
|
||||
-}
|
||||
+auto stream = at::cuda::getCurrentCUDAStream(input.get_device());
|
||||
|
||||
// We don't support e8m0 scales at this moment.
|
||||
bool useUE8M0 = false;
|
||||
|
Loading…
x
Reference in New Issue
Block a user