From 8c32b08a86bd7dd1a3c59cb9257eaa80fd329276 Mon Sep 17 00:00:00 2001 From: Jinzhen Lin Date: Fri, 14 Feb 2025 12:07:05 +0800 Subject: [PATCH] [Kernel] Fix awq error when n is not divisible by 128 (#13227) --- csrc/quantization/awq/gemm_kernels.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/quantization/awq/gemm_kernels.cu b/csrc/quantization/awq/gemm_kernels.cu index 9da724a1..53c47679 100644 --- a/csrc/quantization/awq/gemm_kernels.cu +++ b/csrc/quantization/awq/gemm_kernels.cu @@ -334,7 +334,7 @@ __global__ void __launch_bounds__(64) } // TODO: Shang: Hoist loop invariance. - for (int ax1_0_1 = 0; ax1_0_1 < 4; ++ax1_0_1) { + for (int ax1_0_1 = 0; ax1_0_1 < (N / 32); ++ax1_0_1) { for (int local_id = 0; local_id < 8; ++local_id) { int row_offset = (((int)blockIdx_y) / j_factors1) * 16 + ((int)threadIdx.x) / 4 + (local_id % 4) / 2 * 8;