From e5e03c2c1b3708640b913a0bcb9b6815f047e7fa Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Fri, 7 Mar 2025 00:56:06 -0500
Subject: [PATCH] [BugFix] Illegal Memory Access in the blockwise cutlass fp8 GEMMs (#14396)

---
 ...90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csrc/cutlass_extensions/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp b/csrc/cutlass_extensions/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp
index 928a9500..d922a334 100644
--- a/csrc/cutlass_extensions/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp
+++ b/csrc/cutlass_extensions/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp
@@ -402,7 +402,7 @@ struct CollectiveMma<
     // TODO: test `scale_copy_a` with `ScaleMsPerTile` < 128
     TiledCopy scale_copy_a = make_tiled_copy(SmemBlockScalingCopyAtomA{},
-      Layout>{}, Layout>{}); // (1,1,1)
+      Layout>{}, Layout>{}); // (1,1,1)
     TiledCopy scale_copy_b = make_tiled_copy(SmemBlockScalingCopyAtomB{},
       Layout>{}, Layout>{}); // (1,1,1)
     ThrCopy thr_scale_copy_a = scale_copy_a.get_slice(threadIdx.x);