39 lines
1.6 KiB
C++
39 lines
1.6 KiB
C++
![]() |
#pragma once
|
||
|
|
||
|
#include "cutlass/gemm/dispatch_policy.hpp"
|
||
|
|
||
|
namespace cutlass::gemm {
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
// FP8 related policies (including Blocked Scaled Accumulation)
|
||
|
// `ScaleGranularityM` specifies scaling granularity along M, while zero-value
|
||
|
// `ScaleGranularityM` indicates that scaling granularity is
|
||
|
// `size<0>(TileShape_MNK{})` along M.
|
||
|
template <int ScaleGranularityM = 0>
|
||
|
struct KernelTmaWarpSpecializedCooperativeFP8BlockScaledSubGroupMAccum
|
||
|
: KernelTmaWarpSpecializedCooperative {};
|
||
|
|
||
|
// n-buffer in smem (Hopper TMA), pipelined with Hopper GMMA and TMA, Warp
|
||
|
// specialized dynamic schedule For FP8 kernels with Block Scaling
|
||
|
template <int Stages_, class ClusterShape_ = Shape<_1, _1, _1>,
|
||
|
class KernelSchedule = KernelTmaWarpSpecialized,
|
||
|
int ScaleGranularityM =
|
||
|
0 // `ScaleGranularityM` specifies scaling granularity along M,
|
||
|
// while zero-value `ScaleGranularityM` indicates that scaling
|
||
|
// granularity is `size<0>(TileShape_MNK{})` along M.
|
||
|
>
|
||
|
struct MainloopSm90TmaGmmaWarpSpecializedBlockScalingSubGroupMFP8
|
||
|
: MainloopSm90TmaGmmaWarpSpecialized<Stages_, ClusterShape_,
|
||
|
KernelSchedule> {
|
||
|
static_assert(
|
||
|
cute::is_same_v<
|
||
|
KernelSchedule,
|
||
|
KernelTmaWarpSpecializedCooperativeFP8BlockScaledSubGroupMAccum<
|
||
|
ScaleGranularityM>>,
|
||
|
"KernelSchedule must be one of the warp specialized policies");
|
||
|
};
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
} // namespace cutlass::gemm
|