/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm_dispatcher.hpp Source File
warp_gemm_dispatcher.hpp
Go to the documentation of this file.
// Dispatch entries for float A/B operands with a float accumulator.
// Each specialization maps one argument tuple to a WarpGemm type through the nested `Type` alias.
// Trailing arguments that are omitted fall back to the primary template's defaults (declared outside this listing).
// TransposeC=true selects the *TransposedCDistribution variant.
34 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess
35 template<> struct Dispatcher<float, float, float, 16, 16, 4, false> { using Type = WarpGemmMfmaF32F32F32M16N16K4; };
36 template<> struct Dispatcher<float, float, float, 16, 16, 16, false> { using Type = WarpGemmMfmaF32F32F32M16N16K16<>; };
37 template<> struct Dispatcher<float, float, float, 16, 16, 16, true> { using Type = WarpGemmMfmaF32F32F32M16N16K16TransposedCDistribution<>; };
// Dispatch entries for half_t A/B operands with a float accumulator (WarpGemmMfmaF16F16F32* types).
// Entries that spell out EDouble as the 10th argument pass it on as the template argument of the selected type;
// entries that stop after TransposeC rely on the primary template's defaults for the remaining arguments.
39 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess
40 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, false> { using Type = WarpGemmMfmaF16F16F32M32N32K8; };
41 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, true> { using Type = WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution; };
42 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false> { using Type = WarpGemmMfmaF16F16F32M32N32K16<>; };
43 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, true> { using Type = WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution<>; };
44 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M32N32K16<EDouble>; };
45 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, true, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution<EDouble>; };
46 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, false> { using Type = WarpGemmMfmaF16F16F32M16N16K32<>; };
47 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, true> { using Type = WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution<>; };
48 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, false, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M16N16K32<EDouble>; };
49 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, true, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution<EDouble>; };
50 template<> struct Dispatcher<half_t, half_t, float, 4, 64, 16, false> { using Type = WarpGemmMfmaF16F16F32M4N64K16; };
51 template<> struct Dispatcher<half_t, half_t, float, 64, 4, 16, false> { using Type = WarpGemmMfmaF16F16F32M64N4K16; };
// half_t 16x16x16 partial specialization: TransposeC stays a template parameter (SwizzleA fixed to false)
// and is forwarded to WarpGemmWmma_f32_16x16x16_f16_f16.
// NOTE(review): the listing's own numbering skips lines around this entry, so neighbouring lines
// (possibly preprocessor guards) are not visible here — confirm against the real header.
54 template<bool TransposeC> struct Dispatcher<half_t, half_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_f16_f16<TransposeC>;};
// half_t 16x16x16 full specializations selecting the WarpGemmMfmaF16F16F32M16N16K16 types;
// TransposeC=true picks the TransposedCDistribution variant.
56 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 16, false> { using Type = WarpGemmMfmaF16F16F32M16N16K16; };
57 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 16, true> { using Type = WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution; };
// half_t entries with SwizzleA (8th argument) set to true.
// With TransposeC=false they select the *SwizzleA types; with TransposeC=true they select the
// *SwizzleBTransposedCDistribution types (note the SwizzleB naming on the transposed variants).
60 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, false, true> { using Type = WarpGemmMfmaF16F16F32M32N32K8SwizzleA; };
61 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false, true> { using Type = WarpGemmMfmaF16F16F32M32N32K16SwizzleA; };
62 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, true, true> { using Type = WarpGemmMfmaF16F16F32M32N32K8SwizzleBTransposedCDistribution; };
63 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, true, true> { using Type = WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution; };
// half_t entries with UseStructuredSparsity (9th argument) set to true; these select the WarpGemmSmfmac* types.
66 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess
67 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false, false, true> { using Type = WarpGemmSmfmacF16F16F32M32N32K16; };
68 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, false, false, true> { using Type = WarpGemmSmfmacF16F16F32M16N16K32; };
// Dispatch entries for bf16_t A/B operands with a float accumulator (WarpGemmMfmaBf16Bf16F32* types).
// Entries that spell out EDouble as the 10th argument pass it on as the template argument of the selected type;
// entries that stop after TransposeC rely on the primary template's defaults for the remaining arguments.
71 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess
72 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, false> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8; };
73 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution; };
74 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16<>; };
75 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution<>; };
76 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, false, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16<EDouble>; };
77 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, true, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution<EDouble>; };
78 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, false> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32<>; };
79 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, true> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution<>; };
80 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, false, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32<EDouble>; };
81 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, true, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution<EDouble>; };
82 template<> struct Dispatcher<bf16_t, bf16_t, float, 4, 64, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M4N64K16; };
83 template<> struct Dispatcher<bf16_t, bf16_t, float, 64, 4, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M64N4K16; };
// bf16_t 16x16x16 partial specialization: TransposeC stays a template parameter (SwizzleA fixed to false)
// and is forwarded to WarpGemmWmma_f32_16x16x16_bf16_bf16.
// NOTE(review): the listing's own numbering skips lines around this entry, so neighbouring lines
// (possibly preprocessor guards) are not visible here — confirm against the real header.
86 template<bool TransposeC> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_bf16_bf16<TransposeC>; };
// bf16_t 16x16x16 full specializations selecting the WarpGemmMfmaBf16Bf16F32M16N16K16 types;
// TransposeC=true picks the TransposedCDistribution variant.
88 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K16; };
89 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 16, true> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution; };
// bf16_t entries with SwizzleA (8th argument) set to true.
// With TransposeC=false they select the *SwizzleA types; with TransposeC=true they select the
// *SwizzleBTransposedCDistribution types (note the SwizzleB naming on the transposed variants).
92 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, false, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA; };
93 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, false, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA; };
94 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, true, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleBTransposedCDistribution; };
95 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, true, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution; };
// Dispatch entries for fp8_t / bf8_t A/B operands (the combinations listed below) with a float accumulator.
// TransposeC=true selects the *_CTransposed types. Not every shape has a transposed entry in this listing:
// the 32x32x32 and 16x16x64 shapes appear only with TransposeC=false.
98 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess
99 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_fp8_fp8; };
100 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_fp8_fp8; };
101 template<> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_fp8_fp8; };
102 template<> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 64, false> { using Type = WarpGemmMfma_f32_16x16x64_fp8_fp8; };
103 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed; };
104 template<> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 32, true> { using Type = WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed; };
105 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_fp8_bf8; };
106 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed; };
107 template<> struct Dispatcher<fp8_t, bf8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_fp8_bf8; };
108 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_fp8_bf8; };
109 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_bf8_fp8; };
110 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed; };
111 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_bf8_bf8; };
112 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_bf8_bf8; };
113 template<> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_bf8_bf8; };
114 template<> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 32, true> { using Type = WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed; };
115 template<> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 64, false> { using Type = WarpGemmMfma_f32_16x16x64_bf8_bf8; };
116 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed; };
// 16x16x128 entries for fp8_t / bf8_t operands. These are partial specializations that leave the
// 10th argument open as `I` (a WGAttrNumAccessEnum) and forward it to the selected type;
// TransposeC=true selects the *_CTransposed types.
// The final pk_fp4_t entry is a full specialization that uses the selected type's default template argument (`<>`).
119 template<WGAttrNumAccessEnum I> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 128, false, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_fp8_fp8<I>; };
120 template<WGAttrNumAccessEnum I> struct Dispatcher<fp8_t, bf8_t, float, 16, 16, 128, false, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_fp8_bf8<I>; };
121 template<WGAttrNumAccessEnum I> struct Dispatcher<bf8_t, fp8_t, float, 16, 16, 128, false, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_bf8_fp8<I>; };
122 template<WGAttrNumAccessEnum I> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 128, false, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_bf8_bf8<I>; };
123 template<WGAttrNumAccessEnum I> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_fp8_fp8_CTransposed<I>; };
124 template<WGAttrNumAccessEnum I> struct Dispatcher<fp8_t, bf8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_fp8_bf8_CTransposed<I>; };
125 template<WGAttrNumAccessEnum I> struct Dispatcher<bf8_t, fp8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_bf8_fp8_CTransposed<I>; };
126 template<WGAttrNumAccessEnum I> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_bf8_bf8_CTransposed<I>; };
127 template<> struct Dispatcher<pk_fp4_t, pk_fp4_t, float, 16, 16, 128, false> { using Type = WarpGemmMfma_f32_16x16x128_fp4<>; };
// 32x32x64 entries for fp8_t / bf8_t operands, TransposeC=false only.
// The first four rely on default trailing arguments and use the selected type's default template argument (`<>`);
// the last four name EQuad as the 10th argument and pass it on to the selected type.
129 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_fp8_fp8<>; };
130 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_fp8_bf8<>; };
131 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_bf8_fp8<>; };
132 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_bf8_bf8<>; };
133 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_fp8_fp8<EQuad>; };
134 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_fp8_bf8<EQuad>; };
135 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_bf8_fp8<EQuad>; };
136 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_bf8_bf8<EQuad>; };
// fp8_t / bf8_t 16x16x16 partial specializations: TransposeC stays a template parameter (SwizzleA fixed to false)
// and is forwarded to the matching WarpGemmWmma_f32_16x16x16_* type.
// NOTE(review): the listing's own numbering skips lines around these entries, so neighbouring lines
// (possibly preprocessor guards) are not visible here — confirm against the real header.
139 template<bool TransposeC> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_f8_f8<TransposeC>; };
140 template<bool TransposeC> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_bf8_bf8<TransposeC>; };
141 template<bool TransposeC> struct Dispatcher<fp8_t, bf8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_f8_bf8<TransposeC>; };
142 template<bool TransposeC> struct Dispatcher<bf8_t, fp8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_bf8_f8<TransposeC>; };
// Dispatch entries for int8_t A/B operands with an int32_t accumulator.
// TransposeC=true selects the *_CTransposed types.
145 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess
146 template<> struct Dispatcher<int8_t, int8_t, int32_t, 32, 32, 16, false> { using Type = WarpGemmMfma_i32_32x32x16_i8_i8; };
147 template<> struct Dispatcher<int8_t, int8_t, int32_t, 32, 32, 16, true> { using Type = WarpGemmMfma_i32_32x32x16_i8_i8_CTransposed; };
148 template<> struct Dispatcher<int8_t, int8_t, int32_t, 16, 16, 32, false> { using Type = WarpGemmMfma_i32_16x16x32_i8_i8; };
149 template<> struct Dispatcher<int8_t, int8_t, int32_t, 16, 16, 32, true> { using Type = WarpGemmMfma_i32_16x16x32_i8_i8_CTransposed; };
// int8_t 16x16x16 partial specialization: TransposeC stays a template parameter (SwizzleA fixed to false)
// and is forwarded to WarpGemmWmma_i32_16x16x16_i8_i8.
// NOTE(review): the listing's own numbering skips lines around this entry, so neighbouring lines
// (possibly preprocessor guards) are not visible here — confirm against the real header.
151 template<bool TransposeC> struct Dispatcher<int8_t, int8_t, int32_t, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_i32_16x16x16_i8_i8<TransposeC>;};
Definition: cluster_descriptor.hpp:13
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_32x32x16_i8_i8
Definition: warp_gemm.hpp:402
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_bf8_bf8
Definition: warp_gemm.hpp:307
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_bf8
Definition: warp_gemm.hpp:285
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_bf8
Definition: warp_gemm.hpp:273
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:237
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M16N16K16
Definition: warp_gemm.hpp:160
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M64N4K16
Definition: warp_gemm.hpp:146
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_fp8
Definition: warp_gemm.hpp:270
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleA
Definition: warp_gemm.hpp:73
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M32N32K8SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:127
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution
Definition: warp_gemm.hpp:203
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution
Definition: warp_gemm.hpp:199
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_fp8_fp8
Definition: warp_gemm.hpp:303
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_fp8_bf8
Definition: warp_gemm.hpp:267
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed
Definition: warp_gemm.hpp:383
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed
Definition: warp_gemm.hpp:292
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution
Definition: warp_gemm.hpp:77
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed
Definition: warp_gemm.hpp:391
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed
Definition: warp_gemm.hpp:299
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M32N32K16< WGAttrCtlEnum::Default_ > >> WarpGemmSmfmacF16F16F32M32N32K16
Definition: warp_gemm.hpp:150
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF32F32F32M16N16K4
Definition: warp_gemm.hpp:18
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M16N16K16
Definition: warp_gemm.hpp:39
typename impl::warp_gemm_dispatcher::Dispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition: warp_gemm_dispatcher.hpp:177
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_bf8_bf8
Definition: warp_gemm.hpp:281
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_16x16x32_i8_i8
Definition: warp_gemm.hpp:409
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_bf8_bf8
Definition: warp_gemm.hpp:295
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA
Definition: warp_gemm.hpp:195
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed
Definition: warp_gemm.hpp:379
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M32N32K8
Definition: warp_gemm.hpp:157
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_32x32x16_i8_i8_CTransposed
Definition: warp_gemm.hpp:406
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_fp8
Definition: warp_gemm.hpp:261
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_16x16x32_i8_i8_CTransposed
Definition: warp_gemm.hpp:413
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA
Definition: warp_gemm.hpp:190
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M64N4K16
Definition: warp_gemm.hpp:256
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_fp8_fp8
Definition: warp_gemm.hpp:288
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed
Definition: warp_gemm.hpp:387
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:247
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M16N16K32< WGAttrCtlEnum::Default_ > >> WarpGemmSmfmacF16F16F32M16N16K32
Definition: warp_gemm.hpp:153
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M32N32K8
Definition: warp_gemm.hpp:36
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:137
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaF16F16F32M32N32K8SwizzleA
Definition: warp_gemm.hpp:69
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M4N64K16
Definition: warp_gemm.hpp:142
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_bf8
Definition: warp_gemm.hpp:264
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution
Definition: warp_gemm.hpp:81
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_fp8
Definition: warp_gemm.hpp:277
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M4N64K16
Definition: warp_gemm.hpp:252
Definition: warp_gemm_impl.hpp:11
Definition: warp_gemm_smfmac_impl.hpp:11
Definition: warp_gemm_dispatcher.hpp:30
Definition: pk_fp4.hpp:76