quantization_operation.hpp
#pragma once

#include "ck/utility/math.hpp"
// #include "ck/utility/get_id.hpp"

namespace ck {
namespace tensor_operation {
namespace element_wise {

// Y = Sy * Qy
// W = Sw * Qw
// X = Sx * Qx
// B = Sb * Qb = Sw * Sx * Qb
// Where X, W, Y are float32 and Qx, Qw, Qy are int8
// Sx, Sw, Sy are the scales of x, w, y (float32), computed from the quantization range
// Qb is int32; the scale of B is chosen as Sw * Sx for convenience

// Y = W @ X, where @ is convolution or matrix multiplication
// Sy * Qy = (Sw * Qw) @ (Sx * Qx)
// Qy = [(Sw * Sx) / Sy] * (Qw @ Qx)

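// Worked example (illustrative values only, not taken from the library): with Sw = 0.02,
// Sx = 0.1 and Sy = 0.05, the requantization factor is (Sw * Sx) / Sy = 0.04, so an int32
// accumulator value Qw @ Qx = 1000 maps to Qy = clamp(0.04 * 1000, -128, 127) = 40.
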
// For activation functions that are piecewise linear, such as relu, leaky relu ...etc
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Activation_Mul_Clamp
{
    // Convolution + Activation (piecewise linear function)
    // If an activation is a piecewise linear function, then Activation(Sy * Qy) = Sy * Activation(Qy)
    // Z = Activation(Y) = Activation(W @ X)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = (Sy / Sz) * Activation(Qy) = [(Sw * Sx) / Sz] * Activation(Qw @ Qx)

    // requantScale_ = Sw * Sx / Sz
    Activation_Mul_Clamp(float requantScale, Activation activationOp)
        : requantScale_(requantScale), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
    {
        float y_fp32 = ck::type_convert<float>(x);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int8_t>(y_fp32);
    }

    __device__ constexpr void operator()(int32_t& y, const int32_t& x) const
    {
        // CAUTION - We might type_convert to int8 in threadwise copy
        // eg. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int32_t>(y_fp32);
    }

    __host__ constexpr void operator()(float& y, const float& x) const
    {
        // CAUTION - Reference code may use float in & float out
        activationOp_(y, x);
        y = math::clamp(requantScale_ * y, -128.f, 127.f);
    }

    float requantScale_;
    Activation activationOp_;
};
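
// Usage sketch (illustrative only; `Relu` stands for any functor providing
// `void operator()(float&, const float&) const`, and the scale value is an assumption):
//   Activation_Mul_Clamp<Relu> requant_op{/*requantScale=*/0.04f, Relu{}};
//   int8_t q_out;
//   requant_op(q_out, acc_i32); // acc_i32: int32 accumulator holding Qw @ Qx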

// For activation functions that are non piecewise linear, such as TanH, Sigmoid ...etc
// If an activation is not a piecewise linear function,
// then Activation(Sy * Qy) != Sy * Activation(Qy)
template <typename Activation>
struct Mul_Activation_Mul_Clamp
{
    // Convolution + Activation (non piecewise linear function)
    // Z = Activation(Y) = Activation(W @ X)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = S1 * Activation[Sacc * (Qw @ Qx)]
    // Where S1 = 1 / Sz, Sacc = Sw * Sx
    Mul_Activation_Mul_Clamp(float scale_z_inv, float scaleAcc, Activation activationOp)
        : scale_z_inv_(scale_z_inv), scaleAcc_(scaleAcc), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
    {
        float y_fp32 = ck::type_convert<float>(x);
        y_fp32 = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int8_t>(y_fp32);
    }

    float scale_z_inv_;
    float scaleAcc_;
    Activation activationOp_;
};
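
// Numeric illustration (assumed values, not from the library): for tanh,
// tanh(Sacc * q) != Sacc * tanh(q) in general. With Sacc = 0.002 and q = 1000,
// tanh(2.0) ~= 0.964 while 0.002 * tanh(1000) ~= 0.002, so the accumulator must be
// rescaled to real units *before* the activation, as done above.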

// Conv per-channel quantization + activation functions that are piecewise linear, such as
// relu, leaky relu ...etc
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Activation_Mul2_Clamp
{
    Activation_Mul2_Clamp(Activation activationOp) : activationOp_(activationOp) {}

    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const float& requantScale) const
    {
        float y_fp32 = ck::type_convert<float>(x);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int8_t>(y_fp32);
    }

    __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const float& requantScale) const
    {
        // CAUTION - We might type_convert to int8 in threadwise copy
        // eg. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int32_t>(y_fp32);
    }

    Activation activationOp_;
};
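
// Per-channel sketch (illustrative): the requantization factor is supplied per call rather
// than stored in the functor, typically Sw[c] * Sx / Sy for the output channel c being
// written, so a single functor instance serves every channel.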

// For activation functions that are piecewise linear, such as relu, leaky relu ...etc
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Add_Activation_Mul_Clamp
{
    // Convolution + bias
    // Let Bias = B = Sw * Sx * Qb
    // Where Qb is int32
    // Y = W @ X + B
    // Sy * Qy = (Sw * Qw) @ (Sx * Qx) + Sw * Sx * Qb
    // Qy = [(Sw * Sx) / Sy] * (Qw @ Qx + Qb)

    // For activation, Z = Activation(Y)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = (Sy / Sz) * Activation(Qy) = [(Sw * Sx) / Sz] * Activation(Qw @ Qx + Qb)
    Add_Activation_Mul_Clamp(float requantScale, Activation activationOp)
        : requantScale_(requantScale), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const int32_t& bias) const
    {
        float y_fp32 = ck::type_convert<float>(x + bias);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int8_t>(y_fp32);
    }

    __host__ __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const int32_t& bias) const
    {
        // CAUTION - We might type_convert to int8 in threadwise copy
        // eg. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x + bias);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int32_t>(y_fp32);
    }

    float requantScale_;
    Activation activationOp_;
};
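
// Worked example (illustrative values, assuming the activation is relu and
// requantScale = 0.04 as above): an accumulator Qw @ Qx = 900 and a quantized bias Qb = 100
// give Qz = clamp(0.04 * relu(900 + 100), -128, 127) = 40.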

// Conv per-channel quantization + activation functions that are piecewise linear, such as
// relu, leaky relu ...etc
template <typename Activation>
struct Add_Activation_Mul2_Clamp
{
    Add_Activation_Mul2_Clamp(Activation activationOp) : activationOp_(activationOp) {}

    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const int32_t& bias, const float& requantScale) const
    {
        float y_fp32 = ck::type_convert<float>(x + bias);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int8_t>(y_fp32);
    }

    __host__ __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const int32_t& bias, const float& requantScale) const
    {
        // CAUTION - We might type_convert to int8 in threadwise copy
        // eg. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x + bias);
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(requantScale * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int32_t>(y_fp32);
    }

    Activation activationOp_;
};
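
// Usage sketch (illustrative; `Relu` and the numeric values are assumptions):
//   Add_Activation_Mul2_Clamp<Relu> op{Relu{}};
//   op(q_out, acc_i32, bias_i32, /*requantScale=*/0.04f); // per-output-channel scale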

// For activation functions that are non piecewise linear, such as TanH, Sigmoid ...etc
// If an activation is not a piecewise linear function,
// then Activation(Sy * Qy) != Sy * Activation(Qy)
template <typename Activation>
struct Add_Mul_Activation_Mul_Clamp
{
    // Convolution + Activation (non piecewise linear function)
    // Z = Activation(Y) = Activation(W @ X + B)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = S1 * Activation[Sacc * (Qw @ Qx + Qb)]
    // Where S1 = 1 / Sz, Sacc = Sw * Sx
    Add_Mul_Activation_Mul_Clamp(float scale_z_inv, float scaleAcc, Activation activationOp)
        : scale_z_inv_(scale_z_inv), scaleAcc_(scaleAcc), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const int32_t& bias) const
    {
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32 = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int8_t>(y_fp32);
    }

    __host__ __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const int32_t& bias) const
    {
        // CAUTION - We might type_convert to int8 in threadwise copy
        // eg. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32 = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int32_t>(y_fp32);
    }

    float scale_z_inv_;
    float scaleAcc_;
    Activation activationOp_;
};
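
// Usage sketch (illustrative; `Tanh` and all numeric values are assumptions):
//   Add_Mul_Activation_Mul_Clamp<Tanh> op{/*scale_z_inv=*/1.f / 0.0078f,
//                                         /*scaleAcc=*/0.002f, Tanh{}};
//   op(q_out, acc_i32, bias_i32); // rescales by Sacc, applies tanh, multiplies by 1/Sz, clamps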

// Conv per-channel quantization + activation functions that are non piecewise linear,
// such as TanH, Sigmoid ...etc
// If an activation is not a piecewise linear function,
// then Activation(Sy * Qy) != Sy * Activation(Qy)
template <typename Activation>
struct Add_Mul2_Activation_Mul_Clamp
{
    Add_Mul2_Activation_Mul_Clamp(float scale_z_inv, Activation activationOp)
        : scale_z_inv_(scale_z_inv), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const int32_t& bias, const float& scaleAcc) const
    {
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32 = scaleAcc * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int8_t>(y_fp32);
    }

    __host__ __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const int32_t& bias, const float& scaleAcc) const
    {
        // CAUTION - We might type_convert to int8 in threadwise copy
        // eg. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32 = scaleAcc * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y = ck::type_convert<int32_t>(y_fp32);
    }

    float scale_z_inv_;
    Activation activationOp_;
};
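
// Per-channel sketch (illustrative): only 1 / Sz lives in the functor; the accumulator scale
// (typically Sw[c] * Sx for output channel c) arrives as the last call argument:
//   op(q_out, acc_i32, bias_i32, /*scaleAcc=*/0.002f);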

} // namespace element_wise
} // namespace tensor_operation
} // namespace ck