/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/numeric/pk_fp4.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/numeric/pk_fp4.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/numeric/pk_fp4.hpp Source File
pk_fp4.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <cmath>
10 
// CK_TILE_FP4_CVT_DEVICE is 1 only when the compiler defines __gfx950__.
// The conversion functions in this header test it to choose between the
// impl::_from_f4 / impl::_to_f4 builtin wrappers and the generic
// convert_to_float / convert_to_type path.
#ifdef __gfx950__
#define CK_TILE_FP4_CVT_DEVICE 1
#else
#define CK_TILE_FP4_CVT_DEVICE 0
#endif

// Set to a non-zero value to compile the lookup-table based to_float /
// to_fp32x2 / to_fp16 / to_fp16x2 definitions instead of the default ones.
#define TEST_convert_with_table 0
18 
19 namespace ck_tile {
20 
21 using fp32_t = float;
22 using fp32x2_t = float __attribute__((ext_vector_type(2)));
23 using fp16x2_t = _Float16 __attribute__((ext_vector_type(2)));
24 using bf16x2_t = bfloat16_t __attribute__((ext_vector_type(2)));
25 
26 // Helpers: constexpr-safe access to elements of ext_vector_type(2)
27 // Some compilers don't allow operator[] in constant expressions for vector types.
28 // We use bit_cast to a trivially copyable representation to extract lanes.
29 namespace detail {
31 {
32  _Float16 e[2];
33 };
35 {
37 };
39 {
40  float e[2];
41 };
42 
43 CK_TILE_HOST_DEVICE constexpr _Float16 lane0(const fp16x2_t& v)
44 {
45  return ck_tile::bit_cast<fp16x2_repr>(v).e[0];
46 }
47 CK_TILE_HOST_DEVICE constexpr _Float16 lane1(const fp16x2_t& v)
48 {
49  return ck_tile::bit_cast<fp16x2_repr>(v).e[1];
50 }
51 
53 {
54  return ck_tile::bit_cast<bf16x2_repr>(v).e[0];
55 }
57 {
58  return ck_tile::bit_cast<bf16x2_repr>(v).e[1];
59 }
60 
61 CK_TILE_HOST_DEVICE constexpr float lane0(const fp32x2_t& v)
62 {
63  return ck_tile::bit_cast<fp32x2_repr>(v).e[0];
64 }
65 CK_TILE_HOST_DEVICE constexpr float lane1(const fp32x2_t& v)
66 {
67  return ck_tile::bit_cast<fp32x2_repr>(v).e[1];
68 }
69 } // namespace detail
70 
71 struct pk_float4_e2m1_t;
72 CK_TILE_HOST_DEVICE constexpr pk_float4_e2m1_t float_to_pk_fp4(const float& x, float scale = 1.f);
73 
74 // TODO: Add stochastic method
76 {
77  // TODO: Can we merge raw_type and type?
78  using raw_type = uint8_t;
79  using type = raw_type;
81 
83  template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
84  CK_TILE_HOST_DEVICE constexpr pk_float4_e2m1_t(T init) : data{static_cast<type>(init)}
85  {
86  }
87  CK_TILE_HOST_DEVICE explicit constexpr pk_float4_e2m1_t(float init, float scale = 1.f)
88  : data{float_to_pk_fp4(init, scale)}
89  {
90  }
91  CK_TILE_HOST_DEVICE constexpr operator type() const { return data; }
92  CK_TILE_HOST_DEVICE constexpr type& get() { return data; }
93  CK_TILE_HOST_DEVICE constexpr type get() const { return data; }
94 
95  CK_TILE_HOST_DEVICE constexpr float to_float(float scale = 1.f) const;
96  CK_TILE_HOST_DEVICE constexpr fp32x2_t to_fp32x2(float scale = 1.f) const;
97  CK_TILE_HOST_DEVICE constexpr fp16_t to_fp16(float scale = 1.f) const;
98  CK_TILE_HOST_DEVICE constexpr fp16x2_t to_fp16x2(float scale = 1.f) const;
99  CK_TILE_HOST_DEVICE constexpr bf16_t to_bf16(float scale = 1.f) const;
100  CK_TILE_HOST_DEVICE constexpr bf16x2_t to_bf16x2(float scale = 1.f) const;
101 
102  CK_TILE_HOST_DEVICE constexpr operator float() const { return to_float(); }
103  CK_TILE_HOST_DEVICE constexpr operator fp32x2_t() const { return to_fp32x2(); }
104  CK_TILE_HOST_DEVICE constexpr operator fp16_t() const { return to_fp16(); }
105  CK_TILE_HOST_DEVICE constexpr operator fp16x2_t() const { return to_fp16x2(); }
106  CK_TILE_HOST_DEVICE constexpr operator bf16_t() const { return to_bf16(); }
107  CK_TILE_HOST_DEVICE constexpr operator bf16x2_t() const { return to_bf16x2(); }
108 
109  template <index_t I>
111  {
112  return _unpack(number<I>{});
113  }
115  const pk_float4_e2m1_t& x1)
116  {
117  return _pack(x0.get(), x1.get());
118  }
119 
120  template <index_t I>
122  CK_TILE_HOST_DEVICE constexpr static type _pack(const type x0, const type x1)
123  {
124  return (x1 << 4) | (x0 & 0b00001111);
125  }
126 
#if TEST_convert_with_table
// Lookup tables indexed by a 4-bit e2m1 code (0..15); entries 8..15 are the
// negated values of entries 0..7.
//
// Entry 8 is written as -0.0f: the integer expression `-0` evaluates to 0 and
// would store +0.0f, which disagrees with the fp16 table below (0x8000 is -0).
static constexpr float e2m1_to_fp32_table[16] = {0.0f,
                                                 0.5f,
                                                 1.0f,
                                                 1.5f,
                                                 2.0f,
                                                 3.0f,
                                                 4.0f,
                                                 6.0f,
                                                 -0.0f,
                                                 -0.5f,
                                                 -1.0f,
                                                 -1.5f,
                                                 -2.0f,
                                                 -3.0f,
                                                 -4.0f,
                                                 -6.0f};
// Same values as fp16 bit patterns.
static constexpr fp16_t e2m1_to_fp16_table[16] = {
    bit_cast<fp16_t>(static_cast<uint16_t>(0x0000)), // 0
    bit_cast<fp16_t>(static_cast<uint16_t>(0x3800)), // 0.5
    bit_cast<fp16_t>(static_cast<uint16_t>(0x3C00)), // 1
    bit_cast<fp16_t>(static_cast<uint16_t>(0x3E00)), // 1.5
    bit_cast<fp16_t>(static_cast<uint16_t>(0x4000)), // 2
    bit_cast<fp16_t>(static_cast<uint16_t>(0x4200)), // 3
    bit_cast<fp16_t>(static_cast<uint16_t>(0x4400)), // 4
    bit_cast<fp16_t>(static_cast<uint16_t>(0x4600)), // 6
    bit_cast<fp16_t>(static_cast<uint16_t>(0x8000)), // -0
    bit_cast<fp16_t>(static_cast<uint16_t>(0xB800)), // -0.5
    bit_cast<fp16_t>(static_cast<uint16_t>(0xBC00)), // -1
    bit_cast<fp16_t>(static_cast<uint16_t>(0xBE00)), // -1.5
    bit_cast<fp16_t>(static_cast<uint16_t>(0xC000)), // -2
    bit_cast<fp16_t>(static_cast<uint16_t>(0xC200)), // -3
    bit_cast<fp16_t>(static_cast<uint16_t>(0xC400)), // -4
    bit_cast<fp16_t>(static_cast<uint16_t>(0xC600))  // -6
};
#endif
149 };
150 
152 using pk_fp4_raw_t = typename pk_fp4_t::type;
153 
154 template <>
156 {
158 
159  static constexpr int exp = 2;
160  static constexpr int mant = 1;
161  static constexpr int bias = 1;
162  static constexpr int PackedSize = 2;
163 };
164 
165 // limits
166 template <class T>
167 struct numeric;
168 
169 template <>
171 {
172  static constexpr pk_fp4_raw_t binary_min_normal = 0b00100010; // 1
173  static constexpr pk_fp4_raw_t binary_max_normal = 0b01110111; // 6
174  static constexpr pk_fp4_raw_t binary_lowest_normal = 0b11111111; // -6
175  static constexpr pk_fp4_raw_t binary_min_subnorm = 0b00010001; // 0.5
176  static constexpr pk_fp4_raw_t binary_max_subnorm = 0b00010001; // 0.5
177  static constexpr pk_fp4_raw_t binary_zero = 0b00000000; // 0
178  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t min() { return binary_min_normal; }
179  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t max() { return binary_max_normal; }
180  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t lowest() { return binary_lowest_normal; }
181  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t epsilon() { return binary_min_subnorm; }
182  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t round_error() { return binary_min_subnorm; }
183  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t zero() { return binary_zero; }
184  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t denorm_min() { return binary_min_subnorm; }
185 
186  CK_TILE_HOST_DEVICE static constexpr bool has_inf() { return false; }
187  // N/A
188  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t infinity() { return max(); }
189  // N/A
190  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t quiet_NaN() { return max(); }
191  // N/A
192  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t signaling_NaN() { return max(); }
193 };
194 
195 template <index_t I>
196 CK_TILE_HOST_DEVICE constexpr pk_fp4_raw_t pk_fp4_t::_unpack(number<I>) const
197 {
198  static_assert(I < 2, "Index is out of range.");
199  if constexpr(I == 1)
200  return (data >> 4);
201  else
202  return data & 0b00001111;
203 }
205 // TODO: consider replace this macro to improve performance
206 
207 #if CK_TILE_FP4_CVT_DEVICE
208 namespace impl {
209 
210 template <typename T>
211 CK_TILE_DEVICE T _from_f4(pk_fp4_raw_t src, float scale = 1.0f)
212 {
213  if constexpr(std::is_same_v<T, fp32_t>)
214  {
215  fp32x2_t tmp = __builtin_amdgcn_cvt_scalef32_pk_f32_fp4(src, scale, 0);
216  return detail::lane0(tmp);
217  }
218  else if constexpr(std::is_same_v<T, fp32x2_t>)
219  return __builtin_amdgcn_cvt_scalef32_pk_f32_fp4(src, scale, 0);
220  else if constexpr(std::is_same_v<T, fp16_t>)
221  {
222  fp16x2_t tmp = __builtin_amdgcn_cvt_scalef32_pk_f16_fp4(src, scale, 0);
223  return detail::lane0(tmp);
224  }
225  else if constexpr(std::is_same_v<T, fp16x2_t>)
226  return __builtin_amdgcn_cvt_scalef32_pk_f16_fp4(src, scale, 0);
227  else if constexpr(std::is_same_v<T, bf16_t>)
228  {
229  bf16x2_t tmp = __builtin_amdgcn_cvt_scalef32_pk_bf16_fp4(src, scale, 0);
230  return detail::lane0(tmp);
231  }
232  else if constexpr(std::is_same_v<T, bf16x2_t>)
233  return __builtin_amdgcn_cvt_scalef32_pk_bf16_fp4(src, scale, 0);
234  else
235  static_assert(std::false_type::value, "Unsupported type.");
236  return T{};
237 }
238 template <typename T>
239 CK_TILE_DEVICE pk_fp4_raw_t _to_f4(T src, float scale = 1.0f)
240 {
241  union
242  {
243  uint32_t u32;
244  pk_fp4_raw_t pf4[4];
245  } cvt{0};
246  if constexpr(std::is_same_v<T, fp32_t>)
247  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(cvt.u32, src, src, scale, 0);
248  else if constexpr(std::is_same_v<T, fp32x2_t>)
249  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(
250  cvt.u32, detail::lane0(src), detail::lane1(src), scale, 0);
251  else if constexpr(std::is_same_v<T, fp16_t>)
252  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(cvt.u32, fp16x2_t{src, src}, scale, 0);
253  else if constexpr(std::is_same_v<T, fp16x2_t>)
254  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(cvt.u32, src, scale, 0);
255  else if constexpr(std::is_same_v<T, bf16_t>)
256  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_bf16(cvt.u32, bf16x2_t{src, src}, scale, 0);
257  else if constexpr(std::is_same_v<T, bf16x2_t>)
258  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_bf16(cvt.u32, src, scale, 0);
259  else
260  static_assert(std::false_type::value, "Unsupported type.");
261  return cvt.pf4[0];
262 }
263 
264 } // namespace impl
265 #endif
266 
267 CK_TILE_HOST_DEVICE constexpr bf16_t pk_fp4_t::to_bf16(float scale) const
268 {
269 #if CK_TILE_FP4_CVT_DEVICE
270  return impl::_from_f4<bf16_t>(data, scale);
271 #else
272  return bf16_t{type_convert<bf16_t>(convert_to_float<pk_fp4_t>(_unpack(number<0>{}), scale))};
273 #endif
274 }
275 
277 {
278 #if CK_TILE_FP4_CVT_DEVICE
279  return impl::_from_f4<bf16x2_t>(data, scale);
280 #else
281  return bf16x2_t{type_convert<bf16_t>(convert_to_float<pk_fp4_t>(_unpack(number<0>{}), scale)),
282  type_convert<bf16_t>(convert_to_float<pk_fp4_t>(_unpack(number<1>{}), scale))};
283 #endif
284 }
285 
286 // TODO: make it generic so that we can convert from directrly.
287 CK_TILE_HOST_DEVICE constexpr pk_fp4_raw_t float_to_mxfp4(float x, float scale)
288 {
289 #if CK_TILE_FP4_CVT_DEVICE
290  return impl::_to_f4(x, scale);
291 #else
292  return convert_to_type<pk_fp4_t>(x, scale);
293 #endif
294 }
295 CK_TILE_HOST_DEVICE constexpr pk_fp4_t float_to_pk_fp4(const float& x, float scale)
296 {
297 #if CK_TILE_FP4_CVT_DEVICE
298  return impl::_to_f4(x, scale);
299 #else
300  auto res = convert_to_type<pk_fp4_t>(x, scale);
301  return pk_fp4_t::_pack(res, res);
302 #endif
303 }
304 CK_TILE_HOST_DEVICE constexpr pk_fp4_t fp16_to_pk_fp4(const fp16_t& x, float scale)
305 {
306 #if CK_TILE_FP4_CVT_DEVICE
307  return impl::_to_f4(x, scale);
308 #else
309  auto res = float_to_mxfp4(type_convert<float>(x), scale);
310  return pk_fp4_t::_pack(res, res);
311 #endif
312 }
313 CK_TILE_HOST_DEVICE constexpr pk_fp4_t bf16_to_pk_fp4(const bf16_t& x, float scale)
314 {
315 #if CK_TILE_FP4_CVT_DEVICE
316  return impl::_to_f4(x, scale);
317 #else
318  auto res = float_to_mxfp4(type_convert<float>(x), scale);
319  return pk_fp4_t::_pack(res, res);
320 #endif
321 }
322 CK_TILE_HOST_DEVICE constexpr pk_fp4_t fp16x2_to_pk_fp4(const fp16x2_t& x, float scale)
323 {
324 #if CK_TILE_FP4_CVT_DEVICE
325  return impl::_to_f4(x, scale);
326 #else
327  return pk_fp4_t::_pack(float_to_mxfp4(detail::lane0(x), scale),
328  float_to_mxfp4(detail::lane1(x), scale));
329 #endif
330 }
331 CK_TILE_HOST_DEVICE constexpr pk_fp4_t bf16x2_to_pk_fp4(const bf16x2_t& x, float scale)
332 {
333 #if CK_TILE_FP4_CVT_DEVICE
334  return impl::_to_f4(x, scale);
335 #else
336  return pk_fp4_t::_pack(float_to_mxfp4(detail::lane0(x), scale),
337  float_to_mxfp4(detail::lane1(x), scale));
338 #endif
339 }
340 CK_TILE_HOST_DEVICE constexpr pk_fp4_t fp32x2_to_pk_fp4(const fp32x2_t& x, float scale)
341 {
342 #if CK_TILE_FP4_CVT_DEVICE
343  return impl::_to_f4(x, scale);
344 #else
345  return pk_fp4_t::_pack(float_to_mxfp4(detail::lane0(x), scale),
346  float_to_mxfp4(detail::lane1(x), scale));
347 #endif
348 }
349 
350 CK_TILE_HOST_DEVICE constexpr fp32x2_t pk_fp4_to_fp32x2(const pk_fp4_t& x, float scale)
351 {
352  return x.to_fp32x2(scale);
353 }
354 CK_TILE_HOST_DEVICE constexpr fp16x2_t pk_fp4_to_fp16x2(const pk_fp4_t& x, float scale)
355 {
356  return x.to_fp16x2(scale);
357 }
358 CK_TILE_HOST_DEVICE constexpr bf16x2_t pk_fp4_to_bf16x2(const pk_fp4_t& x, float scale)
359 {
360  return x.to_bf16x2(scale);
361 }
362 CK_TILE_HOST_DEVICE constexpr float pk_fp4_to_float(const pk_fp4_t& x, float scale)
363 {
364  return x.to_float(scale);
365 }
366 CK_TILE_HOST_DEVICE constexpr fp16_t pk_fp4_to_fp16(const pk_fp4_t& x, float scale)
367 {
368  return x.to_fp16(scale);
369 }
370 CK_TILE_HOST_DEVICE constexpr bf16_t pk_fp4_to_bf16(const pk_fp4_t& x, float scale)
371 {
372  return x.to_bf16(scale);
373 }
374 
375 #if TEST_convert_with_table == 0
376 CK_TILE_HOST_DEVICE constexpr float pk_fp4_t::to_float(float scale) const
377 {
378 #if CK_TILE_FP4_CVT_DEVICE
379  return impl::_from_f4<fp32_t>(data, scale);
380 #else
381  return convert_to_float<pk_fp4_t>(_unpack(number<0>{}), scale);
382 #endif
383 }
385 {
386 #if CK_TILE_FP4_CVT_DEVICE
387  return impl::_from_f4<fp32x2_t>(data, scale);
388 #else
389  return fp32x2_t{convert_to_float<pk_fp4_t>(_unpack(number<0>{}), scale),
390  convert_to_float<pk_fp4_t>(_unpack(number<1>{}), scale)};
391 #endif
392 }
393 
394 CK_TILE_HOST_DEVICE constexpr fp16_t pk_fp4_t::to_fp16(float scale) const
395 {
396 #if CK_TILE_FP4_CVT_DEVICE
397  return impl::_from_f4<fp16_t>(data, scale);
398 #else
399  return fp16_t{type_convert<fp16_t>(convert_to_float<pk_fp4_t>(_unpack(number<0>{}), scale))};
400 #endif
401 }
403 {
404 #if CK_TILE_FP4_CVT_DEVICE
405  return impl::_from_f4<fp16x2_t>(data, scale);
406 #else
407  return fp16x2_t{type_convert<fp16_t>(convert_to_float<pk_fp4_t>(_unpack(number<0>{}), scale)),
408  type_convert<fp16_t>(convert_to_float<pk_fp4_t>(_unpack(number<1>{}), scale))};
409 #endif
410 }
411 #else
412 CK_TILE_HOST_DEVICE constexpr float pk_fp4_t::to_float(float scale) const
413 {
414  return e2m1_to_fp32_table[_unpack(number<0>{})] * scale;
415 }
416 CK_TILE_HOST_DEVICE constexpr fp32x2_t pk_fp4_t::to_fp32x2(float scale) const
417 {
418  return fp32x2_t{e2m1_to_fp32_table[_unpack(number<0>{})] * scale, e2m1_to_fp32_table[_unpack(number<1>{}] * scale};
419 }
420 CK_TILE_HOST_DEVICE constexpr fp16_t pk_fp4_t::to_fp16(float scale) const
421 {
422  return type_convert<float>(e2m1_to_fp16_table[_unpack(number<0>{})]) * scale;
423 }
424 CK_TILE_HOST_DEVICE constexpr fp16x2_t pk_fp4_t::to_fp16x2(float scale) const
425 {
426  return fp16x2_t{
427  type_convert<fp16_t>(type_convert<float>(e2m1_to_fp16_table[_unpack(number<0>{})]) * scale),
428  type_convert<fp16_t>(type_convert<float>(e2m1_to_fp16_table[_unpack(number<1>{})]) *
429  scale)};
430 }
431 #endif
432 
433 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:41
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:42
constexpr CK_TILE_HOST_DEVICE _Float16 lane0(const fp16x2_t &v)
Definition: pk_fp4.hpp:43
constexpr CK_TILE_HOST_DEVICE _Float16 lane1(const fp16x2_t &v)
Definition: pk_fp4.hpp:47
Definition: cluster_descriptor.hpp:13
typename pk_fp4_t::type pk_fp4_raw_t
Definition: pk_fp4.hpp:152
ushort bfloat16_t
Definition: bfloat16.hpp:111
constexpr CK_TILE_HOST_DEVICE pk_fp4_t fp16_to_pk_fp4(const fp16_t &x, float scale)
Definition: pk_fp4.hpp:304
bfloat16_t bf16x2_t
Definition: pk_fp4.hpp:24
_Float16 fp16_t
Definition: half.hpp:110
float fp32x2_t
Definition: pk_fp4.hpp:22
bfloat16_t bf16_t
Definition: bfloat16.hpp:113
constexpr CK_TILE_HOST_DEVICE pk_fp4_t fp32x2_to_pk_fp4(const fp32x2_t &x, float scale)
Definition: pk_fp4.hpp:340
pk_float4_e2m1_t pk_fp4_t
Definition: pk_fp4.hpp:151
float fp32_t
Definition: pk_fp4.hpp:21
_Float16 fp16x2_t
Definition: half.hpp:385
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t float_to_pk_fp4(const float &x, float scale=1.f)
Definition: pk_fp4.hpp:295
constexpr CK_TILE_HOST_DEVICE float pk_fp4_to_float(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:362
constexpr CK_TILE_HOST_DEVICE fp16_t pk_fp4_to_fp16(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:366
CK_TILE_DEVICE bfloat16_t exp(bfloat16_t x)
Definition: bfloat16.hpp:411
constexpr CK_TILE_HOST_DEVICE pk_fp4_t bf16x2_to_pk_fp4(const bf16x2_t &x, float scale)
Definition: pk_fp4.hpp:331
constexpr CK_TILE_HOST_DEVICE pk_fp4_t fp16x2_to_pk_fp4(const fp16x2_t &x, float scale)
Definition: pk_fp4.hpp:322
constexpr CK_TILE_HOST_DEVICE fp32x2_t pk_fp4_to_fp32x2(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:350
constexpr CK_TILE_HOST_DEVICE pk_fp4_raw_t float_to_mxfp4(float x, float scale)
Definition: pk_fp4.hpp:287
constexpr CK_TILE_HOST_DEVICE pk_fp4_t bf16_to_pk_fp4(const bf16_t &x, float scale)
Definition: pk_fp4.hpp:313
constexpr CK_TILE_HOST_DEVICE fp16x2_t pk_fp4_to_fp16x2(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:354
constexpr CK_TILE_HOST_DEVICE bf16_t pk_fp4_to_bf16(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:370
constexpr CK_TILE_HOST_DEVICE bf16x2_t pk_fp4_to_bf16x2(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:358
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1350
unsigned short uint16_t
Definition: stdint.h:125
unsigned int uint32_t
Definition: stdint.h:126
unsigned char uint8_t
Definition: stdint.h:124
Definition: integral_constant.hpp:13
Definition: pk_fp4.hpp:35
bfloat16_t e[2]
Definition: pk_fp4.hpp:36
Definition: pk_fp4.hpp:31
_Float16 e[2]
Definition: pk_fp4.hpp:32
Definition: pk_fp4.hpp:39
float e[2]
Definition: pk_fp4.hpp:40
static constexpr CK_TILE_HOST_DEVICE bool has_inf()
Definition: pk_fp4.hpp:186
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t min()
Definition: pk_fp4.hpp:178
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t denorm_min()
Definition: pk_fp4.hpp:184
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t infinity()
Definition: pk_fp4.hpp:188
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t round_error()
Definition: pk_fp4.hpp:182
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t epsilon()
Definition: pk_fp4.hpp:181
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t zero()
Definition: pk_fp4.hpp:183
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t quiet_NaN()
Definition: pk_fp4.hpp:190
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t signaling_NaN()
Definition: pk_fp4.hpp:192
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t lowest()
Definition: pk_fp4.hpp:180
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t max()
Definition: pk_fp4.hpp:179
pk_fp4_raw_t bitwise_type
Definition: pk_fp4.hpp:157
Definition: numeric.hpp:81
static constexpr int PackedSize
Definition: numeric.hpp:82
Definition: numeric.hpp:18
static constexpr CK_TILE_HOST_DEVICE T max()
Definition: numeric.hpp:26
Definition: pk_fp4.hpp:76
constexpr CK_TILE_HOST_DEVICE bf16x2_t to_bf16x2(float scale=1.f) const
Definition: pk_fp4.hpp:276
constexpr CK_TILE_HOST_DEVICE fp16x2_t to_fp16x2(float scale=1.f) const
Definition: pk_fp4.hpp:402
constexpr CK_TILE_HOST_DEVICE fp16_t to_fp16(float scale=1.f) const
Definition: pk_fp4.hpp:394
constexpr CK_TILE_HOST_DEVICE float to_float(float scale=1.f) const
Definition: pk_fp4.hpp:376
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t()
Definition: pk_fp4.hpp:82
uint8_t raw_type
Definition: pk_fp4.hpp:78
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t(float init, float scale=1.f)
Definition: pk_fp4.hpp:87
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t unpack(number< I >) const
Definition: pk_fp4.hpp:110
constexpr CK_TILE_HOST_DEVICE type & get()
Definition: pk_fp4.hpp:92
constexpr CK_TILE_HOST_DEVICE type _unpack(number< I >) const
constexpr CK_TILE_HOST_DEVICE type get() const
Definition: pk_fp4.hpp:93
constexpr CK_TILE_HOST_DEVICE fp32x2_t to_fp32x2(float scale=1.f) const
Definition: pk_fp4.hpp:384
constexpr CK_TILE_HOST_DEVICE bf16_t to_bf16(float scale=1.f) const
Definition: pk_fp4.hpp:267
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t(T init)
Definition: pk_fp4.hpp:84
type data
Definition: pk_fp4.hpp:80
raw_type type
Definition: pk_fp4.hpp:79
constexpr static CK_TILE_HOST_DEVICE pk_float4_e2m1_t pack(const pk_float4_e2m1_t &x0, const pk_float4_e2m1_t &x1)
Definition: pk_fp4.hpp:114
constexpr static CK_TILE_HOST_DEVICE type _pack(const type x0, const type x1)
Definition: pk_fp4.hpp:122
#define CK_TILE_ARITHMETIC_USING_FLOAT(attr_, type_)
Definition: numeric.hpp:106