/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck_tile/core/numeric/math.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck_tile/core/numeric/math.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck_tile/core/numeric/math.hpp Source File
math.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
10 #include <type_traits>
11 #include <stdint.h>
12 #include <cmath>
13 
14 namespace ck_tile {
15 
16 template <typename Scale, Scale lhs>
17 struct scales_c
18 {
19  template <typename Right>
20  CK_TILE_HOST_DEVICE constexpr auto operator()(const Right& rhs) const -> decltype(lhs * rhs)
21  {
22  return lhs * rhs;
23  }
24 };
25 
26 template <typename Scale>
27 struct scales
28 {
29  static_assert(std::is_copy_constructible_v<Scale>);
30 
31  CK_TILE_HOST_DEVICE constexpr explicit scales(Scale lhs) : lhs_(lhs) {}
32 
33  template <typename Right>
34  CK_TILE_HOST_DEVICE constexpr auto operator()(const Right& rhs) const
35  -> decltype(std::declval<const Scale&>() * rhs)
36  {
37  return lhs_ * rhs;
38  }
39 
40  private:
41  Scale lhs_;
42 };
43 
45 template <typename Scale>
46 __host__ __device__ scales(Scale)->scales<Scale>;
47 
48 template <typename Left = void, typename Right = Left>
49 struct plus
50 {
51  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
52  -> decltype(lhs + rhs)
53  {
54  return lhs + rhs;
55  }
56 };
57 
58 template <>
59 struct plus<void, void>
60 {
61  template <typename Left, typename Right>
62  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
63  -> decltype(lhs + rhs)
64  {
65  return lhs + rhs;
66  }
67 };
68 
70 __host__ __device__ plus()->plus<void, void>;
71 
72 template <typename Left = void, typename Right = Left>
73 struct minus
74 {
75  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
76  -> decltype(lhs - rhs)
77  {
78  return lhs - rhs;
79  }
80 };
81 
82 template <>
83 struct minus<void, void>
84 {
85  template <typename Left, typename Right>
86  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
87  -> decltype(lhs - rhs)
88  {
89  return lhs - rhs;
90  }
91 };
92 
94 __host__ __device__ minus()->minus<void, void>;
95 
96 template <typename Left = void, typename Right = Left>
97 struct multiplies
98 {
99  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
100  -> decltype(lhs * rhs)
101  {
102  return lhs * rhs;
103  }
104 };
105 
106 template <>
107 struct multiplies<void, void>
108 {
109  template <typename Left, typename Right>
110  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
111  -> decltype(lhs * rhs)
112  {
113  return lhs * rhs;
114  }
115 };
116 
118 __host__ __device__ multiplies()->multiplies<void, void>;
119 
120 template <typename T>
121 struct maximize
122 {
123  CK_TILE_HOST_DEVICE constexpr T operator()(T a, T b) const { return a >= b ? a : b; }
124 };
125 
126 template <typename T>
127 struct minimize
128 {
129  CK_TILE_HOST_DEVICE constexpr T operator()(T a, T b) const { return a <= b ? a : b; }
130 };
131 
132 template <typename T>
134 {
135  CK_TILE_HOST_DEVICE constexpr T operator()(T a, T b) const
136  {
137  static_assert(std::is_same<T, index_t>{} || std::is_same<T, int>{}, "wrong type");
138  return (a + b - number<1>{}) / b;
139  }
140 };
141 
142 template <typename X, typename Y>
144 {
145  return x / y;
146 }
147 
148 template <typename X, typename Y>
149 CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
150 {
151  return (x + y - number<1>{}) / y;
152 }
153 
154 template <typename X, typename Y>
156 {
157  return y * integer_divide_ceil(x, y);
158 }
159 
160 template <typename T>
161 CK_TILE_HOST_DEVICE constexpr T max(T x)
162 {
163  return x;
164 }
165 
166 template <typename T>
167 CK_TILE_HOST constexpr T max(T x, T y)
168 {
169  return x > y ? x : y;
170 }
171 
172 template <typename T>
173 CK_TILE_DEVICE constexpr T max(T x, T y)
174 {
175  return x > y ? x : y;
176 }
177 
178 template <>
179 CK_TILE_DEVICE constexpr float max(float x, float y)
180 {
181  return __builtin_fmaxf(x, y); // can resultin v_max3_f32
182 }
183 
184 template <>
185 CK_TILE_DEVICE constexpr double max(double x, double y)
186 {
187  return __builtin_fmax(x, y); // maybe still v_max3_f32
188 }
189 
190 template <index_t X>
192 {
193  return X > y ? X : y;
194 }
195 
196 template <index_t Y>
198 {
199  return x > Y ? x : Y;
200 }
201 
202 template <typename X, typename... Ys>
203 CK_TILE_HOST_DEVICE constexpr auto max(X x, Ys... ys)
204 {
205  static_assert(sizeof...(Ys) > 0, "not enough argument");
206  return max(x, max(ys...));
207 }
208 
209 template <typename T>
210 CK_TILE_HOST_DEVICE constexpr T min(T x)
211 {
212  return x;
213 }
214 
215 template <typename T>
216 CK_TILE_HOST constexpr T min(T x, T y)
217 {
218  return x < y ? x : y;
219 }
220 
221 template <typename T>
222 CK_TILE_DEVICE constexpr T min(T x, T y)
223 {
224  return x < y ? x : y;
225 }
226 
227 template <>
228 CK_TILE_DEVICE constexpr float min(float x, float y)
229 {
230  return __builtin_fminf(x, y);
231 }
232 
233 template <>
234 CK_TILE_DEVICE constexpr double min(double x, double y)
235 {
236  return __builtin_fmin(x, y);
237 }
238 
239 template <index_t X>
241 {
242  return X < y ? X : y;
243 }
244 
245 template <index_t Y>
247 {
248  return x < Y ? x : Y;
249 }
250 
251 template <typename X, typename... Ys>
252 CK_TILE_HOST_DEVICE constexpr auto min(X x, Ys... ys)
253 {
254  static_assert(sizeof...(Ys) > 0, "not enough argument");
255  return min(x, min(ys...));
256 }
257 
258 template <typename T>
259 CK_TILE_HOST_DEVICE constexpr T clamp(const T& x, const T& lowerbound, const T& upperbound)
260 {
261  return min(max(x, lowerbound), upperbound);
262 }
263 
264 CK_TILE_HOST int clz(uint32_t x) { return __builtin_clz(x); }
265 CK_TILE_DEVICE int clz(uint32_t x) { return __clz(x); }
266 
267 // greatest common divisor, aka highest common factor
269 {
270  if(x < 0)
271  {
272  return gcd(-x, y);
273  }
274  else if(y < 0)
275  {
276  return gcd(x, -y);
277  }
278  else if(x == y || x == 0)
279  {
280  return y;
281  }
282  else if(y == 0)
283  {
284  return x;
285  }
286  else if(x > y)
287  {
288  return gcd(x % y, y);
289  }
290  else
291  {
292  return gcd(x, y % x);
293  }
294 }
295 
296 template <index_t X, index_t Y>
298 {
299  constexpr auto r = gcd(X, Y);
300 
301  return number<r>{};
302 }
303 
304 template <typename X,
305  typename... Ys,
306  typename std::enable_if<sizeof...(Ys) >= 2, bool>::type = false>
307 CK_TILE_HOST_DEVICE constexpr auto gcd(X x, Ys... ys)
308 {
309  return gcd(x, gcd(ys...));
310 }
311 
312 // least common multiple
313 template <typename X, typename Y>
314 CK_TILE_HOST_DEVICE constexpr auto lcm(X x, Y y)
315 {
316  return (x * y) / gcd(x, y);
317 }
318 
319 template <typename X,
320  typename... Ys,
321  typename std::enable_if<sizeof...(Ys) >= 2, bool>::type = false>
322 CK_TILE_HOST_DEVICE constexpr auto lcm(X x, Ys... ys)
323 {
324  return lcm(x, lcm(ys...));
325 }
326 
327 template <typename Left = void, typename Right = Left>
328 struct equal
329 {
330  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
331  -> decltype(lhs == rhs)
332  {
333  return lhs == rhs;
334  }
335 };
336 
337 template <>
339 {
340  template <typename Left, typename Right>
341  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
342  -> decltype(lhs == rhs)
343  {
344  return lhs == rhs;
345  }
346 };
347 
349 __host__ __device__ equal()->equal<void, void>;
350 
351 template <>
352 struct equal<float, float>
353 {
354  CK_TILE_HOST_DEVICE constexpr bool operator()(float lhs, float rhs) const
355  {
356  return bit_cast<uint32_t>(lhs) == bit_cast<uint32_t>(rhs);
357  }
358 };
359 
360 template <>
361 struct equal<double, double>
362 {
363  CK_TILE_HOST_DEVICE constexpr bool operator()(double lhs, double rhs) const
364  {
365  return bit_cast<uint64_t>(lhs) == bit_cast<uint64_t>(rhs);
366  }
367 };
368 
369 template <typename Left = void, typename Right = Left>
370 struct less
371 {
372  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
373  -> decltype(lhs < rhs)
374  {
375  return lhs < rhs;
376  }
377 };
378 
379 template <>
380 struct less<void, void>
381 {
382  template <typename Left, typename Right>
383  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
384  -> decltype(lhs < rhs)
385  {
386  return lhs < rhs;
387  }
388 };
389 
391 __host__ __device__ less()->less<void, void>;
392 
393 template <typename Left = void, typename Right = Left>
395 {
396  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
397  -> decltype(lhs <= rhs)
398  {
399  return lhs <= rhs;
400  }
401 };
402 
403 template <>
404 struct less_equal<void, void>
405 {
406  template <typename Left, typename Right>
407  CK_TILE_HOST_DEVICE constexpr auto operator()(const Left& lhs, const Right& rhs) const
408  -> decltype(lhs <= rhs)
409  {
410  return lhs <= rhs;
411  }
412 };
413 
415 __host__ __device__ less_equal()->less_equal<void, void>;
416 
417 template <>
418 struct less_equal<float, float>
419 {
420  CK_TILE_HOST_DEVICE constexpr bool operator()(float lhs, float rhs) const
421  {
422  return lhs < rhs || bit_cast<uint32_t>(lhs) == bit_cast<uint32_t>(rhs);
423  }
424 };
425 
426 template <>
427 struct less_equal<double, double>
428 {
429  CK_TILE_HOST_DEVICE constexpr bool operator()(double lhs, double rhs) const
430  {
431  return lhs < rhs || bit_cast<uint64_t>(lhs) == bit_cast<uint64_t>(rhs);
432  }
433 };
434 
435 CK_TILE_HOST_DEVICE constexpr int32_t next_power_of_two(int32_t x)
436 {
437  // TODO: x need to be 2 ~ 0x7fffffff. 0, 1, or larger than 0x7fffffff will compile fail
438  return 1 << (32 - clz(x - 1));
439 }
440 
441 template <index_t X>
443 {
444  constexpr index_t y = next_power_of_two(X);
445  return number<y>{};
446 }
447 
448 template <index_t X>
450 {
451  constexpr index_t y = next_power_of_two(X);
452  return number<y>{};
453 }
454 
455 CK_TILE_HOST_DEVICE constexpr int32_t integer_log2_floor(int32_t x)
456 {
457  // TODO: x need to be 1 ~ 0x7fffffff
458  // __builtin_clz will produce unexpected result if x is 0;
459  return 31 - __builtin_clz(x);
460 }
461 
463 {
464  // TODO: x need to be 1 ~ 0x7fffffff
465  return x == (1 << integer_log2_floor(x));
466 }
467 
468 #ifndef C_LOG2E
469 #define C_LOG2E 1.44269504088896340736 // log2(e)
470 #endif
471 
472 template <typename T>
473 struct log2e;
474 
475 template <>
476 struct log2e<double>
477 {
478  static constexpr double value = C_LOG2E;
479 };
480 
481 template <>
482 struct log2e<float>
483 {
484  static constexpr float value = C_LOG2E;
485 };
486 
487 template <typename T = double>
488 constexpr T log2e_v = log2e<T>::value;
489 
491 float exp2(float x) { return exp2f(x); };
492 
494 float exp2(float x) { return std::exp2f(x); };
495 
496 CK_TILE_DEVICE uint16_t sad_u16(uint16_t x, uint16_t y, uint16_t acc)
497 {
498  return __builtin_amdgcn_sad_u16(x, y, acc);
499 }
500 
501 CK_TILE_DEVICE uint32_t sad_u32(uint32_t x, uint32_t y, uint32_t acc)
502 {
504  uint32_t res;
505  asm volatile("v_sad_u32 %0, %1, %2, %3" : "=v"(res) : "v"(x), "v"(y), "v"(acc));
506  return res;
507 }
508 
509 CK_TILE_HOST uint32_t sad_u32(uint32_t x, uint32_t y, uint32_t acc)
510 {
511  return (x > y ? (x - y) : (y - x)) + acc;
512 }
513 
515 
516 } // namespace ck_tile
517 // blow function need data type pre-defined
522 #ifndef __HIP_DEVICE_COMPILE__
523 #include <cmath>
524 #endif
525 
526 namespace ck_tile {
527 #if CK_TILE_WORKAROUND_SWDEV_383542
528 extern "C" CK_TILE_DEVICE float __ocml_native_recip_f32(float);
529 #endif
530 
531 // math functions for the host, some are implemented by calling C++ std functions
532 
533 CK_TILE_HOST float abs(float x) { return std::abs(x); };
534 
535 CK_TILE_HOST double abs(double x) { return std::abs(x); };
536 
538 {
539  int8_t sgn = x >> (8 - 1);
540 
541  return (x ^ sgn) - sgn;
542 };
543 
544 CK_TILE_HOST int32_t abs(int32_t x)
545 {
546  int32_t sgn = x >> (32 - 1);
547 
548  return (x ^ sgn) - sgn;
549 };
550 
552 {
553  uint16_t xx = bit_cast<uint16_t>(x);
554 
555  uint16_t abs_xx = xx & 0x7fff;
556 
557  fp16_t abs_x = bit_cast<fp16_t>(abs_xx);
558 
559  return abs_x;
560 };
561 
562 #ifdef CK_TILE_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
564 {
565  int4_t sgn = x >> (4 - 1);
566  return (x ^ sgn) - sgn;
567 }
568 #endif
569 
570 CK_TILE_HOST bool isnan(float x) { return std::isnan(x); };
571 
572 CK_TILE_HOST bool isnan(double x) { return std::isnan(x); };
573 
575 {
576  (void)x;
577  return false;
578 };
579 
580 CK_TILE_HOST bool isnan(int32_t x)
581 {
582  (void)x;
583  return false;
584 };
585 
587 {
588  uint16_t xx = bit_cast<uint16_t>(x);
589 
590  return (xx & 0x7FFF) > 0x7C00;
591 };
592 
593 #ifdef CK_TILE_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
594 CK_TILE_HOST bool isnan(int4_t x)
595 {
596  (void)x;
597  return false;
598 };
599 #endif
600 
602 {
603  return static_cast<fp16_t>(std::sqrt(static_cast<float>(x)));
604 };
605 
606 CK_TILE_HOST float sqrt(float x) { return std::sqrt(x); };
607 
608 CK_TILE_HOST double sqrt(double x) { return std::sqrt(x); };
609 
610 template <typename T>
612 {
613  return type_convert<T>(std::tanhf(type_convert<float>(x)));
614 };
615 
616 template <>
617 CK_TILE_HOST float tanh<float>(float x)
618 {
619  return std::tanhf(x);
620 };
621 
622 template <>
623 CK_TILE_HOST double tanh<double>(double x)
624 {
625  return std::tanh(x);
626 };
627 
628 template <typename T>
630 {
631  return type_convert<T>(std::acosf(type_convert<float>(x)));
632 };
633 
634 template <>
635 CK_TILE_HOST float acos<float>(float x)
636 {
637  return std::acosf(x);
638 };
639 
640 template <>
641 CK_TILE_HOST double acos<double>(double x)
642 {
643  return std::acos(x);
644 };
645 
646 template <typename T>
648 {
649  return type_convert<T>(-(type_convert<float>(x)));
650 };
651 
652 template <>
653 CK_TILE_HOST float neg<float>(float x)
654 {
655  return -x;
656 };
657 
658 template <>
659 CK_TILE_HOST double neg<double>(double x)
660 {
661  return -x;
662 };
663 
664 template <>
665 CK_TILE_HOST int32_t neg<int32_t>(int32_t x)
666 {
667  return -x;
668 };
669 
670 template <>
672 {
673  return -x;
674 };
675 
676 template <typename T>
678 {
679  return type_convert<T>(std::atanf(type_convert<float>(x)));
680 };
681 
682 template <>
683 CK_TILE_HOST float atan<float>(float x)
684 {
685  return std::atanf(x);
686 };
687 
688 template <>
689 CK_TILE_HOST double atan<double>(double x)
690 {
691  return std::atan(x);
692 };
693 
694 template <typename T>
696 {
697  return type_convert<T>(std::sinf(type_convert<float>(x)));
698 };
699 
700 template <>
701 CK_TILE_HOST float sin<float>(float x)
702 {
703  return std::sinf(x);
704 };
705 
706 template <>
707 CK_TILE_HOST double sin<double>(double x)
708 {
709  return std::sin(x);
710 };
711 
712 template <typename T>
714 {
715  return type_convert<T>(std::asinf(type_convert<float>(x)));
716 };
717 
718 template <>
719 CK_TILE_HOST float asin<float>(float x)
720 {
721  return std::asinf(x);
722 };
723 
724 template <>
725 CK_TILE_HOST double asin<double>(double x)
726 {
727  return std::asin(x);
728 };
729 
730 template <typename T>
732 {
733  return type_convert<T>(std::asinhf(type_convert<float>(x)));
734 };
735 
736 template <>
738 {
739  return std::asinhf(x);
740 };
741 
742 template <>
743 CK_TILE_HOST double asinh<double>(double x)
744 {
745  return std::asinh(x);
746 };
747 
748 template <typename T>
750 {
751  return type_convert<T>(std::cosf(type_convert<float>(x)));
752 };
753 
754 template <>
755 CK_TILE_HOST float cos<float>(float x)
756 {
757  return std::cosf(x);
758 };
759 
760 template <>
761 CK_TILE_HOST double cos<double>(double x)
762 {
763  return std::cos(x);
764 };
765 
766 template <typename T>
768 {
769  return type_convert<T>(std::acoshf(type_convert<float>(x)));
770 };
771 
772 template <>
774 {
775  return std::acoshf(x);
776 };
777 
778 template <>
779 CK_TILE_HOST double acosh<double>(double x)
780 {
781  return std::acosh(x);
782 };
783 
784 template <typename T>
786 {
787  return type_convert<T>(std::tanf(type_convert<float>(x)));
788 };
789 
790 template <>
791 CK_TILE_HOST float tan<float>(float x)
792 {
793  return std::tanf(x);
794 };
795 
796 template <>
797 CK_TILE_HOST double tan<double>(double x)
798 {
799  return std::tan(x);
800 };
801 
802 template <typename T>
804 {
805  return type_convert<T>(std::atanhf(type_convert<float>(x)));
806 };
807 
808 template <>
810 {
811  return std::atanhf(x);
812 };
813 
814 template <>
815 CK_TILE_HOST double atanh<double>(double x)
816 {
817  return std::atanh(x);
818 };
819 
820 template <typename T>
822 {
823  return type_convert<T>(std::sinhf(type_convert<float>(x)));
824 };
825 
826 template <>
827 CK_TILE_HOST float sinh<float>(float x)
828 {
829  return std::sinhf(x);
830 };
831 
832 template <>
833 CK_TILE_HOST double sinh<double>(double x)
834 {
835  return std::sinh(x);
836 };
837 
838 template <typename T>
840 {
841  return type_convert<T>(std::ceilf(type_convert<float>(x)));
842 };
843 
844 template <>
845 CK_TILE_HOST float ceil<float>(float x)
846 {
847  return std::ceilf(x);
848 };
849 
850 template <>
851 CK_TILE_HOST double ceil<double>(double x)
852 {
853  return std::ceil(x);
854 };
855 
856 template <typename T>
858 {
859  return type_convert<T>(std::coshf(type_convert<float>(x)));
860 };
861 
862 template <>
863 CK_TILE_HOST float cosh<float>(float x)
864 {
865  return std::coshf(x);
866 };
867 
868 template <>
869 CK_TILE_HOST double cosh<double>(double x)
870 {
871  return std::cosh(x);
872 };
873 
874 template <typename T>
876 {
877  return type_convert<T>(std::floorf(type_convert<float>(x)));
878 };
879 
880 template <>
882 {
883  return std::floorf(x);
884 };
885 
886 template <>
887 CK_TILE_HOST double floor<double>(double x)
888 {
889  return std::floor(x);
890 };
891 
892 template <typename T>
894 {
895  return type_convert<T>(1.f / type_convert<float>(x));
896 };
897 
898 template <typename T>
900 {
901  return type_convert<T>(std::expf(type_convert<float>(x)));
902 }
903 
904 template <>
905 CK_TILE_HOST float exp<float>(float x)
906 {
907  return std::expf(x);
908 }
909 
910 template <>
911 CK_TILE_HOST double exp<double>(double x)
912 {
913  return std::exp(x);
914 }
915 
916 template <typename T>
918 {
919  return type_convert<T>(std::logf(type_convert<float>(x)));
920 }
921 
922 template <>
923 CK_TILE_HOST float log<float>(float x)
924 {
925  return std::logf(x);
926 }
927 
928 template <>
929 CK_TILE_HOST double log<double>(double x)
930 {
931  return std::log(x);
932 }
933 
934 template <typename T>
935 CK_TILE_HOST T pow(T x, T gamma)
936 {
937  return type_convert<T>(std::powf(type_convert<float>(x), type_convert<float>(gamma)));
938 }
939 
940 template <>
941 CK_TILE_HOST float pow<float>(float x, float gamma)
942 {
943  return std::powf(x, gamma);
944 }
945 
946 template <>
947 CK_TILE_HOST double pow<double>(double x, double gamma)
948 {
949  return std::pow(x, gamma);
950 }
951 
952 template <typename T>
954 {
955  return type_convert<T>(std::expm1f(type_convert<float>(x)));
956 }
957 
958 template <>
960 {
961  return std::expm1f(x);
962 }
963 
964 template <>
965 CK_TILE_HOST double expm1<double>(double x)
966 {
967  return std::expm1(x);
968 }
969 
970 // math functions for the HIP kernel, some are implemented by calling hip builtin functions
971 
972 CK_TILE_DEVICE float abs(float x)
973 {
974  union
975  {
976  float f32;
977  uint32_t u32;
978  } y;
979  y.f32 = x;
980  y.u32 = y.u32 & 0x7fffffff;
981  return y.f32;
982 };
983 
984 CK_TILE_DEVICE double abs(double x) { return ::abs(x); };
985 
987 {
988  int8_t sgn = x >> (8 - 1);
989 
990  return (x ^ sgn) - sgn;
991 };
992 
993 CK_TILE_DEVICE int32_t abs(int32_t x)
994 {
995  int32_t sgn = x >> (32 - 1);
996 
997  return (x ^ sgn) - sgn;
998 };
999 
1000 #ifdef CK_TILE_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
1002 {
1003  int4_t sgn = x >> (4 - 1);
1004 
1005  return (x ^ sgn) - sgn;
1006 };
1007 #endif
1008 
1010 {
1011  uint16_t xx = bit_cast<uint16_t>(x);
1012 
1013  uint16_t abs_xx = xx & 0x7fff;
1014 
1015  fp16_t abs_x = bit_cast<fp16_t>(abs_xx);
1016 
1017  return abs_x;
1018 };
1019 
1020 CK_TILE_DEVICE bool isnan(float x) { return ::isnan(x); };
1021 
1022 CK_TILE_DEVICE bool isnan(double x) { return ::isnan(x); };
1023 
1024 CK_TILE_DEVICE bool isnan(int8_t x)
1025 {
1026  (void)x;
1027  return false;
1028 };
1029 
1030 CK_TILE_DEVICE bool isnan(int32_t x)
1031 {
1032  (void)x;
1033  return false;
1034 };
1035 
1036 #ifdef CK_TILE_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
1037 CK_TILE_DEVICE bool isnan(int4_t x)
1038 {
1039  (void)x;
1040  return false;
1041 };
1042 #endif
1043 
1044 CK_TILE_DEVICE bool isnan(fp16_t x)
1045 {
1046  uint16_t xx = bit_cast<uint16_t>(x);
1047 
1048  return (xx & 0x7FFF) > 0x7C00;
1049 };
1050 
1052 {
1053  return static_cast<fp16_t>(__builtin_amdgcn_sqrtf(static_cast<float>(x)));
1054 };
1055 
1056 CK_TILE_DEVICE float sqrt(float x) { return __builtin_amdgcn_sqrtf(x); };
1057 
1058 CK_TILE_DEVICE double sqrt(double x) { return __builtin_amdgcn_sqrt(x); };
1059 
1060 template <typename T>
1062 {
1063  return type_convert<T>(::tanhf(type_convert<float>(x)));
1064 };
1065 
1066 template <>
1067 CK_TILE_DEVICE float tanh<float>(float x)
1068 {
1069  return ::tanhf(x);
1070 };
1071 
1072 template <>
1073 CK_TILE_DEVICE double tanh<double>(double x)
1074 {
1075  return ::tanh(x);
1076 };
1077 
1078 template <typename T>
1080 {
1081  return type_convert<T>(::acosf(type_convert<float>(x)));
1082 };
1083 
1084 template <>
1085 CK_TILE_DEVICE float acos<float>(float x)
1086 {
1087  return ::acosf(x);
1088 };
1089 
1090 template <>
1091 CK_TILE_DEVICE double acos<double>(double x)
1092 {
1093  return ::acos(x);
1094 };
1095 
1096 template <typename T>
1098 {
1099  return type_convert<T>(-(type_convert<float>(x)));
1100 };
1101 
1102 template <>
1103 CK_TILE_DEVICE float neg<float>(float x)
1104 {
1105  return -x;
1106 };
1107 
1108 template <>
1109 CK_TILE_DEVICE double neg<double>(double x)
1110 {
1111  return -x;
1112 };
1113 
1114 template <>
1115 CK_TILE_DEVICE int32_t neg<int32_t>(int32_t x)
1116 {
1117  return -x;
1118 };
1119 
1120 template <>
1122 {
1123  return -x;
1124 };
1125 
1126 template <>
1128 {
1129  return -x;
1130 };
1131 
1132 template <typename T>
1134 {
1135  return type_convert<T>(::atanf(type_convert<float>(x)));
1136 };
1137 
1138 template <>
1139 CK_TILE_DEVICE float atan<float>(float x)
1140 {
1141  return ::atanf(x);
1142 };
1143 
1144 template <>
1145 CK_TILE_DEVICE double atan<double>(double x)
1146 {
1147  return ::atan(x);
1148 };
1149 
1150 template <typename T>
1152 {
1153  return type_convert<T>(::sinf(type_convert<float>(x)));
1154 };
1155 
1156 template <>
1157 CK_TILE_DEVICE float sin<float>(float x)
1158 {
1159  return ::sinf(x);
1160 };
1161 
1162 template <>
1163 CK_TILE_DEVICE double sin<double>(double x)
1164 {
1165  return ::sin(x);
1166 };
1167 
1168 template <>
1170 {
1171  return __ocml_sin_f16(x);
1172 };
1173 
1174 template <typename T>
1176 {
1177  return type_convert<T>(::asinf(type_convert<float>(x)));
1178 };
1179 
1180 template <>
1181 CK_TILE_DEVICE float asin<float>(float x)
1182 {
1183  return ::asinf(x);
1184 };
1185 
1186 template <>
1187 CK_TILE_DEVICE double asin<double>(double x)
1188 {
1189  return ::asin(x);
1190 };
1191 
1192 template <typename T>
1194 {
1195  return type_convert<T>(::asinhf(type_convert<float>(x)));
1196 };
1197 
1198 template <>
1199 CK_TILE_DEVICE float asinh<float>(float x)
1200 {
1201  return ::asinhf(x);
1202 };
1203 
1204 template <>
1205 CK_TILE_DEVICE double asinh<double>(double x)
1206 {
1207  return ::asinh(x);
1208 };
1209 
1210 template <typename T>
1212 {
1213  return type_convert<T>(::acoshf(type_convert<float>(x)));
1214 };
1215 
1216 template <>
1217 CK_TILE_DEVICE float acosh<float>(float x)
1218 {
1219  return ::acoshf(x);
1220 };
1221 
1222 template <>
1223 CK_TILE_DEVICE double acosh<double>(double x)
1224 {
1225  return ::acosh(x);
1226 };
1227 
1228 template <typename T>
1230 {
1231  return type_convert<T>(::tanf(type_convert<float>(x)));
1232 };
1233 
1234 template <>
1235 CK_TILE_DEVICE float tan<float>(float x)
1236 {
1237  return ::tanf(x);
1238 };
1239 
1240 template <>
1241 CK_TILE_DEVICE double tan<double>(double x)
1242 {
1243  return ::tan(x);
1244 };
1245 
1246 template <typename T>
1248 {
1249  return type_convert<T>(::atanhf(type_convert<float>(x)));
1250 };
1251 
1252 template <>
1253 CK_TILE_DEVICE float atanh<float>(float x)
1254 {
1255  return ::atanhf(x);
1256 };
1257 
1258 template <>
1259 CK_TILE_DEVICE double atanh<double>(double x)
1260 {
1261  return ::atanh(x);
1262 };
1263 
1264 template <typename T>
1266 {
1267  return type_convert<T>(::sinhf(type_convert<float>(x)));
1268 };
1269 
1270 template <>
1271 CK_TILE_DEVICE float sinh<float>(float x)
1272 {
1273  return ::sinhf(x);
1274 };
1275 
1276 template <>
1277 CK_TILE_DEVICE double sinh<double>(double x)
1278 {
1279  return ::sinh(x);
1280 };
1281 
1282 template <typename T>
1284 {
1285  return type_convert<T>(::ceilf(type_convert<float>(x)));
1286 };
1287 
1288 template <>
1289 CK_TILE_DEVICE float ceil<float>(float x)
1290 {
1291  return ::ceilf(x);
1292 };
1293 
1294 template <>
1295 CK_TILE_DEVICE double ceil<double>(double x)
1296 {
1297  return ::ceil(x);
1298 };
1299 
1300 template <>
1302 {
1303  return __ocml_ceil_f16(x);
1304 };
1305 
1306 template <typename T>
1308 {
1309  return type_convert<T>(::coshf(type_convert<float>(x)));
1310 };
1311 
1312 template <>
1313 CK_TILE_DEVICE float cosh<float>(float x)
1314 {
1315  return ::coshf(x);
1316 };
1317 
1318 template <>
1319 CK_TILE_DEVICE double cosh<double>(double x)
1320 {
1321  return ::cosh(x);
1322 };
1323 
1324 template <typename T>
1326 {
1327  return type_convert<T>(::floorf(type_convert<float>(x)));
1328 };
1329 
1330 template <>
1331 CK_TILE_DEVICE float floor<float>(float x)
1332 {
1333  return ::floorf(x);
1334 };
1335 
1336 template <>
1337 CK_TILE_DEVICE double floor<double>(double x)
1338 {
1339  return ::floor(x);
1340 };
1341 
1342 template <>
1344 {
1345  return __ocml_floor_f16(x);
1346 };
1347 
1348 template <typename T>
1350 {
1351 #if !CK_TILE_WORKAROUND_SWDEV_383542
1352  return __frcp_rn(x);
1353 #else
1354  // return __ocml_native_recip_f32(x);
1355  return __builtin_amdgcn_rcpf(x);
1356 #endif
1357 };
1358 
1359 template <typename T>
1361 {
1362  return type_convert<T>(__ocml_exp_f32(type_convert<float>(x)));
1363 };
1364 
1365 template <>
1367 {
1368  return __ocml_exp_f16(x);
1369 };
1370 
1371 template <>
1372 CK_TILE_DEVICE float exp<float>(float x)
1373 {
1374  return __ocml_exp_f32(x);
1375 };
1376 
1377 template <>
1378 CK_TILE_DEVICE double exp<double>(double x)
1379 {
1380  return exp(x);
1381 };
1382 
1383 template <typename T>
1385 {
1386  return type_convert<T>(__logf(type_convert<float>(x)));
1387 };
1388 
1389 template <>
1391 {
1392  return __ocml_log_f16(x);
1393 };
1394 
1395 template <>
1396 CK_TILE_DEVICE float log<float>(float x)
1397 {
1398  return __logf(x);
1399 };
1400 
1401 template <>
1402 CK_TILE_DEVICE double log<double>(double x)
1403 {
1404  return log(x);
1405 };
1406 
1407 template <typename T>
1408 CK_TILE_DEVICE T pow(T x, T gamma)
1409 {
1410  return type_convert<T>(powf(type_convert<float>(x), type_convert<float>(gamma)));
1411 };
1412 
1413 template <>
1414 CK_TILE_DEVICE float pow<float>(float x, float gamma)
1415 {
1416  return powf(x, gamma);
1417 };
1418 
1419 template <>
1420 CK_TILE_DEVICE double pow<double>(double x, double gamma)
1421 {
1422  return pow(x, gamma);
1423 };
1424 
1425 template <typename T>
1427 {
1428  return type_convert<T>(expm1f(type_convert<float>(x)));
1429 };
1430 
1431 template <>
1432 CK_TILE_DEVICE float expm1<float>(float x)
1433 {
1434  return expm1f(x);
1435 };
1436 
1437 template <>
1438 CK_TILE_DEVICE double expm1<double>(double x)
1439 {
1440  return expm1(x);
1441 };
1442 
1443 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:40
#define CK_TILE_HOST
Definition: config.hpp:39
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
__host__ __device__ less_equal() -> less_equal< void, void >
FIXME: create macro to replace 'host device' and nothing more.
CK_TILE_DEVICE T pow(T x, T gamma)
Definition: math.hpp:1408
CK_TILE_DEVICE T tanh(T x)
Definition: math.hpp:1061
CK_TILE_DEVICE bfloat16_t log(bfloat16_t x)
Definition: bfloat16.hpp:423
CK_TILE_HOST bool isnan(fp16_t x)
Definition: math.hpp:586
constexpr CK_TILE_HOST_DEVICE T clamp(const T &x, const T &lowerbound, const T &upperbound)
Definition: math.hpp:259
CK_TILE_HOST double tan< double >(double x)
Definition: math.hpp:797
constexpr CK_TILE_HOST_DEVICE auto integer_least_multiple(X x, Y y)
Definition: math.hpp:155
CK_TILE_HOST T acos(T x)
Definition: math.hpp:629
__host__ __device__ multiplies() -> multiplies< void, void >
FIXME: create macro to replace 'host device' and nothing more.
CK_TILE_DEVICE T exp(T x)
Definition: math.hpp:1360
CK_TILE_HOST int32_t neg< int32_t >(int32_t x)
Definition: math.hpp:665
constexpr CK_TILE_HOST_DEVICE auto integer_divide_ceil(X x, Y y)
Definition: math.hpp:149
CK_TILE_HOST double atan< double >(double x)
Definition: math.hpp:689
CK_TILE_HOST float tan< float >(float x)
Definition: math.hpp:791
CK_TILE_HOST float asinh< float >(float x)
Definition: math.hpp:737
CK_TILE_HOST double pow< double >(double x, double gamma)
Definition: math.hpp:947
CK_TILE_HOST fp16_t abs(fp16_t x)
Definition: math.hpp:551
CK_TILE_DEVICE fp16_t exp< fp16_t >(fp16_t x)
Definition: math.hpp:1366
CK_TILE_HOST float tanh< float >(float x)
Definition: math.hpp:617
constexpr CK_TILE_HOST_DEVICE bool is_power_of_two_integer(int32_t x)
Definition: math.hpp:462
_Float16 fp16_t
Definition: half.hpp:110
CK_TILE_HOST double sinh< double >(double x)
Definition: math.hpp:833
CK_TILE_DEVICE T tan(T x)
Definition: math.hpp:1229
int8_t int8_t
Definition: int8.hpp:20
CK_TILE_HOST T cos(T x)
Definition: math.hpp:749
constexpr CK_TILE_HOST_DEVICE int32_t next_power_of_two(int32_t x)
Definition: math.hpp:435
CK_TILE_DEVICE T acosh(T x)
Definition: math.hpp:1211
CK_TILE_DEVICE fp16_t ceil< fp16_t >(fp16_t x)
Definition: math.hpp:1301
CK_TILE_HOST float log< float >(float x)
Definition: math.hpp:923
CK_TILE_HOST double asinh< double >(double x)
Definition: math.hpp:743
CK_TILE_HOST float atanh< float >(float x)
Definition: math.hpp:809
CK_TILE_HOST float cos< float >(float x)
Definition: math.hpp:755
CK_TILE_DEVICE T log(T x)
Definition: math.hpp:1384
constexpr T log2e_v
Definition: math.hpp:488
CK_TILE_HOST float neg< float >(float x)
Definition: math.hpp:653
CK_TILE_HOST T ceil(T x)
Definition: math.hpp:839
CK_TILE_HOST T acosh(T x)
Definition: math.hpp:767
CK_TILE_DEVICE T sin(T x)
Definition: math.hpp:1151
CK_TILE_HOST T expm1(T x)
Definition: math.hpp:953
__host__ __device__ minus() -> minus< void, void >
FIXME: create macro to replace 'host device' and nothing more.
CK_TILE_HOST double atanh< double >(double x)
Definition: math.hpp:815
int32_t index_t
Definition: integer.hpp:9
CK_TILE_DEVICE T floor(T x)
Definition: math.hpp:1325
CK_TILE_HOST float pow< float >(float x, float gamma)
Definition: math.hpp:941
CK_TILE_DEVICE fp16_t log< fp16_t >(fp16_t x)
Definition: math.hpp:1390
CK_TILE_DEVICE T expm1(T x)
Definition: math.hpp:1426
CK_TILE_HOST double cos< double >(double x)
Definition: math.hpp:761
CK_TILE_HOST T tanh(T x)
Definition: math.hpp:611
constexpr CK_TILE_HOST_DEVICE index_t gcd(index_t x, index_t y)
Definition: math.hpp:268
CK_TILE_DEVICE bfloat16_t sqrt(bfloat16_t x)
Definition: bfloat16.hpp:408
CK_TILE_DEVICE T acos(T x)
Definition: math.hpp:1079
CK_TILE_DEVICE T atan(T x)
Definition: math.hpp:1133
__host__ __device__ less() -> less< void, void >
FIXME: create macro to replace 'host device' and nothing more.
__host__ __device__ scales(Scale) -> scales< Scale >
FIXME: create macro to replace 'host device' and nothing more.
CK_TILE_HOST float asin< float >(float x)
Definition: math.hpp:719
CK_TILE_HOST double ceil< double >(double x)
Definition: math.hpp:851
constexpr CK_TILE_HOST_DEVICE int32_t integer_log2_floor(int32_t x)
Definition: math.hpp:455
CK_TILE_DEVICE fp16_t neg< fp16_t >(fp16_t x)
Definition: math.hpp:1127
CK_TILE_HOST double neg< double >(double x)
Definition: math.hpp:659
CK_TILE_HOST float atan< float >(float x)
Definition: math.hpp:683
CK_TILE_HOST T atan(T x)
Definition: math.hpp:677
CK_TILE_DEVICE fp16_t floor< fp16_t >(fp16_t x)
Definition: math.hpp:1343
Right
Definition: math.hpp:327
constexpr CK_TILE_HOST_DEVICE auto integer_divide_floor(X x, Y y)
Definition: math.hpp:143
CK_TILE_HOST double floor< double >(double x)
Definition: math.hpp:887
CK_TILE_HOST T sin(T x)
Definition: math.hpp:695
__host__ __device__ equal() -> equal< void, void >
FIXME: create macro to replace 'host device' and nothing more.
CK_TILE_HOST float acos< float >(float x)
Definition: math.hpp:635
CK_TILE_DEVICE uint32_t sad_u32(uint32_t x, uint32_t y, uint32_t acc)
Definition: math.hpp:501
CK_TILE_HOST T floor(T x)
Definition: math.hpp:875
CK_TILE_HOST T sinh(T x)
Definition: math.hpp:821
CK_TILE_HOST T asin(T x)
Definition: math.hpp:713
CK_TILE_DEVICE T atanh(T x)
Definition: math.hpp:1247
CK_TILE_HOST double tanh< double >(double x)
Definition: math.hpp:623
CK_TILE_HOST T asinh(T x)
Definition: math.hpp:731
CK_TILE_DEVICE bfloat16_t exp(bfloat16_t x)
Definition: bfloat16.hpp:414
CK_TILE_HOST double sin< double >(double x)
Definition: math.hpp:707
CK_TILE_HOST float exp< float >(float x)
Definition: math.hpp:905
CK_TILE_HOST float floor< float >(float x)
Definition: math.hpp:881
CK_TILE_HOST int clz(uint32_t x)
Definition: math.hpp:264
CK_TILE_HOST int8_t neg< int8_t >(int8_t x)
Definition: math.hpp:671
CK_TILE_HOST double acos< double >(double x)
Definition: math.hpp:641
CK_TILE_HOST float acosh< float >(float x)
Definition: math.hpp:773
CK_TILE_DEVICE uint16_t sad_u16(uint16_t x, uint16_t y, uint16_t acc)
Definition: math.hpp:496
CK_TILE_HOST double acosh< double >(double x)
Definition: math.hpp:779
Y constexpr CK_TILE_HOST_DEVICE auto lcm(X x, Y y)
Definition: math.hpp:314
CK_TILE_HOST_DEVICE bfloat16_t abs(const bfloat16_t &x)
Definition: bfloat16.hpp:395
CK_TILE_HOST T atanh(T x)
Definition: math.hpp:803
CK_TILE_HOST T neg(T x)
Definition: math.hpp:647
CK_TILE_HOST double asin< double >(double x)
Definition: math.hpp:725
CK_TILE_DEVICE T ceil(T x)
Definition: math.hpp:1283
CK_TILE_DEVICE T asin(T x)
Definition: math.hpp:1175
CK_TILE_HOST double cosh< double >(double x)
Definition: math.hpp:869
CK_TILE_DEVICE T sinh(T x)
Definition: math.hpp:1265
CK_TILE_DEVICE T cosh(T x)
Definition: math.hpp:1307
CK_TILE_HOST float expm1< float >(float x)
Definition: math.hpp:959
CK_TILE_HOST_DEVICE bool isnan(const bfloat16_t &x)
Definition: bfloat16.hpp:401
CK_TILE_HOST double sqrt(double x)
Definition: math.hpp:608
CK_TILE_DEVICE T asinh(T x)
Definition: math.hpp:1193
CK_TILE_HOST T tan(T x)
Definition: math.hpp:785
CK_TILE_DEVICE fp16_t sin< fp16_t >(fp16_t x)
Definition: math.hpp:1169
CK_TILE_HOST float sin< float >(float x)
Definition: math.hpp:701
CK_TILE_HOST T cosh(T x)
Definition: math.hpp:857
CK_TILE_HOST double expm1< double >(double x)
Definition: math.hpp:965
CK_TILE_HOST double log< double >(double x)
Definition: math.hpp:929
CK_TILE_HOST float cosh< float >(float x)
Definition: math.hpp:863
constexpr CK_TILE_HOST_DEVICE T min(T x)
Definition: math.hpp:210
CK_TILE_HOST float ceil< float >(float x)
Definition: math.hpp:845
constexpr CK_TILE_HOST_DEVICE T max(T x)
Definition: math.hpp:161
CK_TILE_HOST T pow(T x, T gamma)
Definition: math.hpp:935
CK_TILE_HOST T rcp(T x)
Definition: math.hpp:893
CK_TILE_HOST double exp< double >(double x)
Definition: math.hpp:911
CK_TILE_DEVICE bfloat16_t exp2(bfloat16_t x)
Definition: bfloat16.hpp:420
CK_TILE_HOST float sinh< float >(float x)
Definition: math.hpp:827
__host__ __device__ plus() -> plus< void, void >
FIXME: create macro to replace 'host device' and nothing more.
_BitInt(4) int4_t
Definition: data_type.hpp:26
std::enable_if< B, T > enable_if
Definition: enable_if.hpp:10
Definition: integral_constant.hpp:13
constexpr CK_TILE_HOST_DEVICE bool operator()(double lhs, double rhs) const
Definition: math.hpp:363
constexpr CK_TILE_HOST_DEVICE bool operator()(float lhs, float rhs) const
Definition: math.hpp:354
Definition: math.hpp:339
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs==rhs)
Definition: math.hpp:341
Definition: math.hpp:134
constexpr CK_TILE_HOST_DEVICE T operator()(T a, T b) const
Definition: math.hpp:135
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs< rhs)
Definition: math.hpp:383
constexpr CK_TILE_HOST_DEVICE bool operator()(double lhs, double rhs) const
Definition: math.hpp:429
constexpr CK_TILE_HOST_DEVICE bool operator()(float lhs, float rhs) const
Definition: math.hpp:420
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs<=rhs)
Definition: math.hpp:407
Definition: math.hpp:395
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs<=rhs)
Definition: math.hpp:396
Definition: math.hpp:371
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs< rhs)
Definition: math.hpp:372
Definition: math.hpp:473
Definition: math.hpp:122
constexpr CK_TILE_HOST_DEVICE T operator()(T a, T b) const
Definition: math.hpp:123
Definition: math.hpp:128
constexpr CK_TILE_HOST_DEVICE T operator()(T a, T b) const
Definition: math.hpp:129
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs - rhs)
Definition: math.hpp:86
Definition: math.hpp:74
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs - rhs)
Definition: math.hpp:75
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs *rhs)
Definition: math.hpp:110
Definition: math.hpp:98
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs *rhs)
Definition: math.hpp:99
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs+rhs)
Definition: math.hpp:62
Definition: math.hpp:50
constexpr CK_TILE_HOST_DEVICE auto operator()(const Left &lhs, const Right &rhs) const -> decltype(lhs+rhs)
Definition: math.hpp:51
Definition: math.hpp:18
constexpr CK_TILE_HOST_DEVICE auto operator()(const Right &rhs) const -> decltype(lhs *rhs)
Definition: math.hpp:20
Definition: math.hpp:28
constexpr CK_TILE_HOST_DEVICE auto operator()(const Right &rhs) const -> decltype(std::declval< const Scale & >() *rhs)
Definition: math.hpp:34
constexpr CK_TILE_HOST_DEVICE scales(Scale lhs)
Definition: math.hpp:31
#define C_LOG2E
Definition: math.hpp:469