/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/util_type.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/util_type.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.1.0/hipcub/include/hipcub/backend/rocprim/util_type.hpp Source File
util_type.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
31 #define HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
32 
33 #include <limits>
34 #include <type_traits>
35 
36 #include "../../config.hpp"
37 
38 #include <rocprim/detail/various.hpp>
39 #include <rocprim/types/future_value.hpp>
40 
41 #include <hip/hip_fp16.h>
42 #include <hip/hip_bfloat16.h>
43 
44 BEGIN_HIPCUB_NAMESPACE
45 
46 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
47 
48 using NullType = ::rocprim::empty_type;
49 
50 #endif
51 
52 template<bool B, typename T, typename F>
53 struct If
54 {
55  using Type = typename std::conditional<B, T, F>::type;
56 };
57 
58 template<typename T>
59 struct IsPointer
60 {
61  static constexpr bool VALUE = std::is_pointer<T>::value;
62 };
63 
64 template<typename T>
65 struct IsVolatile
66 {
67  static constexpr bool VALUE = std::is_volatile<T>::value;
68 };
69 
70 template<typename T>
72 {
73  using Type = typename std::remove_cv<T>::type;
74 };
75 
76 template<int N>
77 struct PowerOfTwo
78 {
79  static constexpr bool VALUE = ::rocprim::detail::is_power_of_two(N);
80 };
81 
82 namespace detail
83 {
84 
85 template<int N, int CURRENT_VAL = N, int COUNT = 0>
86 struct Log2Impl
87 {
88  static constexpr int VALUE = Log2Impl<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE;
89 };
90 
91 template<int N, int COUNT>
92 struct Log2Impl<N, 0, COUNT>
93 {
94  static constexpr int VALUE = (1 << (COUNT - 1) < N) ? COUNT : COUNT - 1;
95 };
96 
97 } // end of detail namespace
98 
99 template<int N>
100 struct Log2
101 {
102  static_assert(N != 0, "The logarithm of zero is undefined");
103  static constexpr int VALUE = detail::Log2Impl<N>::VALUE;
104 };
105 
106 template<typename T>
108 {
109  T * d_buffers[2];
110 
111  int selector;
112 
113  HIPCUB_HOST_DEVICE inline
114  DoubleBuffer()
115  {
116  selector = 0;
117  d_buffers[0] = nullptr;
118  d_buffers[1] = nullptr;
119  }
120 
121  HIPCUB_HOST_DEVICE inline
122  DoubleBuffer(T * d_current, T * d_alternate)
123  {
124  selector = 0;
125  d_buffers[0] = d_current;
126  d_buffers[1] = d_alternate;
127  }
128 
129  HIPCUB_HOST_DEVICE inline
130  T * Current()
131  {
132  return d_buffers[selector];
133  }
134 
135  HIPCUB_HOST_DEVICE inline
136  T * Alternate()
137  {
138  return d_buffers[selector ^ 1];
139  }
140 };
141 
142 template <int A>
143 struct Int2Type
144 {
145  enum {VALUE = A};
146 };
147 
148 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
149 
150 template<
151  class Key,
152  class Value
153 >
154 using KeyValuePair = ::rocprim::key_value_pair<Key, Value>;
155 
156 #endif
157 
158 template <typename T, typename Iter = T*>
159 using FutureValue = ::rocprim::future_value<T, Iter>;
160 
161 namespace detail
162 {
163 
164 template<typename T>
165 inline
166 ::rocprim::double_buffer<T> to_double_buffer(DoubleBuffer<T>& source)
167 {
168  return ::rocprim::double_buffer<T>(source.Current(), source.Alternate());
169 }
170 
171 template<typename T>
172 inline
173 void update_double_buffer(DoubleBuffer<T>& target, ::rocprim::double_buffer<T>& source)
174 {
175  if(target.Current() != source.current())
176  {
177  target.selector ^= 1;
178  }
179 }
180 
181 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
182 
183 template <typename T>
184 using is_integral_or_enum =
185  std::integral_constant<bool, std::is_integral<T>::value || std::is_enum<T>::value>;
186 
187 #endif
188 
189 }
190 
191 template <typename NumeratorT, typename DenominatorT>
192 __host__ __device__ __forceinline__ constexpr NumeratorT
193 DivideAndRoundUp(NumeratorT n, DenominatorT d)
194 {
195  static_assert(hipcub::detail::is_integral_or_enum<NumeratorT>::value &&
196  hipcub::detail::is_integral_or_enum<DenominatorT>::value,
197  "DivideAndRoundUp is only intended for integral types.");
198 
199  // Static cast to undo integral promotion.
200  return static_cast<NumeratorT>(n / d + (n % d != 0 ? 1 : 0));
201 }
202 
203 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
204 
205 /******************************************************************************
206  * Size and alignment
207  ******************************************************************************/
208 
210 template <typename T>
211 struct AlignBytes
212 {
213  struct Pad
214  {
215  T val;
216  char byte;
217  };
218 
219  enum
220  {
222  ALIGN_BYTES = sizeof(Pad) - sizeof(T)
223  };
224 
226  typedef T Type;
227 };
228 
229 // Specializations where host C++ compilers (e.g., 32-bit Windows) may disagree
230 // with device C++ compilers (EDG) on types passed as template parameters through
231 // kernel functions
232 
233 #define __HIPCUB_ALIGN_BYTES(t, b) \
234  template <> struct AlignBytes<t> \
235  { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; };
236 
237 __HIPCUB_ALIGN_BYTES(short4, 8)
238 __HIPCUB_ALIGN_BYTES(ushort4, 8)
239 __HIPCUB_ALIGN_BYTES(int2, 8)
240 __HIPCUB_ALIGN_BYTES(uint2, 8)
241 __HIPCUB_ALIGN_BYTES(long long, 8)
242 __HIPCUB_ALIGN_BYTES(unsigned long long, 8)
243 __HIPCUB_ALIGN_BYTES(float2, 8)
244 __HIPCUB_ALIGN_BYTES(double, 8)
245 #ifdef _WIN32
246  __HIPCUB_ALIGN_BYTES(long2, 8)
247  __HIPCUB_ALIGN_BYTES(ulong2, 8)
248 #else
249  __HIPCUB_ALIGN_BYTES(long2, 16)
250  __HIPCUB_ALIGN_BYTES(ulong2, 16)
251 #endif
252 __HIPCUB_ALIGN_BYTES(int4, 16)
253 __HIPCUB_ALIGN_BYTES(uint4, 16)
254 __HIPCUB_ALIGN_BYTES(float4, 16)
255 __HIPCUB_ALIGN_BYTES(long4, 16)
256 __HIPCUB_ALIGN_BYTES(ulong4, 16)
257 __HIPCUB_ALIGN_BYTES(longlong2, 16)
258 __HIPCUB_ALIGN_BYTES(ulonglong2, 16)
259 __HIPCUB_ALIGN_BYTES(double2, 16)
260 __HIPCUB_ALIGN_BYTES(longlong4, 16)
261 __HIPCUB_ALIGN_BYTES(ulonglong4, 16)
262 __HIPCUB_ALIGN_BYTES(double4, 16)
263 
264 template <typename T> struct AlignBytes<volatile T> : AlignBytes<T> {};
265 template <typename T> struct AlignBytes<const T> : AlignBytes<T> {};
266 template <typename T> struct AlignBytes<const volatile T> : AlignBytes<T> {};
267 
268 
270 template <typename T>
271 struct UnitWord
272 {
273  enum {
274  ALIGN_BYTES = AlignBytes<T>::ALIGN_BYTES
275  };
276 
277  template <typename Unit>
278  struct IsMultiple
279  {
280  enum {
281  UNIT_ALIGN_BYTES = AlignBytes<Unit>::ALIGN_BYTES,
282  IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (int(ALIGN_BYTES) % int(UNIT_ALIGN_BYTES) == 0)
283  };
284  };
285 
287  typedef typename If<IsMultiple<int>::IS_MULTIPLE,
288  unsigned int,
289  typename If<IsMultiple<short>::IS_MULTIPLE,
290  unsigned short,
291  unsigned char>::Type>::Type ShuffleWord;
292 
294  typedef typename If<IsMultiple<long long>::IS_MULTIPLE,
295  unsigned long long,
296  ShuffleWord>::Type VolatileWord;
297 
299  typedef typename If<IsMultiple<longlong2>::IS_MULTIPLE,
300  ulonglong2,
301  VolatileWord>::Type DeviceWord;
302 
304  typedef typename If<IsMultiple<int4>::IS_MULTIPLE,
305  uint4,
306  typename If<IsMultiple<int2>::IS_MULTIPLE,
307  uint2,
308  ShuffleWord>::Type>::Type TextureWord;
309 };
310 
311 
312 // float2 specialization workaround (for SM10-SM13)
313 template <>
314 struct UnitWord <float2>
315 {
316  typedef int ShuffleWord;
317  typedef unsigned long long VolatileWord;
318  typedef unsigned long long DeviceWord;
319  typedef float2 TextureWord;
320 };
321 
322 // float4 specialization workaround (for SM10-SM13)
323 template <>
324 struct UnitWord <float4>
325 {
326  typedef int ShuffleWord;
327  typedef unsigned long long VolatileWord;
328  typedef ulonglong2 DeviceWord;
329  typedef float4 TextureWord;
330 };
331 
332 
333 // char2 specialization workaround (for SM10-SM13)
334 template <>
335 struct UnitWord <char2>
336 {
337  typedef unsigned short ShuffleWord;
338  typedef unsigned short VolatileWord;
339  typedef unsigned short DeviceWord;
340  typedef unsigned short TextureWord;
341 };
342 
343 
344 template <typename T> struct UnitWord<volatile T> : UnitWord<T> {};
345 template <typename T> struct UnitWord<const T> : UnitWord<T> {};
346 template <typename T> struct UnitWord<const volatile T> : UnitWord<T> {};
347 
348 
349 #endif // DOXYGEN_SHOULD_SKIP_THIS
350 
351 
352 
353 
354 /******************************************************************************
355  * Wrapper types
356  ******************************************************************************/
357 
361 template <typename T>
363 {
365  typedef typename UnitWord<T>::DeviceWord DeviceWord;
366 
367  enum
368  {
369  WORDS = sizeof(T) / sizeof(DeviceWord)
370  };
371 
373  DeviceWord storage[WORDS];
374 
376  __host__ __device__ __forceinline__ T& Alias()
377  {
378  return reinterpret_cast<T&>(*this);
379  }
380 };
381 
382 
383 /******************************************************************************
384  * Simple type traits utilities.
385  *
386  * For example:
387  * Traits<int>::CATEGORY // SIGNED_INTEGER
388  * Traits<NullType>::NULL_TYPE // true
389  * Traits<uint4>::CATEGORY // NOT_A_NUMBER
390  * Traits<uint4>::PRIMITIVE; // false
391  *
392  ******************************************************************************/
393 
394  #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
395 
399 enum Category
400 {
401  NOT_A_NUMBER,
402  SIGNED_INTEGER,
403  UNSIGNED_INTEGER,
404  FLOATING_POINT
405 };
406 
407 
411 template <Category _CATEGORY, bool _PRIMITIVE, bool _NULL_TYPE, typename _UnsignedBits, typename T>
412 struct BaseTraits
413 {
415  static const Category CATEGORY = _CATEGORY;
416  enum
417  {
418  PRIMITIVE = _PRIMITIVE,
419  NULL_TYPE = _NULL_TYPE,
420  };
421 };
422 
423 
427 template <typename _UnsignedBits, typename T>
428 struct BaseTraits<UNSIGNED_INTEGER, true, false, _UnsignedBits, T>
429 {
430  typedef _UnsignedBits UnsignedBits;
431 
432  static const Category CATEGORY = UNSIGNED_INTEGER;
433  static const UnsignedBits LOWEST_KEY = UnsignedBits(0);
434  static const UnsignedBits MAX_KEY = UnsignedBits(-1);
435 
436  enum
437  {
438  PRIMITIVE = true,
439  NULL_TYPE = false,
440  };
441 
442 
443  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
444  {
445  return key;
446  }
447 
448  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
449  {
450  return key;
451  }
452 
453  static __host__ __device__ __forceinline__ T Max()
454  {
455  UnsignedBits retval = MAX_KEY;
456  return reinterpret_cast<T&>(retval);
457  }
458 
459  static __host__ __device__ __forceinline__ T Lowest()
460  {
461  UnsignedBits retval = LOWEST_KEY;
462  return reinterpret_cast<T&>(retval);
463  }
464 };
465 
466 
470 template <typename _UnsignedBits, typename T>
471 struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>
472 {
473  typedef _UnsignedBits UnsignedBits;
474 
475  static const Category CATEGORY = SIGNED_INTEGER;
476  static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1);
477  static const UnsignedBits LOWEST_KEY = HIGH_BIT;
478  static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
479 
480  enum
481  {
482  PRIMITIVE = true,
483  NULL_TYPE = false,
484  };
485 
486  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
487  {
488  return key ^ HIGH_BIT;
489  };
490 
491  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
492  {
493  return key ^ HIGH_BIT;
494  };
495 
496  static __host__ __device__ __forceinline__ T Max()
497  {
498  UnsignedBits retval = MAX_KEY;
499  return reinterpret_cast<T&>(retval);
500  }
501 
502  static __host__ __device__ __forceinline__ T Lowest()
503  {
504  UnsignedBits retval = LOWEST_KEY;
505  return reinterpret_cast<T&>(retval);
506  }
507 };
508 
509 template <typename _T>
510 struct FpLimits;
511 
512 template <>
513 struct FpLimits<float>
514 {
515  static __host__ __device__ __forceinline__ float Max() {
516  return std::numeric_limits<float>::max();
517  }
518 
519  static __host__ __device__ __forceinline__ float Lowest() {
520  return std::numeric_limits<float>::max() * float(-1);
521  }
522 };
523 
524 template <>
525 struct FpLimits<double>
526 {
527  static __host__ __device__ __forceinline__ double Max() {
528  return std::numeric_limits<double>::max();
529  }
530 
531  static __host__ __device__ __forceinline__ double Lowest() {
532  return std::numeric_limits<double>::max() * double(-1);
533  }
534 };
535 
536 template <>
537 struct FpLimits<__half>
538 {
539  static __host__ __device__ __forceinline__ __half Max() {
540  unsigned short max_word = 0x7BFF;
541  return reinterpret_cast<__half&>(max_word);
542  }
543 
544  static __host__ __device__ __forceinline__ __half Lowest() {
545  unsigned short lowest_word = 0xFBFF;
546  return reinterpret_cast<__half&>(lowest_word);
547  }
548 };
549 
550 template <>
551 struct FpLimits<hip_bfloat16>
552 {
553  static __host__ __device__ __forceinline__ hip_bfloat16 Max() {
554  unsigned short max_word = 0x7F7F;
555  return reinterpret_cast<hip_bfloat16 &>(max_word);
556  }
557 
558  static __host__ __device__ __forceinline__ hip_bfloat16 Lowest() {
559  unsigned short lowest_word = 0xFF7F;
560  return reinterpret_cast<hip_bfloat16 &>(lowest_word);
561  }
562 };
563 
567 template <typename _UnsignedBits, typename T>
568 struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>
569 {
570  typedef _UnsignedBits UnsignedBits;
571 
572  static const Category CATEGORY = FLOATING_POINT;
573  static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1);
574  static const UnsignedBits LOWEST_KEY = UnsignedBits(-1);
575  static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
576 
577  enum
578  {
579  PRIMITIVE = true,
580  NULL_TYPE = false,
581  };
582 
583  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
584  {
585  UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT;
586  return key ^ mask;
587  };
588 
589  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
590  {
591  UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1);
592  return key ^ mask;
593  };
594 
595  static __host__ __device__ __forceinline__ T Max() {
596  return FpLimits<T>::Max();
597  }
598 
599  static __host__ __device__ __forceinline__ T Lowest() {
600  return FpLimits<T>::Lowest();
601  }
602 };
603 
604 
608 template <typename T> struct NumericTraits : BaseTraits<NOT_A_NUMBER, false, false, T, T> {};
609 
610 template <> struct NumericTraits<NullType> : BaseTraits<NOT_A_NUMBER, false, true, NullType, NullType> {};
611 
612 template <> struct NumericTraits<char> : BaseTraits<(std::numeric_limits<char>::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {};
613 template <> struct NumericTraits<signed char> : BaseTraits<SIGNED_INTEGER, true, false, unsigned char, signed char> {};
614 template <> struct NumericTraits<short> : BaseTraits<SIGNED_INTEGER, true, false, unsigned short, short> {};
615 template <> struct NumericTraits<int> : BaseTraits<SIGNED_INTEGER, true, false, unsigned int, int> {};
616 template <> struct NumericTraits<long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long, long> {};
617 template <> struct NumericTraits<long long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long long, long long> {};
618 
619 template <> struct NumericTraits<unsigned char> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned char, unsigned char> {};
620 template <> struct NumericTraits<unsigned short> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned short, unsigned short> {};
621 template <> struct NumericTraits<unsigned int> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned int, unsigned int> {};
622 template <> struct NumericTraits<unsigned long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long, unsigned long> {};
623 template <> struct NumericTraits<unsigned long long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long long, unsigned long long> {};
624 
625 template <> struct NumericTraits<float> : BaseTraits<FLOATING_POINT, true, false, unsigned int, float> {};
626 template <> struct NumericTraits<double> : BaseTraits<FLOATING_POINT, true, false, unsigned long long, double> {};
627 template <> struct NumericTraits<__half> : BaseTraits<FLOATING_POINT, true, false, unsigned short, __half> {};
628 template <> struct NumericTraits<hip_bfloat16 > : BaseTraits<FLOATING_POINT, true, false, unsigned short, hip_bfloat16 > {};
629 
630 template <> struct NumericTraits<bool> : BaseTraits<UNSIGNED_INTEGER, true, false, typename UnitWord<bool>::VolatileWord, bool> {};
631 
635 template <typename T>
636 struct Traits : NumericTraits<typename RemoveQualifiers<T>::Type> {};
637 
638 #endif // DOXYGEN_SHOULD_SKIP_THIS
639 
640 END_HIPCUB_NAMESPACE
641 
642 #endif // HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
Definition: util_type.hpp:108
Definition: util_type.hpp:54
Definition: util_type.hpp:144
Definition: util_type.hpp:60
Definition: util_type.hpp:66
Definition: util_type.hpp:101
Definition: util_type.hpp:78
Definition: util_type.hpp:72
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.hpp:363
__host__ __device__ __forceinline__ T & Alias()
Alias.
Definition: util_type.hpp:376
UnitWord< T >::DeviceWord DeviceWord
Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T.
Definition: util_type.hpp:365