/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.2/hipcub/include/hipcub/backend/rocprim/util_type.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.2/hipcub/include/hipcub/backend/rocprim/util_type.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.0.2/hipcub/include/hipcub/backend/rocprim/util_type.hpp Source File
util_type.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
31 #define HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
32 
33 #include <limits>
34 #include <type_traits>
35 
36 #include "../../config.hpp"
37 
38 #include <rocprim/detail/various.hpp>
39 
40 #include <hip/hip_fp16.h>
41 #include <hip/hip_bfloat16.h>
42 
43 BEGIN_HIPCUB_NAMESPACE
44 
45 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
46 
47 using NullType = ::rocprim::empty_type;
48 
49 #endif
50 
51 template<bool B, typename T, typename F>
52 struct If
53 {
54  using Type = typename std::conditional<B, T, F>::type;
55 };
56 
57 template<typename T>
58 struct IsPointer
59 {
60  static constexpr bool VALUE = std::is_pointer<T>::value;
61 };
62 
63 template<typename T>
64 struct IsVolatile
65 {
66  static constexpr bool VALUE = std::is_volatile<T>::value;
67 };
68 
69 template<typename T>
71 {
72  using Type = typename std::remove_cv<T>::type;
73 };
74 
75 template<int N>
76 struct PowerOfTwo
77 {
78  static constexpr bool VALUE = ::rocprim::detail::is_power_of_two<N>();
79 };
80 
81 namespace detail
82 {
83 
84 template<int N, int CURRENT_VAL = N, int COUNT = 0>
85 struct Log2Impl
86 {
87  static constexpr int VALUE = Log2Impl<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE;
88 };
89 
90 template<int N, int COUNT>
91 struct Log2Impl<N, 0, COUNT>
92 {
93  static constexpr int VALUE = (1 << (COUNT - 1) < N) ? COUNT : COUNT - 1;
94 };
95 
96 } // end of detail namespace
97 
98 template<int N>
99 struct Log2
100 {
101  static_assert(N != 0, "The logarithm of zero is undefined");
102  static constexpr int VALUE = detail::Log2Impl<N>::VALUE;
103 };
104 
105 template<typename T>
107 {
108  T * d_buffers[2];
109 
110  int selector;
111 
112  HIPCUB_HOST_DEVICE inline
113  DoubleBuffer()
114  {
115  selector = 0;
116  d_buffers[0] = nullptr;
117  d_buffers[1] = nullptr;
118  }
119 
120  HIPCUB_HOST_DEVICE inline
121  DoubleBuffer(T * d_current, T * d_alternate)
122  {
123  selector = 0;
124  d_buffers[0] = d_current;
125  d_buffers[1] = d_alternate;
126  }
127 
128  HIPCUB_HOST_DEVICE inline
129  T * Current()
130  {
131  return d_buffers[selector];
132  }
133 
134  HIPCUB_HOST_DEVICE inline
135  T * Alternate()
136  {
137  return d_buffers[selector ^ 1];
138  }
139 };
140 
141 template <int A>
142 struct Int2Type
143 {
144  enum {VALUE = A};
145 };
146 
147 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
148 
149 template<
150  class Key,
151  class Value
152 >
153 using KeyValuePair = ::rocprim::key_value_pair<Key, Value>;
154 
155 #endif
156 
157 namespace detail
158 {
159 
160 template<typename T>
161 inline
162 ::rocprim::double_buffer<T> to_double_buffer(DoubleBuffer<T>& source)
163 {
164  return ::rocprim::double_buffer<T>(source.Current(), source.Alternate());
165 }
166 
167 template<typename T>
168 inline
169 void update_double_buffer(DoubleBuffer<T>& target, ::rocprim::double_buffer<T>& source)
170 {
171  if(target.Current() != source.current())
172  {
173  target.selector ^= 1;
174  }
175 }
176 
177 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
178 
179 template <typename T>
180 using is_integral_or_enum =
181  std::integral_constant<bool, std::is_integral<T>::value || std::is_enum<T>::value>;
182 
183 #endif
184 
185 }
186 
187 template <typename NumeratorT, typename DenominatorT>
188 __host__ __device__ __forceinline__ constexpr NumeratorT
189 DivideAndRoundUp(NumeratorT n, DenominatorT d)
190 {
191  static_assert(hipcub::detail::is_integral_or_enum<NumeratorT>::value &&
192  hipcub::detail::is_integral_or_enum<DenominatorT>::value,
193  "DivideAndRoundUp is only intended for integral types.");
194 
195  // Static cast to undo integral promotion.
196  return static_cast<NumeratorT>(n / d + (n % d != 0 ? 1 : 0));
197 }
198 
199 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
200 
201 /******************************************************************************
202  * Size and alignment
203  ******************************************************************************/
204 
206 template <typename T>
207 struct AlignBytes
208 {
209  struct Pad
210  {
211  T val;
212  char byte;
213  };
214 
215  enum
216  {
218  ALIGN_BYTES = sizeof(Pad) - sizeof(T)
219  };
220 
222  typedef T Type;
223 };
224 
225 // Specializations where host C++ compilers (e.g., 32-bit Windows) may disagree
226 // with device C++ compilers (EDG) on types passed as template parameters through
227 // kernel functions
228 
229 #define __HIPCUB_ALIGN_BYTES(t, b) \
230  template <> struct AlignBytes<t> \
231  { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; };
232 
233 __HIPCUB_ALIGN_BYTES(short4, 8)
234 __HIPCUB_ALIGN_BYTES(ushort4, 8)
235 __HIPCUB_ALIGN_BYTES(int2, 8)
236 __HIPCUB_ALIGN_BYTES(uint2, 8)
237 __HIPCUB_ALIGN_BYTES(long long, 8)
238 __HIPCUB_ALIGN_BYTES(unsigned long long, 8)
239 __HIPCUB_ALIGN_BYTES(float2, 8)
240 __HIPCUB_ALIGN_BYTES(double, 8)
241 #ifdef _WIN32
242  __HIPCUB_ALIGN_BYTES(long2, 8)
243  __HIPCUB_ALIGN_BYTES(ulong2, 8)
244 #else
245  __HIPCUB_ALIGN_BYTES(long2, 16)
246  __HIPCUB_ALIGN_BYTES(ulong2, 16)
247 #endif
248 __HIPCUB_ALIGN_BYTES(int4, 16)
249 __HIPCUB_ALIGN_BYTES(uint4, 16)
250 __HIPCUB_ALIGN_BYTES(float4, 16)
251 __HIPCUB_ALIGN_BYTES(long4, 16)
252 __HIPCUB_ALIGN_BYTES(ulong4, 16)
253 __HIPCUB_ALIGN_BYTES(longlong2, 16)
254 __HIPCUB_ALIGN_BYTES(ulonglong2, 16)
255 __HIPCUB_ALIGN_BYTES(double2, 16)
256 __HIPCUB_ALIGN_BYTES(longlong4, 16)
257 __HIPCUB_ALIGN_BYTES(ulonglong4, 16)
258 __HIPCUB_ALIGN_BYTES(double4, 16)
259 
260 template <typename T> struct AlignBytes<volatile T> : AlignBytes<T> {};
261 template <typename T> struct AlignBytes<const T> : AlignBytes<T> {};
262 template <typename T> struct AlignBytes<const volatile T> : AlignBytes<T> {};
263 
264 
266 template <typename T>
267 struct UnitWord
268 {
269  enum {
270  ALIGN_BYTES = AlignBytes<T>::ALIGN_BYTES
271  };
272 
273  template <typename Unit>
274  struct IsMultiple
275  {
276  enum {
277  UNIT_ALIGN_BYTES = AlignBytes<Unit>::ALIGN_BYTES,
278  IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (int(ALIGN_BYTES) % int(UNIT_ALIGN_BYTES) == 0)
279  };
280  };
281 
283  typedef typename If<IsMultiple<int>::IS_MULTIPLE,
284  unsigned int,
285  typename If<IsMultiple<short>::IS_MULTIPLE,
286  unsigned short,
287  unsigned char>::Type>::Type ShuffleWord;
288 
290  typedef typename If<IsMultiple<long long>::IS_MULTIPLE,
291  unsigned long long,
292  ShuffleWord>::Type VolatileWord;
293 
295  typedef typename If<IsMultiple<longlong2>::IS_MULTIPLE,
296  ulonglong2,
297  VolatileWord>::Type DeviceWord;
298 
300  typedef typename If<IsMultiple<int4>::IS_MULTIPLE,
301  uint4,
302  typename If<IsMultiple<int2>::IS_MULTIPLE,
303  uint2,
304  ShuffleWord>::Type>::Type TextureWord;
305 };
306 
307 
308 // float2 specialization workaround (for SM10-SM13)
309 template <>
310 struct UnitWord <float2>
311 {
312  typedef int ShuffleWord;
313  typedef unsigned long long VolatileWord;
314  typedef unsigned long long DeviceWord;
315  typedef float2 TextureWord;
316 };
317 
318 // float4 specialization workaround (for SM10-SM13)
319 template <>
320 struct UnitWord <float4>
321 {
322  typedef int ShuffleWord;
323  typedef unsigned long long VolatileWord;
324  typedef ulonglong2 DeviceWord;
325  typedef float4 TextureWord;
326 };
327 
328 
329 // char2 specialization workaround (for SM10-SM13)
330 template <>
331 struct UnitWord <char2>
332 {
333  typedef unsigned short ShuffleWord;
334  typedef unsigned short VolatileWord;
335  typedef unsigned short DeviceWord;
336  typedef unsigned short TextureWord;
337 };
338 
339 
340 template <typename T> struct UnitWord<volatile T> : UnitWord<T> {};
341 template <typename T> struct UnitWord<const T> : UnitWord<T> {};
342 template <typename T> struct UnitWord<const volatile T> : UnitWord<T> {};
343 
344 
345 #endif // DOXYGEN_SHOULD_SKIP_THIS
346 
347 
348 
349 
350 /******************************************************************************
351  * Wrapper types
352  ******************************************************************************/
353 
357 template <typename T>
359 {
361  typedef typename UnitWord<T>::DeviceWord DeviceWord;
362 
363  enum
364  {
365  WORDS = sizeof(T) / sizeof(DeviceWord)
366  };
367 
369  DeviceWord storage[WORDS];
370 
372  __host__ __device__ __forceinline__ T& Alias()
373  {
374  return reinterpret_cast<T&>(*this);
375  }
376 };
377 
378 
379 /******************************************************************************
380  * Simple type traits utilities.
381  *
382  * For example:
383  * Traits<int>::CATEGORY // SIGNED_INTEGER
384  * Traits<NullType>::NULL_TYPE // true
385  * Traits<uint4>::CATEGORY // NOT_A_NUMBER
386  * Traits<uint4>::PRIMITIVE; // false
387  *
388  ******************************************************************************/
389 
390  #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
391 
395 enum Category
396 {
397  NOT_A_NUMBER,
398  SIGNED_INTEGER,
399  UNSIGNED_INTEGER,
400  FLOATING_POINT
401 };
402 
403 
407 template <Category _CATEGORY, bool _PRIMITIVE, bool _NULL_TYPE, typename _UnsignedBits, typename T>
408 struct BaseTraits
409 {
411  static const Category CATEGORY = _CATEGORY;
412  enum
413  {
414  PRIMITIVE = _PRIMITIVE,
415  NULL_TYPE = _NULL_TYPE,
416  };
417 };
418 
419 
423 template <typename _UnsignedBits, typename T>
424 struct BaseTraits<UNSIGNED_INTEGER, true, false, _UnsignedBits, T>
425 {
426  typedef _UnsignedBits UnsignedBits;
427 
428  static const Category CATEGORY = UNSIGNED_INTEGER;
429  static const UnsignedBits LOWEST_KEY = UnsignedBits(0);
430  static const UnsignedBits MAX_KEY = UnsignedBits(-1);
431 
432  enum
433  {
434  PRIMITIVE = true,
435  NULL_TYPE = false,
436  };
437 
438 
439  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
440  {
441  return key;
442  }
443 
444  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
445  {
446  return key;
447  }
448 
449  static __host__ __device__ __forceinline__ T Max()
450  {
451  UnsignedBits retval = MAX_KEY;
452  return reinterpret_cast<T&>(retval);
453  }
454 
455  static __host__ __device__ __forceinline__ T Lowest()
456  {
457  UnsignedBits retval = LOWEST_KEY;
458  return reinterpret_cast<T&>(retval);
459  }
460 };
461 
462 
466 template <typename _UnsignedBits, typename T>
467 struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>
468 {
469  typedef _UnsignedBits UnsignedBits;
470 
471  static const Category CATEGORY = SIGNED_INTEGER;
472  static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1);
473  static const UnsignedBits LOWEST_KEY = HIGH_BIT;
474  static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
475 
476  enum
477  {
478  PRIMITIVE = true,
479  NULL_TYPE = false,
480  };
481 
482  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
483  {
484  return key ^ HIGH_BIT;
485  };
486 
487  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
488  {
489  return key ^ HIGH_BIT;
490  };
491 
492  static __host__ __device__ __forceinline__ T Max()
493  {
494  UnsignedBits retval = MAX_KEY;
495  return reinterpret_cast<T&>(retval);
496  }
497 
498  static __host__ __device__ __forceinline__ T Lowest()
499  {
500  UnsignedBits retval = LOWEST_KEY;
501  return reinterpret_cast<T&>(retval);
502  }
503 };
504 
505 template <typename _T>
506 struct FpLimits;
507 
508 template <>
509 struct FpLimits<float>
510 {
511  static __host__ __device__ __forceinline__ float Max() {
512  return std::numeric_limits<float>::max();
513  }
514 
515  static __host__ __device__ __forceinline__ float Lowest() {
516  return std::numeric_limits<float>::max() * float(-1);
517  }
518 };
519 
520 template <>
521 struct FpLimits<double>
522 {
523  static __host__ __device__ __forceinline__ double Max() {
524  return std::numeric_limits<double>::max();
525  }
526 
527  static __host__ __device__ __forceinline__ double Lowest() {
528  return std::numeric_limits<double>::max() * double(-1);
529  }
530 };
531 
532 template <>
533 struct FpLimits<__half>
534 {
535  static __host__ __device__ __forceinline__ __half Max() {
536  unsigned short max_word = 0x7BFF;
537  return reinterpret_cast<__half&>(max_word);
538  }
539 
540  static __host__ __device__ __forceinline__ __half Lowest() {
541  unsigned short lowest_word = 0xFBFF;
542  return reinterpret_cast<__half&>(lowest_word);
543  }
544 };
545 
546 template <>
547 struct FpLimits<hip_bfloat16>
548 {
549  static __host__ __device__ __forceinline__ hip_bfloat16 Max() {
550  unsigned short max_word = 0x7F7F;
551  return reinterpret_cast<hip_bfloat16 &>(max_word);
552  }
553 
554  static __host__ __device__ __forceinline__ hip_bfloat16 Lowest() {
555  unsigned short lowest_word = 0xFF7F;
556  return reinterpret_cast<hip_bfloat16 &>(lowest_word);
557  }
558 };
559 
563 template <typename _UnsignedBits, typename T>
564 struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>
565 {
566  typedef _UnsignedBits UnsignedBits;
567 
568  static const Category CATEGORY = FLOATING_POINT;
569  static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1);
570  static const UnsignedBits LOWEST_KEY = UnsignedBits(-1);
571  static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
572 
573  enum
574  {
575  PRIMITIVE = true,
576  NULL_TYPE = false,
577  };
578 
579  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
580  {
581  UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT;
582  return key ^ mask;
583  };
584 
585  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
586  {
587  UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1);
588  return key ^ mask;
589  };
590 
591  static __host__ __device__ __forceinline__ T Max() {
592  return FpLimits<T>::Max();
593  }
594 
595  static __host__ __device__ __forceinline__ T Lowest() {
596  return FpLimits<T>::Lowest();
597  }
598 };
599 
600 
604 template <typename T> struct NumericTraits : BaseTraits<NOT_A_NUMBER, false, false, T, T> {};
605 
606 template <> struct NumericTraits<NullType> : BaseTraits<NOT_A_NUMBER, false, true, NullType, NullType> {};
607 
608 template <> struct NumericTraits<char> : BaseTraits<(std::numeric_limits<char>::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {};
609 template <> struct NumericTraits<signed char> : BaseTraits<SIGNED_INTEGER, true, false, unsigned char, signed char> {};
610 template <> struct NumericTraits<short> : BaseTraits<SIGNED_INTEGER, true, false, unsigned short, short> {};
611 template <> struct NumericTraits<int> : BaseTraits<SIGNED_INTEGER, true, false, unsigned int, int> {};
612 template <> struct NumericTraits<long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long, long> {};
613 template <> struct NumericTraits<long long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long long, long long> {};
614 
615 template <> struct NumericTraits<unsigned char> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned char, unsigned char> {};
616 template <> struct NumericTraits<unsigned short> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned short, unsigned short> {};
617 template <> struct NumericTraits<unsigned int> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned int, unsigned int> {};
618 template <> struct NumericTraits<unsigned long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long, unsigned long> {};
619 template <> struct NumericTraits<unsigned long long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long long, unsigned long long> {};
620 
621 template <> struct NumericTraits<float> : BaseTraits<FLOATING_POINT, true, false, unsigned int, float> {};
622 template <> struct NumericTraits<double> : BaseTraits<FLOATING_POINT, true, false, unsigned long long, double> {};
623 template <> struct NumericTraits<__half> : BaseTraits<FLOATING_POINT, true, false, unsigned short, __half> {};
624 template <> struct NumericTraits<hip_bfloat16 > : BaseTraits<FLOATING_POINT, true, false, unsigned short, hip_bfloat16 > {};
625 
626 template <> struct NumericTraits<bool> : BaseTraits<UNSIGNED_INTEGER, true, false, typename UnitWord<bool>::VolatileWord, bool> {};
627 
631 template <typename T>
632 struct Traits : NumericTraits<typename RemoveQualifiers<T>::Type> {};
633 
634 #endif // DOXYGEN_SHOULD_SKIP_THIS
635 
636 END_HIPCUB_NAMESPACE
637 
638 #endif // HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
Definition: util_type.hpp:107
Definition: util_type.hpp:53
Definition: util_type.hpp:143
Definition: util_type.hpp:59
Definition: util_type.hpp:65
Definition: util_type.hpp:100
Definition: util_type.hpp:77
Definition: util_type.hpp:71
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.hpp:359
__host__ __device__ __forceinline__ T & Alias()
Alias.
Definition: util_type.hpp:372
UnitWord< T >::DeviceWord DeviceWord
Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T.
Definition: util_type.hpp:361