30 #ifndef HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
31 #define HIPCUB_ROCPRIM_UTIL_TYPE_HPP_
34 #include <type_traits>
36 #include "../../config.hpp"
38 #include <rocprim/detail/various.hpp>
39 #include <rocprim/types/future_value.hpp>
41 #include <hip/hip_fp16.h>
42 #include <hip/hip_bfloat16.h>
44 BEGIN_HIPCUB_NAMESPACE
46 #ifndef DOXYGEN_SHOULD_SKIP_THIS
48 using NullType = ::rocprim::empty_type;
52 template<
bool B,
typename T,
typename F>
struct
53 [[deprecated(
"[Since 1.16] If is deprecated use std::conditional instead.")]]
If
55 using Type =
typename std::conditional<B, T, F>::type;
58 template<
typename T>
struct
59 [[deprecated(
"[Since 1.16] IsPointer is deprecated use std::is_pointer instead.")]]
IsPointer
61 static constexpr
bool VALUE = std::is_pointer<T>::value;
64 template<
typename T>
struct
65 [[deprecated(
"[Since 1.16] IsVolatile is deprecated use std::is_volatile instead.")]]
IsVolatile
67 static constexpr
bool VALUE = std::is_volatile<T>::value;
70 template<
typename T>
struct
73 using Type =
typename std::remove_cv<T>::type;
79 static constexpr
bool VALUE = ::rocprim::detail::is_power_of_two(N);
85 template<
int N,
int CURRENT_VAL = N,
int COUNT = 0>
88 static constexpr
int VALUE = Log2Impl<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE;
91 template<
int N,
int COUNT>
92 struct Log2Impl<N, 0, COUNT>
94 static constexpr
int VALUE = (1 << (COUNT - 1) < N) ? COUNT : COUNT - 1;
102 static_assert(N != 0,
"The logarithm of zero is undefined");
103 static constexpr
int VALUE = detail::Log2Impl<N>::VALUE;
113 HIPCUB_HOST_DEVICE
inline
117 d_buffers[0] =
nullptr;
118 d_buffers[1] =
nullptr;
121 HIPCUB_HOST_DEVICE
inline
125 d_buffers[0] = d_current;
126 d_buffers[1] = d_alternate;
129 HIPCUB_HOST_DEVICE
inline
132 return d_buffers[selector];
135 HIPCUB_HOST_DEVICE
inline
138 return d_buffers[selector ^ 1];
148 #ifndef DOXYGEN_SHOULD_SKIP_THIS
154 using KeyValuePair = ::rocprim::key_value_pair<Key, Value>;
158 template <
typename T,
typename Iter = T*>
159 using FutureValue = ::rocprim::future_value<T, Iter>;
168 return ::rocprim::double_buffer<T>(source.Current(), source.Alternate());
173 void update_double_buffer(DoubleBuffer<T>& target, ::rocprim::double_buffer<T>& source)
175 if(target.Current() != source.current())
177 target.selector ^= 1;
181 #ifndef DOXYGEN_SHOULD_SKIP_THIS
183 template <
typename T>
184 using is_integral_or_enum =
185 std::integral_constant<bool, std::is_integral<T>::value || std::is_enum<T>::value>;
191 template <
typename NumeratorT,
typename DenominatorT>
192 HIPCUB_HOST_DEVICE __forceinline__ constexpr NumeratorT
193 DivideAndRoundUp(NumeratorT n, DenominatorT d)
195 static_assert(hipcub::detail::is_integral_or_enum<NumeratorT>::value &&
196 hipcub::detail::is_integral_or_enum<DenominatorT>::value,
197 "DivideAndRoundUp is only intended for integral types.");
200 return static_cast<NumeratorT
>(n / d + (n % d != 0 ? 1 : 0));
203 #ifndef DOXYGEN_SHOULD_SKIP_THIS
210 template <
typename T>
222 ALIGN_BYTES =
sizeof(Pad) -
sizeof(T)
233 #define __HIPCUB_ALIGN_BYTES(t, b) \
234 template <> struct AlignBytes<t> \
235 { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; };
237 __HIPCUB_ALIGN_BYTES(short4, 8)
238 __HIPCUB_ALIGN_BYTES(ushort4, 8)
239 __HIPCUB_ALIGN_BYTES(int2, 8)
240 __HIPCUB_ALIGN_BYTES(uint2, 8)
241 __HIPCUB_ALIGN_BYTES(
long long, 8)
242 __HIPCUB_ALIGN_BYTES(
unsigned long long, 8)
243 __HIPCUB_ALIGN_BYTES(float2, 8)
244 __HIPCUB_ALIGN_BYTES(
double, 8)
246 __HIPCUB_ALIGN_BYTES(long2, 8)
247 __HIPCUB_ALIGN_BYTES(ulong2, 8)
249 __HIPCUB_ALIGN_BYTES(long2, 16)
250 __HIPCUB_ALIGN_BYTES(ulong2, 16)
252 __HIPCUB_ALIGN_BYTES(int4, 16)
253 __HIPCUB_ALIGN_BYTES(uint4, 16)
254 __HIPCUB_ALIGN_BYTES(float4, 16)
255 __HIPCUB_ALIGN_BYTES(long4, 16)
256 __HIPCUB_ALIGN_BYTES(ulong4, 16)
257 __HIPCUB_ALIGN_BYTES(longlong2, 16)
258 __HIPCUB_ALIGN_BYTES(ulonglong2, 16)
259 __HIPCUB_ALIGN_BYTES(double2, 16)
260 __HIPCUB_ALIGN_BYTES(longlong4, 16)
261 __HIPCUB_ALIGN_BYTES(ulonglong4, 16)
262 __HIPCUB_ALIGN_BYTES(double4, 16)
264 template <typename T> struct AlignBytes<volatile T> : AlignBytes<T> {};
265 template <
typename T>
struct AlignBytes<const T> : AlignBytes<T> {};
266 template <
typename T>
struct AlignBytes<const volatile T> : AlignBytes<T> {};
270 template <
typename T>
274 ALIGN_BYTES = AlignBytes<T>::ALIGN_BYTES
277 template <
typename Unit>
281 UNIT_ALIGN_BYTES = AlignBytes<Unit>::ALIGN_BYTES,
282 IS_MULTIPLE = (
sizeof(T) %
sizeof(Unit) == 0) && (
int(ALIGN_BYTES) % int(UNIT_ALIGN_BYTES) == 0)
287 typedef typename std::conditional<IsMultiple<int>::IS_MULTIPLE,
289 typename std::conditional<IsMultiple<short>::IS_MULTIPLE,
291 unsigned char>::type>::type ShuffleWord;
294 typedef typename std::conditional<IsMultiple<long long>::IS_MULTIPLE,
296 ShuffleWord>::type VolatileWord;
299 typedef typename std::conditional<IsMultiple<longlong2>::IS_MULTIPLE,
301 VolatileWord>::type DeviceWord;
304 typedef typename std::conditional<IsMultiple<int4>::IS_MULTIPLE,
306 typename std::conditional<IsMultiple<int2>::IS_MULTIPLE,
308 ShuffleWord>::type>::type TextureWord;
314 struct UnitWord <float2>
316 typedef int ShuffleWord;
317 typedef unsigned long long VolatileWord;
318 typedef unsigned long long DeviceWord;
319 typedef float2 TextureWord;
324 struct UnitWord <float4>
326 typedef int ShuffleWord;
327 typedef unsigned long long VolatileWord;
328 typedef ulonglong2 DeviceWord;
329 typedef float4 TextureWord;
335 struct UnitWord <char2>
337 typedef unsigned short ShuffleWord;
338 typedef unsigned short VolatileWord;
339 typedef unsigned short DeviceWord;
340 typedef unsigned short TextureWord;
344 template <
typename T>
struct UnitWord<volatile T> : UnitWord<T> {};
345 template <
typename T>
struct UnitWord<const T> : UnitWord<T> {};
346 template <
typename T>
struct UnitWord<const volatile T> : UnitWord<T> {};
361 template <
typename T>
367 static constexpr std::size_t DATA_SIZE =
sizeof(T);
368 static constexpr std::size_t WORD_SIZE =
sizeof(
DeviceWord);
369 static constexpr std::size_t WORDS = DATA_SIZE / WORD_SIZE;
375 HIPCUB_HOST_DEVICE __forceinline__ T&
Alias()
377 return reinterpret_cast<T&
>(*this);
393 #ifndef DOXYGEN_SHOULD_SKIP_THIS
410 template <Category _CATEGORY,
bool _PRIMITIVE,
bool _NULL_TYPE,
typename _Un
signedBits,
typename T>
414 static const Category CATEGORY = _CATEGORY;
417 PRIMITIVE = _PRIMITIVE,
418 NULL_TYPE = _NULL_TYPE,
426 template <
typename _Un
signedBits,
typename T>
427 struct BaseTraits<UNSIGNED_INTEGER, true, false, _UnsignedBits, T>
429 typedef _UnsignedBits UnsignedBits;
431 static const Category CATEGORY = UNSIGNED_INTEGER;
432 static const UnsignedBits LOWEST_KEY = UnsignedBits(0);
433 static const UnsignedBits MAX_KEY = UnsignedBits(-1);
442 static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
447 static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
452 static HIPCUB_HOST_DEVICE __forceinline__ T Max()
454 UnsignedBits retval_bits = MAX_KEY;
456 memcpy(&retval, &retval_bits,
sizeof(T));
460 static HIPCUB_HOST_DEVICE __forceinline__ T Lowest()
462 UnsignedBits retval_bits = LOWEST_KEY;
464 memcpy(&retval, &retval_bits,
sizeof(T));
473 template <
typename _Un
signedBits,
typename T>
474 struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>
476 typedef _UnsignedBits UnsignedBits;
478 static const Category CATEGORY = SIGNED_INTEGER;
479 static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((
sizeof(UnsignedBits) * 8) - 1);
480 static const UnsignedBits LOWEST_KEY = HIGH_BIT;
481 static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
489 static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
491 return key ^ HIGH_BIT;
494 static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
496 return key ^ HIGH_BIT;
499 static HIPCUB_HOST_DEVICE __forceinline__ T Max()
501 UnsignedBits retval = MAX_KEY;
502 return reinterpret_cast<T&
>(retval);
505 static HIPCUB_HOST_DEVICE __forceinline__ T Lowest()
507 UnsignedBits retval = LOWEST_KEY;
508 return reinterpret_cast<T&
>(retval);
512 template <
typename _T>
516 struct FpLimits<float>
518 static HIPCUB_HOST_DEVICE __forceinline__
float Max() {
519 return std::numeric_limits<float>::max();
522 static HIPCUB_HOST_DEVICE __forceinline__
float Lowest() {
523 return std::numeric_limits<float>::max() * float(-1);
528 struct FpLimits<double>
530 static HIPCUB_HOST_DEVICE __forceinline__
double Max() {
531 return std::numeric_limits<double>::max();
534 static HIPCUB_HOST_DEVICE __forceinline__
double Lowest() {
535 return std::numeric_limits<double>::max() * double(-1);
540 struct FpLimits<__half>
542 static HIPCUB_HOST_DEVICE __forceinline__ __half Max() {
543 unsigned short max_word = 0x7BFF;
544 return reinterpret_cast<__half&
>(max_word);
547 static HIPCUB_HOST_DEVICE __forceinline__ __half Lowest() {
548 unsigned short lowest_word = 0xFBFF;
549 return reinterpret_cast<__half&
>(lowest_word);
554 struct FpLimits<hip_bfloat16>
556 static HIPCUB_HOST_DEVICE __forceinline__ hip_bfloat16 Max() {
557 unsigned short max_word = 0x7F7F;
558 return reinterpret_cast<hip_bfloat16 &
>(max_word);
561 static HIPCUB_HOST_DEVICE __forceinline__ hip_bfloat16 Lowest() {
562 unsigned short lowest_word = 0xFF7F;
563 return reinterpret_cast<hip_bfloat16 &
>(lowest_word);
570 template <
typename _Un
signedBits,
typename T>
571 struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>
573 typedef _UnsignedBits UnsignedBits;
575 static const Category CATEGORY = FLOATING_POINT;
576 static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((
sizeof(UnsignedBits) * 8) - 1);
577 static const UnsignedBits LOWEST_KEY = UnsignedBits(-1);
578 static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
586 static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
588 UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT;
592 static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
594 UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1);
598 static HIPCUB_HOST_DEVICE __forceinline__ T Max() {
599 return FpLimits<T>::Max();
602 static HIPCUB_HOST_DEVICE __forceinline__ T Lowest() {
603 return FpLimits<T>::Lowest();
611 template <
typename T>
struct NumericTraits : BaseTraits<NOT_A_NUMBER, false, false, T, T> {};
613 template <>
struct NumericTraits<NullType> : BaseTraits<NOT_A_NUMBER, false, true, NullType, NullType> {};
615 template <>
struct NumericTraits<char> : BaseTraits<(std::numeric_limits<char>::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {};
616 template <>
struct NumericTraits<signed char> : BaseTraits<SIGNED_INTEGER, true, false, unsigned char, signed char> {};
617 template <>
struct NumericTraits<short> : BaseTraits<SIGNED_INTEGER, true, false, unsigned short, short> {};
618 template <>
struct NumericTraits<int> : BaseTraits<SIGNED_INTEGER, true, false, unsigned int, int> {};
619 template <>
struct NumericTraits<long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long, long> {};
620 template <>
struct NumericTraits<long long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long long, long long> {};
622 template <>
struct NumericTraits<unsigned char> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned char, unsigned char> {};
623 template <>
struct NumericTraits<unsigned short> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned short, unsigned short> {};
624 template <>
struct NumericTraits<unsigned int> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned int, unsigned int> {};
625 template <>
struct NumericTraits<unsigned long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long, unsigned long> {};
626 template <>
struct NumericTraits<unsigned long long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long long, unsigned long long> {};
628 template <>
struct NumericTraits<float> : BaseTraits<FLOATING_POINT, true, false, unsigned int, float> {};
629 template <>
struct NumericTraits<double> : BaseTraits<FLOATING_POINT, true, false, unsigned long long, double> {};
630 template <>
struct NumericTraits<__half> : BaseTraits<FLOATING_POINT, true, false, unsigned short, __half> {};
631 template <>
struct NumericTraits<hip_bfloat16 > : BaseTraits<FLOATING_POINT, true, false, unsigned short, hip_bfloat16 > {};
633 template <>
struct NumericTraits<bool> : BaseTraits<UNSIGNED_INTEGER, true, false, typename UnitWord<bool>::VolatileWord, bool> {};
638 template <
typename T>
639 struct Traits : NumericTraits<typename std::remove_cv<T>::type> {};
Definition: util_type.hpp:108
Definition: util_type.hpp:54
Definition: util_type.hpp:144
Definition: util_type.hpp:60
Definition: util_type.hpp:66
Definition: util_type.hpp:101
Definition: util_type.hpp:78
Definition: util_type.hpp:72
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.hpp:363
__host__ __device__ __forceinline__ T & Alias()
Alias.
Definition: util_type.hpp:375
UnitWord< T >::DeviceWord DeviceWord
Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T.
Definition: util_type.hpp:365