21 #ifndef ROCRAND_COMMON_H_
22 #define ROCRAND_COMMON_H_
// Numeric constants shared by the rocRAND device code.
//
// Naming: ROCRAND_2POWn_INV is 1 / 2^n, and the *_2PI variants are that
// reciprocal multiplied by 2 * pi. Macros without a suffix are float literals
// (note the trailing 'f'); macros ending in _DOUBLE are double literals.
#define ROCRAND_2POW16_INV (1.5258789e-05f)
#define ROCRAND_2POW16_INV_2PI (9.58738e-05f)
#define ROCRAND_2POW32_INV (2.3283064e-10f)
#define ROCRAND_2POW32_INV_DOUBLE (2.3283064365386963e-10)
#define ROCRAND_2POW64_INV (5.4210109e-20f)
#define ROCRAND_2POW64_INV_DOUBLE (5.4210108624275221700372640043497e-20)
#define ROCRAND_2POW32_INV_2PI (1.46291807e-09f)
#define ROCRAND_2POW53_INV_DOUBLE (1.1102230246251565e-16)

// pi, 2 * pi and sqrt(2).
#define ROCRAND_PI (3.141592653f)
#define ROCRAND_PI_DOUBLE (3.1415926535897932)
#define ROCRAND_2PI (6.2831855f)
#define ROCRAND_SQRT2 (1.4142135f)
#define ROCRAND_SQRT2_DOUBLE (1.4142135623730951)

// IEEE-754 quiet-NaN bit patterns for binary32 and binary64.
// NOTE: these are *integer* literals. Used directly in a floating-point
// expression (assignment to or comparison with a float/double) they are
// converted by value to a large finite number; the bits are not
// reinterpreted, so the result is not a NaN.
#define ROCRAND_NAN_FLOAT (0x7fc00000)
#define ROCRAND_NAN_DOUBLE (0x7ff8000000000000)
40 #include <hip/hip_runtime.h>
// Declaration prefix for kernel entry points: expands to `__global__ static`,
// i.e. a kernel function with internal linkage in each translation unit.
#define ROCRAND_KERNEL __global__ static
// ROCRAND_HALF_MATH_SUPPORTED is defined only while compiling device code
// (__HIP_DEVICE_COMPILE__ is non-zero) and either
//   - the platform is AMD (__HIP_PLATFORM_AMD__), or
//   - the platform is NVCC (__HIP_PLATFORM_NVCC__) with __CUDA_ARCH__ >= 530.
// In every other configuration (including host compilation) it stays undefined.
#if __HIP_DEVICE_COMPILE__ \
    && (defined(__HIP_PLATFORM_AMD__) \
        || (defined(__HIP_PLATFORM_NVCC__) && (__CUDA_ARCH__ >= 530)))
    #define ROCRAND_HALF_MATH_SUPPORTED
#endif
// Two-level stringification.
// ROCRAND_STRINGIZE(X) first macro-expands X and then turns the result into a
// string literal (e.g. ROCRAND_STRINGIZE(__LINE__) yields "42").
// ROCRAND_DO_STRINGIZE(X) applies '#' directly, so X is NOT expanded first.
#define ROCRAND_STRINGIZE(X) ROCRAND_DO_STRINGIZE(X)
#define ROCRAND_DO_STRINGIZE(X) #X
// ROCRAND_PRAGMA_MESSAGE(x): emits the string literal x as a compile-time
// message, using whichever pragma spelling the compiler understands.
//   - Intel and MSVC: __pragma(message(...)), prefixed with "file(line): note: ".
//   - GNU-compatible compilers (__GNUC__): _Pragma("message(x)").
//   - Anything else: expands to nothing.
// The Intel check comes first so that it takes precedence over the __GNUC__
// and _MSC_VER branches below it.
#if defined(__INTEL_COMPILER)
    #define ROCRAND_PRAGMA_MESSAGE(x) \
        __pragma(message(__FILE__ "(" ROCRAND_STRINGIZE(__LINE__) "): note: " x))
#elif defined(__GNUC__)
    #define ROCRAND_PRAGMA_MESSAGE(x) _Pragma(ROCRAND_STRINGIZE(message(x)))
#elif defined(_MSC_VER)
    #define ROCRAND_PRAGMA_MESSAGE(x) \
        __pragma(message(__FILE__ "(" ROCRAND_STRINGIZE(__LINE__) "): note: " x))
#else
    #define ROCRAND_PRAGMA_MESSAGE(x)
#endif
// ROCRAND_DEPRECATED(msg): marks the declaration that follows as deprecated,
// with msg as the diagnostic text.
//   - C++14 or newer: the standard [[deprecated(msg)]] attribute.
//   - MSVC (not clang-cl): __declspec(deprecated(msg)).
//   - clang / GCC: __attribute__((deprecated(msg))).
//   - Otherwise: expands to nothing, so the declaration is left unmarked.
#if __cplusplus >= 201402L
    #define ROCRAND_DEPRECATED(msg) [[deprecated(msg)]]
#elif defined(_MSC_VER) && !defined(__clang__)
    #define ROCRAND_DEPRECATED(msg) __declspec(deprecated(msg))
#elif defined(__clang__) || defined(__GNUC__)
    #define ROCRAND_DEPRECATED(msg) __attribute__((deprecated(msg)))
#else
    #define ROCRAND_DEPRECATED(msg)
#endif
// ROCRAND_HIPVEC_ACCESS(x): selects how a vector value x is accessed on the
// AMD platform. For HIP major versions below 7 it expands to `x.data`;
// from version 7 on it expands to `x` itself.
// NOTE(review): the visible source provides no definition for non-AMD
// platforms -- confirm whether a fallback (`x`) is expected there.
#if defined(__HIP_PLATFORM_AMD__)
    #if HIP_VERSION_MAJOR < 7
        #define ROCRAND_HIPVEC_ACCESS(x) x.data
    #else
        #define ROCRAND_HIPVEC_ACCESS(x) x
    #endif
#endif
119 namespace rocrand_device {
122 __forceinline__ __device__ __host__
124 mad_u64_u32(
const unsigned int x,
const unsigned int y,
const unsigned long long z)
126 return static_cast<unsigned long long>(x) *
static_cast<unsigned long long>(y) + z;
129 __forceinline__ __device__ __host__
130 unsigned long long mul_u64_u32(
const unsigned int x,
const unsigned int y)
132 return static_cast<unsigned long long>(x) *
static_cast<unsigned long long>(y);
137 template<
typename Engine>
138 struct engine_boxmuller_helper
140 static __forceinline__ __device__ __host__
bool has_float(
const Engine* engine)
142 return engine->m_state.boxmuller_float != ROCRAND_NAN_FLOAT;
145 static __forceinline__ __device__ __host__
float get_float(Engine* engine)
147 const float ret = engine->m_state.boxmuller_float;
148 engine->m_state.boxmuller_float = ROCRAND_NAN_FLOAT;
152 static __forceinline__ __device__ __host__
void save_float(Engine* engine,
float f)
154 engine->m_state.boxmuller_float = f;
157 static __forceinline__ __device__ __host__
bool has_double(
const Engine* engine)
159 return engine->m_state.boxmuller_double != ROCRAND_NAN_DOUBLE;
162 static __forceinline__ __device__ __host__
163 double get_double(Engine* engine)
165 const double ret = engine->m_state.boxmuller_double;
166 engine->m_state.boxmuller_double = ROCRAND_NAN_DOUBLE;
170 static __forceinline__ __device__ __host__
void save_double(Engine* engine,
double d)
172 engine->m_state.boxmuller_double = d;
177 __forceinline__ __device__ __host__
void split_ull(T& lo, T& hi,
unsigned long long int val);
180 __forceinline__ __device__ __host__
void
181 split_ull(
unsigned int& lo,
unsigned int& hi,
unsigned long long int val)
183 lo = val & 0xFFFFFFFF;
184 hi = (val >> 32) & 0xFFFFFFFF;
188 __forceinline__ __device__ __host__
void
189 split_ull(
unsigned long long int& lo,
unsigned long long int& hi,
unsigned long long int val)