21 #ifndef ROCRAND_NORMAL_H_
22 #define ROCRAND_NORMAL_H_
31 #include "rocrand/rocrand_lfsr113.h"
32 #include "rocrand/rocrand_mrg31k3p.h"
33 #include "rocrand/rocrand_mrg32k3a.h"
34 #include "rocrand/rocrand_mtgp32.h"
35 #include "rocrand/rocrand_philox4x32_10.h"
36 #include "rocrand/rocrand_scrambled_sobol32.h"
37 #include "rocrand/rocrand_scrambled_sobol64.h"
38 #include "rocrand/rocrand_sobol32.h"
39 #include "rocrand/rocrand_sobol64.h"
40 #include "rocrand/rocrand_threefry2x32_20.h"
41 #include "rocrand/rocrand_threefry2x64_20.h"
42 #include "rocrand/rocrand_threefry4x32_20.h"
43 #include "rocrand/rocrand_threefry4x64_20.h"
44 #include "rocrand/rocrand_xorwow.h"
46 #include "rocrand/rocrand_uniform.h"
48 namespace rocrand_device {
// Box-Muller step for single precision: turns two unsigned 32-bit inputs
// into a float2 whose x comes from the sine and y from the cosine of the angle.
//   x - integer used to build u, the argument of the logarithm
//   y - integer used to build v, the angle
// NOTE(review): parts of this function (declaration of `result`, the rest of
// the device branch, the return) are not visible in this listing.
51 __forceinline__ __device__ __host__ float2 box_muller(
unsigned int x,
unsigned int y)
// u = (x + 1) * ROCRAND_2POW32_INV; the additive term keeps u away from zero
// (presumably the constant is 2^-32 - confirm in the header that defines it).
54 float u = ROCRAND_2POW32_INV + (x * ROCRAND_2POW32_INV);
// v = (y + 1) * ROCRAND_2POW32_INV_2PI is the angle passed to sin/cos.
55 float v = ROCRAND_2POW32_INV_2PI + (y * ROCRAND_2POW32_INV_2PI);
// s = sqrt(-2 ln u) is the common scale factor.
56 float s = sqrtf(-2.0f * logf(u));
// Device compilation uses the __sincosf intrinsic to get both values at once.
57 #ifdef __HIP_DEVICE_COMPILE__
58 __sincosf(v, &result.x, &result.y);
// The other visible path calls sinf/cosf separately and scales each by s.
62 result.x = sinf(v) * s;
63 result.y = cosf(v) * s;
68 __forceinline__ __device__ __host__ float2 box_muller(
unsigned long long v)
70 unsigned int x =
static_cast<unsigned int>(v);
71 unsigned int y =
static_cast<unsigned int>(v >> 32);
73 return box_muller(x, y);
// Box-Muller step for double precision driven by four 32-bit words.
// v.x/v.y build the logarithm argument u, v.z/v.w build the angle factor w.
// NOTE(review): the declaration of `result`, the remainder of the device
// branch and the return statement are not visible in this listing.
76 __forceinline__ __device__ __host__ double2 box_muller_double(uint4 v)
// XOR the first word with the second word shifted left by 21 bits, giving a
// value that fits in 53 bits (32 + 21).
79 unsigned long long int v1 = (
unsigned long long int)v.x ^
80 ((
unsigned long long int)v.y << (53 - 32));
// u = (v1 + 1) * ROCRAND_2POW53_INV_DOUBLE, so the log argument is never zero
// as long as the constant is positive.
81 double u = ROCRAND_2POW53_INV_DOUBLE + (v1 * ROCRAND_2POW53_INV_DOUBLE);
82 unsigned long long int v2 = (
unsigned long long int)v.z ^
83 ((
unsigned long long int)v.w << (53 - 32));
// w = (v2 + 1) * 2 * ROCRAND_2POW53_INV_DOUBLE; it is later multiplied by pi
// (explicitly on the sin/cos path, implicitly by sincospi).
84 double w = (ROCRAND_2POW53_INV_DOUBLE * 2.0) +
85 (v2 * (ROCRAND_2POW53_INV_DOUBLE * 2.0));
86 double s = sqrt(-2.0 * log(u));
87 #ifdef __HIP_DEVICE_COMPILE__
88 sincospi(w, &result.x, &result.y);
92 result.x = sin(w * ROCRAND_PI_DOUBLE) * s;
93 result.y = cos(w * ROCRAND_PI_DOUBLE) * s;
98 __forceinline__ __device__ __host__ double2 box_muller_double(ulonglong2 v)
100 unsigned int x =
static_cast<unsigned int>(v.x);
101 unsigned int y =
static_cast<unsigned int>(v.x >> 32);
102 unsigned int z =
static_cast<unsigned int>(v.y);
103 unsigned int w =
static_cast<unsigned int>(v.y >> 32);
105 return box_muller_double(make_uint4(x, y, z, w));
// Box-Muller step producing a __half2 from two unsigned 16-bit inputs.
// Two variants are visible: one using __half arithmetic when
// ROCRAND_HALF_MATH_SUPPORTED is defined, and one computing in float.
// NOTE(review): the sin/cos and packing code of both variants, and the
// preprocessor lines separating them, are not visible in this listing.
108 __forceinline__ __device__ __host__ __half2 box_muller_half(
unsigned short x,
unsigned short y)
110 #if defined(ROCRAND_HALF_MATH_SUPPORTED)
// Half-precision variant: u and v are computed in float, then converted.
111 __half u = __float2half(ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV));
112 __half v = __float2half(ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI));
// s = sqrt(-2 * log(u)) evaluated with half intrinsics.
113 __half s = hsqrt(__hmul(__float2half(-2.0f), hlog(u)));
// Float variant of the same three quantities.
120 float u = ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV);
121 float v = ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI);
122 float s = sqrtf(-2.0f * logf(u));
123 #ifdef __HIP_DEVICE_COMPILE__
124 __sincosf(v, &r.x, &r.y);
// Box-Muller step for MRG-family engines (single precision).
// state_type selects the mrg_uniform_distribution specialisation used to map
// the raw engine outputs x and y to uniform values.
// NOTE(review): declaration of `result`, the rest of the device branch and
// the return are not visible in this listing.
138 template<
typename state_type>
139 __forceinline__ __device__ __host__ float2 mrg_box_muller(
unsigned int x,
unsigned int y)
142 float u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
// The second uniform value is scaled by ROCRAND_2PI to form the angle.
143 float v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * ROCRAND_2PI;
144 float s = sqrtf(-2.0f * logf(u));
145 #ifdef __HIP_DEVICE_COMPILE__
146 __sincosf(v, &result.x, &result.y);
150 result.x = sinf(v) * s;
151 result.y = cosf(v) * s;
// Box-Muller step for MRG-family engines (double precision result).
// NOTE(review): u and v are obtained from mrg_uniform_distribution, the same
// helper the float version calls; if that helper returns float, the doubles
// here carry only single-precision resolution - confirm whether a
// double-returning helper was intended.
// NOTE(review): declaration of `result`, the rest of the device branch and
// the return are not visible in this listing.
156 template<
typename state_type>
157 __forceinline__ __device__ __host__ double2 mrg_box_muller_double(
unsigned int x,
unsigned int y)
160 double u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
// v is scaled by 2.0 and later multiplied by pi (sincospi / sin(v * pi)).
161 double v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * 2.0;
162 double s = sqrt(-2.0 * log(u));
163 #ifdef __HIP_DEVICE_COMPILE__
164 sincospi(v, &result.x, &result.y);
168 result.x = sin(v * ROCRAND_PI_DOUBLE) * s;
169 result.y = cos(v * ROCRAND_PI_DOUBLE) * s;
// Single-precision approximation of the inverse error function.
// NOTE(review): the 0.147 constant and the tt1/tt2 expressions look like
// Winitzki's closed-form erfinv approximation - confirm.
// NOTE(review): the assignment of lnx and the bodies of the special-case
// branches are not visible in this listing.
174 __forceinline__ __device__ __host__
float roc_f_erfinv(
float x)
176 float tt1, tt2, lnx, sgn;
// Keep the sign of the input; the final result is multiplied by it.
177 sgn = (x < 0.0f) ? -1.0f : 1.0f;
// x is overwritten with 1 - x^2.
179 x = (1.0f - x) * (1.0f + x);
// Special-case checks on lnx differ between device and host compilation;
// only the std::isinf test is visible here.
182 #ifdef __HIP_DEVICE_COMPILE__
188 #ifdef __HIP_DEVICE_COMPILE__
191 else if (std::isinf(lnx))
195 tt1 = 2.0f / (ROCRAND_PI * 0.147f) + 0.5f * lnx;
196 tt2 = 1.0f / (0.147f) * lnx;
198 return(sgn * sqrtf(-tt1 + sqrtf(tt1 * tt1 - tt2)));
// Double-precision approximation of the inverse error function; mirrors
// roc_f_erfinv with double constants and sqrt instead of sqrtf.
// NOTE(review): the assignment of lnx and the bodies of the special-case
// branches are not visible in this listing.
201 __forceinline__ __device__ __host__
double roc_d_erfinv(
double x)
203 double tt1, tt2, lnx, sgn;
// Keep the sign of the input; the final result is multiplied by it.
204 sgn = (x < 0.0) ? -1.0 : 1.0;
// x is overwritten with 1 - x^2.
206 x = (1.0 - x) * (1.0 + x);
209 #ifdef __HIP_DEVICE_COMPILE__
215 #ifdef __HIP_DEVICE_COMPILE__
218 else if (std::isinf(lnx))
222 tt1 = 2.0 / (ROCRAND_PI_DOUBLE * 0.147) + 0.5 * lnx;
223 tt2 = 1.0 / (0.147) * lnx;
225 return(sgn * sqrt(-tt1 + sqrt(tt1 * tt1 - tt2)));
// Maps one 32-bit integer to a float via the inverse-CDF route:
// p = uniform_distribution(x), then v = ROCRAND_SQRT2 * erfinv(2p - 1).
// NOTE(review): the return statement is not visible in this listing.
228 __forceinline__ __device__ __host__
float normal_distribution(
unsigned int x)
230 float p = ::rocrand_device::detail::uniform_distribution(x);
231 float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
// 64-bit-input overload of the inverse-CDF mapping:
// p = uniform_distribution(x), then v = ROCRAND_SQRT2 * erfinv(2p - 1).
// NOTE(review): the return statement is not visible in this listing.
235 __forceinline__ __device__ __host__
float normal_distribution(
unsigned long long int x)
237 float p = ::rocrand_device::detail::uniform_distribution(x);
238 float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
242 __forceinline__ __device__ __host__ float2 normal_distribution2(
unsigned int v1,
unsigned int v2)
244 return ::rocrand_device::detail::box_muller(v1, v2);
247 __forceinline__ __device__ __host__ float2 normal_distribution2(uint2 v)
249 return ::rocrand_device::detail::box_muller(v.x, v.y);
252 __forceinline__ __device__ __host__ float2 normal_distribution2(
unsigned long long v)
254 return ::rocrand_device::detail::box_muller(v);
// Builds four floats from a uint4: (v.x, v.y) feed one box_muller call and
// (v.z, v.w) feed a second one.
// NOTE(review): the statement that packs r1/r2 into the returned float4 is
// not visible in this listing.
257 __forceinline__ __device__ __host__ float4 normal_distribution4(uint4 v)
259 float2 r1 = ::rocrand_device::detail::box_muller(v.x, v.y);
260 float2 r2 = ::rocrand_device::detail::box_muller(v.z, v.w);
269 __forceinline__ __device__ __host__ float4 normal_distribution4(longlong2 v)
271 float2 r1 = ::rocrand_device::detail::box_muller(v.x);
272 float2 r2 = ::rocrand_device::detail::box_muller(v.y);
273 return float4{r1.x, r1.y, r2.x, r2.y};
276 __forceinline__ __device__ __host__ float4 normal_distribution4(
unsigned long long v1,
277 unsigned long long v2)
279 float2 r1 = ::rocrand_device::detail::box_muller(v1);
280 float2 r2 = ::rocrand_device::detail::box_muller(v2);
281 return float4{r1.x, r1.y, r2.x, r2.y};
// Maps one 32-bit integer to a double via the inverse-CDF route:
// p = uniform_distribution_double(x), then v = ROCRAND_SQRT2 * erfinv(2p - 1).
// NOTE(review): the return statement is not visible in this listing.
284 __forceinline__ __device__ __host__
double normal_distribution_double(
unsigned int x)
286 double p = ::rocrand_device::detail::uniform_distribution_double(x);
287 double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
// 64-bit-input overload of the double inverse-CDF mapping:
// p = uniform_distribution_double(x), then v = ROCRAND_SQRT2 * erfinv(2p - 1).
// NOTE(review): the return statement is not visible in this listing.
291 __forceinline__ __device__ __host__
double normal_distribution_double(
unsigned long long int x)
293 double p = ::rocrand_device::detail::uniform_distribution_double(x);
294 double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
298 __forceinline__ __device__ __host__ double2 normal_distribution_double2(uint4 v)
300 return ::rocrand_device::detail::box_muller_double(v);
303 __forceinline__ __device__ __host__ double2 normal_distribution_double2(ulonglong2 v)
305 return ::rocrand_device::detail::box_muller_double(v);
308 __forceinline__ __device__ __host__ __half2 normal_distribution_half2(
unsigned int v)
310 return ::rocrand_device::detail::box_muller_half(
311 static_cast<unsigned short>(v),
312 static_cast<unsigned short>(v >> 16)
316 __forceinline__ __device__ __host__ __half2 normal_distribution_half2(
unsigned long long v)
318 return ::rocrand_device::detail::box_muller_half(
static_cast<unsigned short>(v),
319 static_cast<unsigned short>(v >> 32));
// MRG-engine wrapper: forwards v1 and v2 to mrg_box_muller<state_type>.
// NOTE(review): the declaration of the second parameter (v2) is not visible
// in this listing.
322 template<
typename state_type>
323 __forceinline__ __device__ __host__ float2 mrg_normal_distribution2(
unsigned int v1,
326 return ::rocrand_device::detail::mrg_box_muller<state_type>(v1, v2);
// MRG-engine wrapper: forwards v1 and v2 to mrg_box_muller_double<state_type>.
// NOTE(review): the declaration of the second parameter (v2) is not visible
// in this listing.
329 template<
typename state_type>
330 __forceinline__ __device__ __host__ double2 mrg_normal_distribution_double2(
unsigned int v1,
333 return ::rocrand_device::detail::mrg_box_muller_double<state_type>(v1, v2);
336 template<
typename state_type>
337 __forceinline__ __device__ __host__ __half2 mrg_normal_distribution_half2(
unsigned int v)
339 v = rocrand_device::detail::mrg_uniform_distribution_uint<state_type>(v);
340 return ::rocrand_device::detail::box_muller_half(
341 static_cast<unsigned short>(v),
342 static_cast<unsigned short>(v >> 16)
// rocrand_normal for the Philox4x32-10 engine; compiled only when
// ROCRAND_DETAIL_BM_NOT_IN_STATE is not defined.
// If the state already holds a float (has_float), that value is returned.
// Otherwise a pair is generated with normal_distribution2 and r.y is stored
// in the state via save_float.
// NOTE(review): the definitions of state1/state2 and the final return are not
// visible in this listing.
363 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
364 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_philox4x32_10* state)
366 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
368 if(bm_helper::has_float(state))
370 return bm_helper::get_float(state);
376 float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
377 bm_helper::save_float(state, r.y);
// rocrand_normal2 for the Philox4x32-10 engine: returns the float2 produced
// by normal_distribution2(state1, state2).
// NOTE(review): the definitions of state1/state2 are not visible in this
// listing.
396 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_philox4x32_10* state)
401 return rocrand_device::detail::normal_distribution2(state1, state2);
418 __forceinline__ __device__ __host__ float4
rocrand_normal4(rocrand_state_philox4x32_10* state)
420 return rocrand_device::detail::normal_distribution4(
rocrand4(state));
// Fragments of the double-precision Philox4x32-10 functions; their signatures
// are not visible in this listing.
// First fragment: if the state already holds a double (has_double) it is
// returned; otherwise a pair is generated from rocrand4(state) and r.y is
// stored via save_double.
437 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
440 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
442 if(bm_helper::has_double(state))
444 return bm_helper::get_double(state);
446 double2 r = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
447 bm_helper::save_double(state, r.y);
// Second fragment: a double2 is produced from a single rocrand4(state) draw.
466 __forceinline__ __device__ __host__ double2
469 return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
// Third fragment: two rocrand4(state) draws give two double2 values that are
// combined in the order r1.x, r1.y, r2.x, r2.y.
486 __forceinline__ __device__ __host__ double4
490 r1 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
491 r2 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
493 r1.x, r1.y, r2.x, r2.y
// rocrand_normal for the MRG31k3p engine; compiled only when
// ROCRAND_DETAIL_BM_NOT_IN_STATE is not defined.
// If the state already holds a float (has_float), that value is returned.
// Otherwise two engine outputs are drawn with state->next(), converted with
// mrg_normal_distribution2, and r.y is stored via save_float.
// NOTE(review): the declaration of r and the final return are not visible in
// this listing.
511 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
512 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_mrg31k3p* state)
514 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
516 if(bm_helper::has_float(state))
518 return bm_helper::get_float(state);
521 auto state1 = state->next();
522 auto state2 = state->next();
525 = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
526 bm_helper::save_float(state, r.y);
545 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_mrg31k3p* state)
547 auto state1 = state->next();
548 auto state2 = state->next();
550 return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
// Fragments of the double-precision MRG31k3p functions; their signatures and
// the tails of the multi-line calls are not visible in this listing.
// First fragment: returns the double held in the state if has_double is true;
// otherwise draws two engine outputs, converts them with
// mrg_normal_distribution_double2 and stores r.y via save_double.
567 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
570 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
572 if(bm_helper::has_double(state))
574 return bm_helper::get_double(state);
577 auto state1 = state->next();
578 auto state2 = state->next();
581 = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
583 bm_helper::save_double(state, r.y);
// Second fragment: draws two engine outputs and returns the converted pair.
604 auto state1 = state->next();
605 auto state2 = state->next();
607 return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
// rocrand_normal for the MRG32k3a engine; compiled only when
// ROCRAND_DETAIL_BM_NOT_IN_STATE is not defined.
// If the state already holds a float (has_float), that value is returned.
// Otherwise two engine outputs are drawn with state->next(), converted with
// mrg_normal_distribution2, and r.y is stored via save_float.
// NOTE(review): the declaration of r and the final return are not visible in
// this listing.
625 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
626 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_mrg32k3a* state)
628 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
630 if(bm_helper::has_float(state))
632 return bm_helper::get_float(state);
635 auto state1 = state->next();
636 auto state2 = state->next();
639 = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
640 bm_helper::save_float(state, r.y);
659 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_mrg32k3a* state)
661 auto state1 = state->next();
662 auto state2 = state->next();
664 return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
// Fragments of the double-precision MRG32k3a functions; their signatures and
// the tails of the multi-line calls are not visible in this listing.
// First fragment: returns the double held in the state if has_double is true;
// otherwise draws two engine outputs, converts them with
// mrg_normal_distribution_double2 and stores r.y via save_double.
681 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
684 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
686 if(bm_helper::has_double(state))
688 return bm_helper::get_double(state);
691 auto state1 = state->next();
692 auto state2 = state->next();
695 = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
697 bm_helper::save_double(state, r.y);
// Second fragment: draws two engine outputs and returns the converted pair.
718 auto state1 = state->next();
719 auto state2 = state->next();
721 return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
// rocrand_normal for the XORWOW engine; compiled only when
// ROCRAND_DETAIL_BM_NOT_IN_STATE is not defined.
// If the state already holds a float (has_float), that value is returned.
// Otherwise a pair is generated with normal_distribution2 and r.y is stored
// in the state via save_float.
// NOTE(review): the definitions of state1/state2 and the final return are not
// visible in this listing.
739 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
740 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_xorwow* state)
742 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
744 if(bm_helper::has_float(state))
746 return bm_helper::get_float(state);
750 float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
751 bm_helper::save_float(state, r.y);
// rocrand_normal2 for the XORWOW engine: returns the float2 produced by
// normal_distribution2(state1, state2).
// NOTE(review): the definitions of state1/state2 are not visible in this
// listing.
770 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_xorwow* state)
774 return rocrand_device::detail::normal_distribution2(state1, state2);
// Fragments of several functions whose signatures are not visible in this
// listing.
// First fragment (XORWOW helper type): returns the double held in the state
// if has_double is true; otherwise builds a double2 from four values packed
// into a uint4 and stores r.y via save_double.
791 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
794 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
796 if(bm_helper::has_double(state))
798 return bm_helper::get_double(state);
806 double2 r = rocrand_device::detail::normal_distribution_double2(
807 uint4 { state1, state2, state3, state4 }
809 bm_helper::save_double(state, r.y);
// Fragment: double2 built from four values packed into a uint4.
835 return rocrand_device::detail::normal_distribution_double2(
836 uint4 { state1, state2, state3, state4 }
// Fragments below belong to an engine that cannot be identified from the
// visible lines; each forwards raw draws to the matching detail helper.
854 return rocrand_device::detail::normal_distribution(
rocrand(state));
875 return rocrand_device::detail::normal_distribution2(state1, state2);
892 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
916 return rocrand_device::detail::normal_distribution_double2(
917 uint4{state1, state2, state3, state4});
932 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_sobol32* state)
934 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Fragment (signature not visible in this listing): converts one
// rocrand(state) draw with detail::normal_distribution_double.
951 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
966 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_scrambled_sobol32* state)
968 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Fragment of a double-returning function (name and parameter list are not
// visible in this listing): converts one rocrand(state) draw with
// detail::normal_distribution_double.
983 __forceinline__ __device__ __host__
double
986 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1001 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_sobol64* state)
1003 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Fragment (signature not visible in this listing): converts one
// rocrand(state) draw with detail::normal_distribution_double.
1020 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1035 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_scrambled_sobol64* state)
1037 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Fragment of a double-returning function (name and parameter list are not
// visible in this listing): converts one rocrand(state) draw with
// detail::normal_distribution_double.
1052 __forceinline__ __device__ __host__
double
1055 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1070 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_lfsr113* state)
1072 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Fragments of three functions whose signatures and local definitions
// (state1..state4) are not visible in this listing. Each forwards raw draws
// to the matching detail helper: a float2 from two values, a double from one
// rocrand(state) draw, and a double2 from four values packed into a uint4.
1094 return rocrand_device::detail::normal_distribution2(state1, state2);
1111 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1135 return rocrand_device::detail::normal_distribution_double2(
1136 uint4{state1, state2, state3, state4});
1151 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_threefry2x32_20* state)
1153 return rocrand_device::detail::normal_distribution(
rocrand(state));
1170 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_threefry2x32_20* state)
1172 return rocrand_device::detail::normal_distribution2(rocrand2(state));
// Fragments of two functions whose names and parameter lists are not visible
// in this listing.
// First fragment: converts one rocrand(state) draw with
// detail::normal_distribution_double.
1187 __forceinline__ __device__ __host__
double
1190 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Second fragment: two rocrand2(state) draws are packed into a uint4 in the
// order state1.x, state1.y, state2.x, state2.y and converted to a double2.
1207 __forceinline__ __device__ __host__ double2
1210 auto state1 = rocrand2(state);
1211 auto state2 = rocrand2(state);
1213 return rocrand_device::detail::normal_distribution_double2(
1214 uint4{state1.x, state1.y, state2.x, state2.y});
1229 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_threefry2x64_20* state)
1231 return rocrand_device::detail::normal_distribution(
rocrand(state));
1248 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_threefry2x64_20* state)
1250 return rocrand_device::detail::normal_distribution2(
rocrand(state));
// Fragments of two functions whose names and parameter lists are not visible
// in this listing.
// First fragment: converts one rocrand(state) draw with
// detail::normal_distribution_double.
1265 __forceinline__ __device__ __host__
double
1268 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Second fragment: converts one rocrand2(state) draw to a double2.
1285 __forceinline__ __device__ __host__ double2
1288 return rocrand_device::detail::normal_distribution_double2(rocrand2(state));
1303 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_threefry4x32_20* state)
1305 return rocrand_device::detail::normal_distribution(
rocrand(state));
// rocrand_normal2 for the Threefry4x32-20 engine: returns the float2 produced
// by normal_distribution2(state1, state2).
// NOTE(review): the definitions of state1/state2 are not visible in this
// listing.
1322 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_threefry4x32_20* state)
1327 return rocrand_device::detail::normal_distribution2(state1, state2);
// Fragments of two functions whose names and parameter lists are not visible
// in this listing.
// First fragment: converts one rocrand(state) draw with
// detail::normal_distribution_double.
1342 __forceinline__ __device__ __host__
double
1345 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Second fragment: converts one rocrand4(state) draw to a double2.
1362 __forceinline__ __device__ __host__ double2
1365 return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
1380 __forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_threefry4x64_20* state)
1382 return rocrand_device::detail::normal_distribution(
rocrand(state));
// rocrand_normal2 for the Threefry4x64-20 engine: returns the float2 produced
// by normal_distribution2(state1, state2).
// NOTE(review): the definitions of state1/state2 are not visible in this
// listing.
1399 __forceinline__ __device__ __host__ float2
rocrand_normal2(rocrand_state_threefry4x64_20* state)
1404 return rocrand_device::detail::normal_distribution2(state1, state2);
// Fragments of two functions whose names and parameter lists are not visible
// in this listing.
// First fragment: converts one rocrand(state) draw with
// detail::normal_distribution_double.
1419 __forceinline__ __device__ __host__
double
1422 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Second fragment: packs state1 and state2 into a ulonglong2 and converts it
// to a double2; the definitions of state1/state2 are not visible here.
1439 __forceinline__ __device__ __host__ double2
1445 return rocrand_device::detail::normal_distribution_double2(ulonglong2{state1, state2});
__forceinline__ __device__ __host__ double4 rocrand_normal_double4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed double values.
Definition: rocrand_normal.h:487
__forceinline__ __device__ __host__ double2 rocrand_normal_double2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed double values.
Definition: rocrand_normal.h:467
__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_philox4x32_10 *state)
Returns a normally distributed float value.
Definition: rocrand_normal.h:364
__forceinline__ __device__ __host__ uint4 rocrand4(rocrand_state_philox4x32_10 *state)
Returns four uniformly distributed random unsigned int values from [0; 2^32 - 1] range.
Definition: rocrand_philox4x32_10.h:386
__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_philox4x32_10 *state)
Returns a normally distributed double value.
Definition: rocrand_normal.h:438
__forceinline__ __device__ __host__ float4 rocrand_normal4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed float values.
Definition: rocrand_normal.h:418
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_lfsr113.h:274
__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed float values.
Definition: rocrand_normal.h:396