21 #ifndef ROCRAND_NORMAL_H_
22 #define ROCRAND_NORMAL_H_
25 #define FQUALIFIERS __forceinline__ __device__
35 #include "rocrand/rocrand_lfsr113.h"
36 #include "rocrand/rocrand_mrg31k3p.h"
37 #include "rocrand/rocrand_mrg32k3a.h"
38 #include "rocrand/rocrand_mtgp32.h"
39 #include "rocrand/rocrand_philox4x32_10.h"
40 #include "rocrand/rocrand_scrambled_sobol32.h"
41 #include "rocrand/rocrand_scrambled_sobol64.h"
42 #include "rocrand/rocrand_sobol32.h"
43 #include "rocrand/rocrand_sobol64.h"
44 #include "rocrand/rocrand_threefry2x32_20.h"
45 #include "rocrand/rocrand_threefry2x64_20.h"
46 #include "rocrand/rocrand_threefry4x32_20.h"
47 #include "rocrand/rocrand_threefry4x64_20.h"
48 #include "rocrand/rocrand_xorwow.h"
50 #include "rocrand/rocrand_uniform.h"
52 namespace rocrand_device {
// Single-precision Box-Muller transform: maps two 32-bit unsigned integers to
// a float2 whose x comes from the sine and whose y comes from the cosine.
// NOTE(review): this listing omits some lines of the function (braces, the
// declaration of `result`, #else/#endif, the return statement).
56 float2 box_muller(
unsigned int x,
unsigned int y)
// u: x scaled by ROCRAND_2POW32_INV with one extra ROCRAND_2POW32_INV added,
// so the logf argument below is non-zero for x == 0 (assuming the constant is
// positive, presumably 2^-32 -- confirm in the header that defines it).
59 float u = ROCRAND_2POW32_INV + (x * ROCRAND_2POW32_INV);
// v: the angle, built from y the same way using ROCRAND_2POW32_INV_2PI.
60 float v = ROCRAND_2POW32_INV_2PI + (y * ROCRAND_2POW32_INV_2PI);
// s: radius term sqrt(-2 * ln(u)).
61 float s = sqrtf(-2.0f * logf(u));
// Device compilation: one intrinsic call yields both sine and cosine.
// The multiplication by s for this path is presumably on lines not shown.
62 #ifdef __HIP_DEVICE_COMPILE__
63 __sincosf(v, &result.x, &result.y);
// Other path: separate sinf/cosf calls, each scaled by s.
67 result.x = sinf(v) * s;
68 result.y = cosf(v) * s;
// Fragment of a box_muller overload whose signature line is not part of this
// listing; `v` is treated as a 64-bit integer here (TODO confirm its type).
// x takes the low 32 bits of v.
75 unsigned int x =
static_cast<unsigned int>(v);
// y takes the bits of v above bit 31.
76 unsigned int y =
static_cast<unsigned int>(v >> 32);
// Delegate to the two-argument 32-bit overload.
78 return box_muller(x, y);
// Double-precision Box-Muller transform driven by four 32-bit words.
// NOTE(review): this listing omits some lines of the function (braces, the
// declaration of `result`, #else/#endif, the return statement).
82 double2 box_muller_double(uint4 v)
// v1: v.x XOR (v.y << 21). The shift amount is 53 - 32, so the combined
// value stays below 2^53.
85 unsigned long long int v1 = (
unsigned long long int)v.x ^
86 ((
unsigned long long int)v.y << (53 - 32));
// u: v1 scaled by ROCRAND_2POW53_INV_DOUBLE plus one extra step of the same
// constant, which keeps the log argument away from zero when v1 == 0
// (assuming the constant is positive -- confirm its definition).
87 double u = ROCRAND_2POW53_INV_DOUBLE + (v1 * ROCRAND_2POW53_INV_DOUBLE);
// v2: built from v.z and v.w exactly like v1.
88 unsigned long long int v2 = (
unsigned long long int)v.z ^
89 ((
unsigned long long int)v.w << (53 - 32));
// w: same scaling with twice the step; it is the angle expressed in units of
// pi (see the sincospi call and the explicit multiplication by pi below).
90 double w = (ROCRAND_2POW53_INV_DOUBLE * 2.0) +
91 (v2 * (ROCRAND_2POW53_INV_DOUBLE * 2.0));
// s: radius term sqrt(-2 * ln(u)).
92 double s = sqrt(-2.0 * log(u));
// Device compilation: sincospi takes the angle in units of pi. The
// multiplication by s for this path is presumably on lines not shown.
93 #ifdef __HIP_DEVICE_COMPILE__
94 sincospi(w, &result.x, &result.y);
// Other path: multiply w by ROCRAND_PI_DOUBLE explicitly, then scale by s.
98 result.x = sin(w * ROCRAND_PI_DOUBLE) * s;
99 result.y = cos(w * ROCRAND_PI_DOUBLE) * s;
// Overload taking two 64-bit words: each word is split into its low and high
// 32-bit halves and the four halves are forwarded as a uint4.
// (Braces are missing from this listing.)
104 FQUALIFIERS double2 box_muller_double(ulonglong2 v)
// x / y: low and high halves of v.x.
106 unsigned int x =
static_cast<unsigned int>(v.x);
107 unsigned int y =
static_cast<unsigned int>(v.x >> 32);
// z / w: low and high halves of v.y.
108 unsigned int z =
static_cast<unsigned int>(v.y);
109 unsigned int w =
static_cast<unsigned int>(v.y >> 32);
111 return box_muller_double(make_uint4(x, y, z, w));
// Box-Muller transform producing a __half2 from two 16-bit unsigned integers.
// NOTE(review): only part of the function is present in this listing; the
// sine/cosine step of the half-precision branch, the result packing and the
// closing preprocessor lines are not shown.
115 __half2 box_muller_half(
unsigned short x,
unsigned short y)
// Branch used when ROCRAND_HALF_MATH_SUPPORTED is defined: u and v are built
// in float, converted to __half, and the radius term is computed with the
// half-precision hlog / __hmul / hsqrt functions.
117 #if defined(ROCRAND_HALF_MATH_SUPPORTED)
118 __half u = __float2half(ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV));
119 __half v = __float2half(ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI));
120 __half s = hsqrt(__hmul(__float2half(-2.0f), hlog(u)));
// The lines below use float arithmetic for u, v and s (presumably the
// alternative branch of the #if above; the #else line is not shown).
127 float u = ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV);
128 float v = ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI);
129 float s = sqrtf(-2.0f * logf(u));
// Device compilation: sine and cosine of v are written into r.x and r.y.
130 #ifdef __HIP_DEVICE_COMPILE__
131 __sincosf(v, &r.x, &r.y);
// Box-Muller transform for MRG-style engines: the two raw engine outputs are
// first converted with mrg_uniform_distribution<state_type>.
// NOTE(review): this listing omits some lines of the function (braces, the
// declaration of `result`, #else/#endif, the return statement).
145 template<
typename state_type>
146 FQUALIFIERS float2 mrg_box_muller(
unsigned int x,
unsigned int y)
// u: converted value of x; v: converted value of y multiplied by ROCRAND_2PI
// to form the angle.
149 float u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
150 float v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * ROCRAND_2PI;
// s: radius term sqrt(-2 * ln(u)).
151 float s = sqrtf(-2.0f * logf(u));
// Device compilation: one intrinsic call for sine and cosine (scaling by s
// is presumably on lines not shown).
152 #ifdef __HIP_DEVICE_COMPILE__
153 __sincosf(v, &result.x, &result.y);
// Other path: separate sinf/cosf calls scaled by s.
157 result.x = sinf(v) * s;
158 result.y = cosf(v) * s;
// Double-precision Box-Muller transform for MRG-style engines.
// NOTE(review): this listing omits some lines of the function (braces, the
// declaration of `result`, #else/#endif, the return statement).
163 template<
typename state_type>
164 FQUALIFIERS double2 mrg_box_muller_double(
unsigned int x,
unsigned int y)
// u: converted value of x. v: converted value of y times 2.0, i.e. the angle
// in units of pi (see sincospi and the explicit multiplication by pi below).
167 double u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
168 double v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * 2.0;
// s: radius term sqrt(-2 * ln(u)).
169 double s = sqrt(-2.0 * log(u));
// Device compilation: sincospi takes the angle in units of pi (scaling by s
// is presumably on lines not shown).
170 #ifdef __HIP_DEVICE_COMPILE__
171 sincospi(v, &result.x, &result.y);
// Other path: multiply by ROCRAND_PI_DOUBLE explicitly, then scale by s.
175 result.x = sin(v * ROCRAND_PI_DOUBLE) * s;
176 result.y = cos(v * ROCRAND_PI_DOUBLE) * s;
// Single-precision closed-form approximation of the inverse error function.
// NOTE(review): the formula with the constant 0.147 looks like Winitzki's
// approximation -- confirm. Several lines are missing from this listing,
// including the assignment of lnx and the branches guarded by the #ifdef lines.
182 float roc_f_erfinv(
float x)
184 float tt1, tt2, lnx, sgn;
// Remember the sign of the input; the final result is multiplied by it.
185 sgn = (x < 0.0f) ? -1.0f : 1.0f;
// x is overwritten with (1 - x)(1 + x), i.e. 1 - x^2.
187 x = (1.0f - x) * (1.0f + x);
190 #ifdef __HIP_DEVICE_COMPILE__
196 #ifdef __HIP_DEVICE_COMPILE__
// Special handling when lnx is infinite (the handling itself is not shown).
199 else if (std::isinf(lnx))
// tt1 = 2 / (pi * 0.147) + lnx / 2 ; tt2 = lnx / 0.147.
203 tt1 = 2.0f / (ROCRAND_PI * 0.147f) + 0.5f * lnx;
204 tt2 = 1.0f / (0.147f) * lnx;
// Result: sgn * sqrt(-tt1 + sqrt(tt1^2 - tt2)).
206 return(sgn * sqrtf(-tt1 + sqrtf(tt1 * tt1 - tt2)));
// Double-precision closed-form approximation of the inverse error function.
// NOTE(review): the formula with the constant 0.147 looks like Winitzki's
// approximation -- confirm. Several lines are missing from this listing,
// including the assignment of lnx and the branches guarded by the #ifdef lines.
210 double roc_d_erfinv(
double x)
212 double tt1, tt2, lnx, sgn;
// Remember the sign of the input; the final result is multiplied by it.
213 sgn = (x < 0.0) ? -1.0 : 1.0;
// x is overwritten with (1 - x)(1 + x), i.e. 1 - x^2.
215 x = (1.0 - x) * (1.0 + x);
218 #ifdef __HIP_DEVICE_COMPILE__
224 #ifdef __HIP_DEVICE_COMPILE__
// Special handling when lnx is infinite (the handling itself is not shown).
227 else if (std::isinf(lnx))
// tt1 = 2 / (pi * 0.147) + lnx / 2 ; tt2 = lnx / 0.147.
231 tt1 = 2.0 / (ROCRAND_PI_DOUBLE * 0.147) + 0.5 * lnx;
232 tt2 = 1.0 / (0.147) * lnx;
// Result: sgn * sqrt(-tt1 + sqrt(tt1^2 - tt2)).
234 return(sgn * sqrt(-tt1 + sqrt(tt1 * tt1 - tt2)));
// Converts one 32-bit integer to a float via the inverse error function:
// v = ROCRAND_SQRT2 * erfinv(2p - 1), where p = uniform_distribution(x).
// (The return statement and braces are not part of this listing.)
238 float normal_distribution(
unsigned int x)
240 float p = ::rocrand_device::detail::uniform_distribution(x);
241 float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
// 64-bit-input overload: same computation as the 32-bit version,
// v = ROCRAND_SQRT2 * erfinv(2p - 1) with p = uniform_distribution(x).
// (The return statement and braces are not part of this listing.)
246 float normal_distribution(
unsigned long long int x)
248 float p = ::rocrand_device::detail::uniform_distribution(x);
249 float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
// Produces two floats from two 32-bit integers by forwarding them to
// detail::box_muller. (Braces are not part of this listing.)
254 float2 normal_distribution2(
unsigned int v1,
unsigned int v2)
256 return ::rocrand_device::detail::box_muller(v1, v2);
// Body of an overload whose signature is not part of this listing: the two
// components of a vector argument `v` are forwarded to detail::box_muller.
261 return ::rocrand_device::detail::box_muller(v.x, v.y);
// Produces two floats from a single 64-bit integer by forwarding it to the
// one-argument detail::box_muller overload.
// (Braces are not part of this listing.)
264 FQUALIFIERS float2 normal_distribution2(
unsigned long long v)
266 return ::rocrand_device::detail::box_muller(v);
// Produces four floats from four 32-bit words: (v.x, v.y) feed one
// box_muller call and (v.z, v.w) feed a second one.
// (The return statement combining r1 and r2 is not part of this listing.)
270 float4 normal_distribution4(uint4 v)
272 float2 r1 = ::rocrand_device::detail::box_muller(v.x, v.y);
273 float2 r2 = ::rocrand_device::detail::box_muller(v.z, v.w);
// Produces four floats from two 64-bit words: each component of v feeds one
// single-argument box_muller call, and the results are packed as
// {r1.x, r1.y, r2.x, r2.y}.
// NOTE(review): the parameter is the signed vector type longlong2, whereas
// the neighbouring overloads take unsigned types -- confirm this is intended.
282 FQUALIFIERS float4 normal_distribution4(longlong2 v)
284 float2 r1 = ::rocrand_device::detail::box_muller(v.x);
285 float2 r2 = ::rocrand_device::detail::box_muller(v.y);
286 return float4{r1.x, r1.y, r2.x, r2.y};
// Produces four floats from two separate 64-bit integers: v1 and v2 each
// feed one single-argument box_muller call, and the results are packed as
// {r1.x, r1.y, r2.x, r2.y}. (Braces are not part of this listing.)
289 FQUALIFIERS float4 normal_distribution4(
unsigned long long v1,
unsigned long long v2)
291 float2 r1 = ::rocrand_device::detail::box_muller(v1);
292 float2 r2 = ::rocrand_device::detail::box_muller(v2);
293 return float4{r1.x, r1.y, r2.x, r2.y};
// Converts one 32-bit integer to a double via the inverse error function:
// v = ROCRAND_SQRT2 * erfinv(2p - 1), where p = uniform_distribution_double(x).
// (The return statement and braces are not part of this listing.)
297 double normal_distribution_double(
unsigned int x)
299 double p = ::rocrand_device::detail::uniform_distribution_double(x);
300 double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
// 64-bit-input overload: same computation as the 32-bit version,
// v = ROCRAND_SQRT2 * erfinv(2p - 1) with p = uniform_distribution_double(x).
// (The return statement and braces are not part of this listing.)
305 double normal_distribution_double(
unsigned long long int x)
307 double p = ::rocrand_device::detail::uniform_distribution_double(x);
308 double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
// Produces two doubles from four 32-bit words by forwarding the uint4 to
// detail::box_muller_double. (Braces are not part of this listing.)
313 double2 normal_distribution_double2(uint4 v)
315 return ::rocrand_device::detail::box_muller_double(v);
// Produces two doubles from two 64-bit words by forwarding the ulonglong2 to
// detail::box_muller_double. (Braces are not part of this listing.)
318 FQUALIFIERS double2 normal_distribution_double2(ulonglong2 v)
320 return ::rocrand_device::detail::box_muller_double(v);
// Produces a __half2 from one 32-bit integer: the low 16 bits and the high
// 16 bits of v are passed to detail::box_muller_half as its two inputs.
// (The closing of the call and the braces are not part of this listing.)
324 __half2 normal_distribution_half2(
unsigned int v)
326 return ::rocrand_device::detail::box_muller_half(
327 static_cast<unsigned short>(v),
328 static_cast<unsigned short>(v >> 16)
// Produces a __half2 from one 64-bit integer: the low 16 bits of v and the
// low 16 bits of (v >> 32) are passed to detail::box_muller_half.
// NOTE(review): the second input is taken from bit 32 upward, not bit 16 as
// in the 32-bit overload -- confirm this choice of bits is intended.
332 FQUALIFIERS __half2 normal_distribution_half2(
unsigned long long v)
334 return ::rocrand_device::detail::box_muller_half(
static_cast<unsigned short>(v),
335 static_cast<unsigned short>(v >> 32));
// Produces two floats from two raw outputs of an MRG-style engine by
// forwarding them to detail::mrg_box_muller<state_type>.
// (Braces are not part of this listing.)
338 template<
typename state_type>
339 FQUALIFIERS float2 mrg_normal_distribution2(
unsigned int v1,
unsigned int v2)
341 return ::rocrand_device::detail::mrg_box_muller<state_type>(v1, v2);
// Produces two doubles from two raw outputs of an MRG-style engine by
// forwarding them to detail::mrg_box_muller_double<state_type>.
// (Braces are not part of this listing.)
344 template<
typename state_type>
345 FQUALIFIERS double2 mrg_normal_distribution_double2(
unsigned int v1,
unsigned int v2)
347 return ::rocrand_device::detail::mrg_box_muller_double<state_type>(v1, v2);
// Produces a __half2 from one raw output of an MRG-style engine.
// (The closing of the call and the braces are not part of this listing.)
350 template<
typename state_type>
351 FQUALIFIERS __half2 mrg_normal_distribution_half2(
unsigned int v)
// v is first remapped with mrg_uniform_distribution_uint<state_type>
// (presumably to spread the engine output over the full 32-bit range --
// confirm in rocrand_uniform.h), then split into low and high 16-bit halves.
353 v = rocrand_device::detail::mrg_uniform_distribution_uint<state_type>(v);
354 return ::rocrand_device::detail::box_muller_half(
355 static_cast<unsigned short>(v),
356 static_cast<unsigned short>(v >> 16)
// Fragment of rocrand_normal for rocrand_state_philox4x32_10 (signature,
// braces and several statements are not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE is not defined.
377 #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
381 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
// If the helper reports a stored float for this state, return it.
383 if(bm_helper::has_float(state))
385 return bm_helper::get_float(state);
// Otherwise generate a pair from state1/state2 (declared on lines not shown)
// and store the second value, r.y, through the helper.
391 float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
392 bm_helper::save_float(state, r.y);
// Fragment of rocrand_normal2 for rocrand_state_philox4x32_10: both values
// of the pair are returned; state1/state2 are declared on lines not shown.
417 return rocrand_device::detail::normal_distribution2(state1, state2);
// Fragment of rocrand_normal4 for rocrand_state_philox4x32_10: one
// rocrand4(state) draw (four 32-bit values) is converted to four floats.
437 return rocrand_device::detail::normal_distribution4(
rocrand4(state));
// Fragment of rocrand_normal_double for rocrand_state_philox4x32_10
// (signature, braces and some statements are not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE is not defined.
454 #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
458 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
// If the helper reports a stored double for this state, return it.
460 if(bm_helper::has_double(state))
462 return bm_helper::get_double(state);
// Otherwise turn one rocrand4(state) draw into a pair of doubles and store
// the second value, r.y, through the helper.
464 double2 r = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
465 bm_helper::save_double(state, r.y);
// Fragment of rocrand_normal_double2 for rocrand_state_philox4x32_10: one
// rocrand4(state) draw is converted to a pair of doubles.
487 return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
// Fragment of rocrand_normal_double4 for rocrand_state_philox4x32_10: two
// consecutive rocrand4(state) draws each yield a pair of doubles, which are
// combined in the order r1.x, r1.y, r2.x, r2.y. The declarations of r1/r2
// and the enclosing return expression are not part of this listing.
508 r1 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
509 r2 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
511 r1.x, r1.y, r2.x, r2.y
// Fragment of a float generator for rocrand_state_mrg31k3p (presumably
// rocrand_normal; the signature is not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE is not defined.
529 #ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE
532 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
// If the helper reports a stored float for this state, return it.
534 if(bm_helper::has_float(state))
536 return bm_helper::get_float(state);
// Otherwise take two consecutive engine outputs ...
539 auto state1 = state->next();
540 auto state2 = state->next();
// ... convert them to a pair (the left-hand side `r` is declared on a line
// not shown) and store the second value, r.y, through the helper.
543 = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
544 bm_helper::save_float(state, r.y);
// Fragment of a float-pair generator for rocrand_state_mrg31k3p (presumably
// rocrand_normal2; signature not shown): two consecutive engine outputs are
// converted to a pair and both values are returned.
565 auto state1 = state->next();
566 auto state2 = state->next();
568 return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
// Fragment of a double generator for rocrand_state_mrg31k3p (presumably
// rocrand_normal_double; the signature is not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE is not defined.
585 #ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE
588 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
// If the helper reports a stored double for this state, return it.
590 if(bm_helper::has_double(state))
592 return bm_helper::get_double(state);
// Otherwise take two consecutive engine outputs ...
595 auto state1 = state->next();
596 auto state2 = state->next();
// ... convert them to a pair of doubles (the statement begins and ends on
// lines not shown) and store the second value, r.y, through the helper.
599 = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
601 bm_helper::save_double(state, r.y);
// Fragment of a double-pair generator for rocrand_state_mrg31k3p (presumably
// rocrand_normal_double2; signature not shown): two consecutive engine
// outputs are converted to a pair of doubles. The call's remaining argument
// is on a line not part of this listing.
622 auto state1 = state->next();
623 auto state2 = state->next();
625 return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
// Fragment of a float generator for rocrand_state_mrg32k3a (presumably
// rocrand_normal; the signature is not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE is not defined.
643 #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
647 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
// If the helper reports a stored float for this state, return it.
649 if(bm_helper::has_float(state))
651 return bm_helper::get_float(state);
// Otherwise take two consecutive engine outputs ...
654 auto state1 = state->next();
655 auto state2 = state->next();
// ... convert them to a pair (the left-hand side `r` is declared on a line
// not shown) and store the second value, r.y, through the helper.
658 = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
659 bm_helper::save_float(state, r.y);
// Fragment of a float-pair generator for rocrand_state_mrg32k3a (presumably
// rocrand_normal2; signature not shown): two consecutive engine outputs are
// converted to a pair and both values are returned.
681 auto state1 = state->next();
682 auto state2 = state->next();
684 return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
// Fragment of a double generator for rocrand_state_mrg32k3a (presumably
// rocrand_normal_double; the signature is not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE is not defined.
701 #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
705 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
// If the helper reports a stored double for this state, return it.
707 if(bm_helper::has_double(state))
709 return bm_helper::get_double(state);
// Otherwise take two consecutive engine outputs ...
712 auto state1 = state->next();
713 auto state2 = state->next();
// ... convert them to a pair of doubles (the statement begins and ends on
// lines not shown) and store the second value, r.y, through the helper.
716 = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
718 bm_helper::save_double(state, r.y);
// Fragment of a double-pair generator for rocrand_state_mrg32k3a (presumably
// rocrand_normal_double2; signature not shown): two consecutive engine
// outputs are converted to a pair of doubles. The call's remaining argument
// is on a line not part of this listing.
740 auto state1 = state->next();
741 auto state2 = state->next();
743 return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
// Fragment of a float generator for rocrand_state_xorwow (presumably
// rocrand_normal; the signature is not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE is not defined.
761 #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE
765 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
// If the helper reports a stored float for this state, return it.
767 if(bm_helper::has_float(state))
769 return bm_helper::get_float(state);
// Otherwise generate a pair from state1/state2 (declared on lines not shown)
// and store the second value, r.y, through the helper.
773 float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
774 bm_helper::save_float(state, r.y);
// Fragment of a float-pair generator (presumably rocrand_normal2 for
// rocrand_state_xorwow; signature not shown): both values of the pair are
// returned. state1/state2 are declared on lines not part of this listing.
798 return rocrand_device::detail::normal_distribution2(state1, state2);
// Fragment of a double generator for rocrand_state_xorwow (presumably
// rocrand_normal_double; the signature is not part of this listing).
// Everything shown is compiled only when
// ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE is not defined.
815 #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE
819 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
// If the helper reports a stored double for this state, return it.
821 if(bm_helper::has_double(state))
823 return bm_helper::get_double(state);
// Otherwise pack four values state1..state4 (declared on lines not shown)
// into a uint4, convert to a pair of doubles and store the second value.
831 double2 r = rocrand_device::detail::normal_distribution_double2(
832 uint4 { state1, state2, state3, state4 }
834 bm_helper::save_double(state, r.y);
// Fragment of a double-pair generator (presumably rocrand_normal_double2 for
// rocrand_state_xorwow; signature not shown): four values state1..state4
// (declared on lines not part of this listing) are packed into a uint4 and
// converted to a pair of doubles.
861 return rocrand_device::detail::normal_distribution_double2(
862 uint4 { state1, state2, state3, state4 }
// Return statements of ten separate API functions whose signatures (and
// therefore engine types) are not part of this listing. They alternate
// between a float result via detail::normal_distribution and a double result
// via detail::normal_distribution_double; each consumes exactly one
// rocrand(state) draw.
// NOTE(review): presumably these are the rocrand_normal /
// rocrand_normal_double overloads for five engine types -- confirm against
// the full header.
881 return rocrand_device::detail::normal_distribution(
rocrand(state));
899 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
917 return rocrand_device::detail::normal_distribution(
rocrand(state));
935 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
953 return rocrand_device::detail::normal_distribution(
rocrand(state));
971 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
989 return rocrand_device::detail::normal_distribution(
rocrand(state));
1007 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1025 return rocrand_device::detail::normal_distribution(
rocrand(state));
1043 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Return statements of four API functions (signatures and engine type are
// not part of this listing), in order: one float, a float pair, one double,
// a double pair. state1..state4 are declared on lines not shown.
// Float: one rocrand(state) draw.
1061 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Float pair: two values forwarded to normal_distribution2.
1084 return rocrand_device::detail::normal_distribution2(state1, state2);
// Double: one rocrand(state) draw.
1102 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Double pair: four values packed into a uint4.
1127 return rocrand_device::detail::normal_distribution_double2(
1128 uint4{state1, state2, state3, state4});
// Return statements of four API functions (signatures and engine type are
// not part of this listing), in order: one float, a float pair, one double,
// a double pair.
// Float: one rocrand(state) draw.
1145 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Float pair: one rocrand2(state) draw forwarded as a vector.
1164 return rocrand_device::detail::normal_distribution2(rocrand2(state));
// Double: one rocrand(state) draw.
1181 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Double pair: two rocrand2(state) draws; their .x/.y components are packed
// into a uint4 in draw order.
1200 auto state1 = rocrand2(state);
1201 auto state2 = rocrand2(state);
1203 return rocrand_device::detail::normal_distribution_double2(
1204 uint4{state1.x, state1.y, state2.x, state2.y});
// Return statements of four API functions (signatures and engine type are
// not part of this listing), in order: one float, a float pair, one double,
// a double pair.
// Float: one rocrand(state) draw.
1221 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Float pair: a single rocrand(state) draw is passed to normal_distribution2
// (presumably the 64-bit overload -- confirm the return type of rocrand for
// this engine).
1240 return rocrand_device::detail::normal_distribution2(
rocrand(state));
// Double: one rocrand(state) draw.
1257 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Double pair: one rocrand2(state) draw forwarded as a vector.
1276 return rocrand_device::detail::normal_distribution_double2(rocrand2(state));
// Return statements of four API functions (signatures and engine type are
// not part of this listing), in order: one float, a float pair, one double,
// a double pair. state1/state2 are declared on lines not shown.
// Float: one rocrand(state) draw.
1293 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Float pair: two values forwarded to normal_distribution2.
1315 return rocrand_device::detail::normal_distribution2(state1, state2);
// Double: one rocrand(state) draw.
1332 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Double pair: one rocrand4(state) draw forwarded as a vector.
1351 return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
// Return statements of four API functions (signatures and engine type are
// not part of this listing), in order: one float, a float pair, one double,
// a double pair. state1/state2 are declared on lines not shown.
// Float: one rocrand(state) draw.
1368 return rocrand_device::detail::normal_distribution(
rocrand(state));
// Float pair: two values forwarded to normal_distribution2.
1390 return rocrand_device::detail::normal_distribution2(state1, state2);
// Double: one rocrand(state) draw.
1407 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
// Double pair: two values packed into a ulonglong2.
1429 return rocrand_device::detail::normal_distribution_double2(ulonglong2{state1, state2});
FQUALIFIERS float2 rocrand_normal2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed float values.
Definition: rocrand_normal.h:412
FQUALIFIERS double rocrand_normal_double(rocrand_state_philox4x32_10 *state)
Returns a normally distributed double value.
Definition: rocrand_normal.h:456
FQUALIFIERS double4 rocrand_normal_double4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed double values.
Definition: rocrand_normal.h:505
FQUALIFIERS unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_lfsr113.h:253
FQUALIFIERS float4 rocrand_normal4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed float values.
Definition: rocrand_normal.h:435
FQUALIFIERS float rocrand_normal(rocrand_state_philox4x32_10 *state)
Returns a normally distributed float value.
Definition: rocrand_normal.h:379
FQUALIFIERS uint4 rocrand4(rocrand_state_philox4x32_10 *state)
Returns four uniformly distributed random unsigned int values from [0; 2^32 - 1] range.
Definition: rocrand_philox4x32_10.h:410
FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed double values.
Definition: rocrand_normal.h:485
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31