21 #ifndef ROCRAND_NORMAL_H_ 
   22 #define ROCRAND_NORMAL_H_ 
   25 #define FQUALIFIERS __forceinline__ __device__ 
   35 #include "rocrand/rocrand_lfsr113.h" 
   36 #include "rocrand/rocrand_mrg31k3p.h" 
   37 #include "rocrand/rocrand_mrg32k3a.h" 
   38 #include "rocrand/rocrand_mtgp32.h" 
   39 #include "rocrand/rocrand_philox4x32_10.h" 
   40 #include "rocrand/rocrand_scrambled_sobol32.h" 
   41 #include "rocrand/rocrand_scrambled_sobol64.h" 
   42 #include "rocrand/rocrand_sobol32.h" 
   43 #include "rocrand/rocrand_sobol64.h" 
   44 #include "rocrand/rocrand_threefry2x32_20.h" 
   45 #include "rocrand/rocrand_threefry2x64_20.h" 
   46 #include "rocrand/rocrand_threefry4x32_20.h" 
   47 #include "rocrand/rocrand_threefry4x64_20.h" 
   48 #include "rocrand/rocrand_xorwow.h" 
   50 #include "rocrand/rocrand_uniform.h" 
   52 namespace rocrand_device {
 
   56 float2 box_muller(
unsigned int x, 
unsigned int y)
 
   59     float u = ROCRAND_2POW32_INV + (x * ROCRAND_2POW32_INV);
 
   60     float v = ROCRAND_2POW32_INV_2PI + (y * ROCRAND_2POW32_INV_2PI);
 
   61     float s = sqrtf(-2.0f * logf(u));
 
   62     #ifdef __HIP_DEVICE_COMPILE__ 
   63         __sincosf(v, &result.x, &result.y);
 
   67         result.x = sinf(v) * s;
 
   68         result.y = cosf(v) * s;
 
   75     unsigned int x = 
static_cast<unsigned int>(v);
 
   76     unsigned int y = 
static_cast<unsigned int>(v >> 32);
 
   78     return box_muller(x, y);
 
   82 double2 box_muller_double(uint4 v)
 
   85     unsigned long long int v1 = (
unsigned long long int)v.x ^
 
   86         ((
unsigned long long int)v.y << (53 - 32));
 
   87     double u = ROCRAND_2POW53_INV_DOUBLE + (v1 * ROCRAND_2POW53_INV_DOUBLE);
 
   88     unsigned long long int v2 = (
unsigned long long int)v.z ^
 
   89         ((
unsigned long long int)v.w << (53 - 32));
 
   90     double w = (ROCRAND_2POW53_INV_DOUBLE * 2.0) +
 
   91         (v2 * (ROCRAND_2POW53_INV_DOUBLE * 2.0));
 
   92     double s = sqrt(-2.0 * log(u));
 
   93     #ifdef __HIP_DEVICE_COMPILE__ 
   94         sincospi(w, &result.x, &result.y);
 
   98         result.x = sin(w * ROCRAND_PI_DOUBLE) * s;
 
   99         result.y = cos(w * ROCRAND_PI_DOUBLE) * s;
 
  104 FQUALIFIERS double2 box_muller_double(ulonglong2 v)
 
  106     unsigned int x = 
static_cast<unsigned int>(v.x);
 
  107     unsigned int y = 
static_cast<unsigned int>(v.x >> 32);
 
  108     unsigned int z = 
static_cast<unsigned int>(v.y);
 
  109     unsigned int w = 
static_cast<unsigned int>(v.y >> 32);
 
  111     return box_muller_double(make_uint4(x, y, z, w));
 
  115 __half2 box_muller_half(
unsigned short x, 
unsigned short y)
 
  117     #if defined(ROCRAND_HALF_MATH_SUPPORTED) 
  118     __half u = __float2half(ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV));
 
  119     __half v = __float2half(ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI));
 
  120     __half s = hsqrt(__hmul(__float2half(-2.0f), hlog(u)));
 
  127     float u = ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV);
 
  128     float v = ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI);
 
  129     float s = sqrtf(-2.0f * logf(u));
 
  130     #ifdef __HIP_DEVICE_COMPILE__ 
  131         __sincosf(v, &r.x, &r.y);
 
  145 template<
typename state_type>
 
  146 FQUALIFIERS float2 mrg_box_muller(
unsigned int x, 
unsigned int y)
 
  149     float  u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
 
  150     float  v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * ROCRAND_2PI;
 
  151     float s = sqrtf(-2.0f * logf(u));
 
  152     #ifdef __HIP_DEVICE_COMPILE__ 
  153         __sincosf(v, &result.x, &result.y);
 
  157         result.x = sinf(v) * s;
 
  158         result.y = cosf(v) * s;
 
  163 template<
typename state_type>
 
  164 FQUALIFIERS double2 mrg_box_muller_double(
unsigned int x, 
unsigned int y)
 
  167     double  u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
 
  168     double  v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * 2.0;
 
  169     double s = sqrt(-2.0 * log(u));
 
  170     #ifdef __HIP_DEVICE_COMPILE__ 
  171         sincospi(v, &result.x, &result.y);
 
  175         result.x = sin(v * ROCRAND_PI_DOUBLE) * s;
 
  176         result.y = cos(v * ROCRAND_PI_DOUBLE) * s;
 
  182 float roc_f_erfinv(
float x)
 
  184     float tt1, tt2, lnx, sgn;
 
  185     sgn = (x < 0.0f) ? -1.0f : 1.0f;
 
  187     x = (1.0f - x) * (1.0f + x);
 
  190     #ifdef __HIP_DEVICE_COMPILE__ 
  196     #ifdef __HIP_DEVICE_COMPILE__ 
  199     else if (std::isinf(lnx))
 
  203     tt1 = 2.0f / (ROCRAND_PI * 0.147f) + 0.5f * lnx;
 
  204     tt2 = 1.0f / (0.147f) * lnx;
 
  206     return(sgn * sqrtf(-tt1 + sqrtf(tt1 * tt1 - tt2)));
 
  210 double roc_d_erfinv(
double x)
 
  212     double tt1, tt2, lnx, sgn;
 
  213     sgn = (x < 0.0) ? -1.0 : 1.0;
 
  215     x = (1.0 - x) * (1.0 + x);
 
  218     #ifdef __HIP_DEVICE_COMPILE__ 
  224     #ifdef __HIP_DEVICE_COMPILE__ 
  227     else if (std::isinf(lnx))
 
  231     tt1 = 2.0 / (ROCRAND_PI_DOUBLE * 0.147) + 0.5 * lnx;
 
  232     tt2 = 1.0 / (0.147) * lnx;
 
  234     return(sgn * sqrt(-tt1 + sqrt(tt1 * tt1 - tt2)));
 
  238 float normal_distribution(
unsigned int x)
 
  240     float p = ::rocrand_device::detail::uniform_distribution(x);
 
  241     float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
 
  246 float normal_distribution(
unsigned long long int x)
 
  248     float p = ::rocrand_device::detail::uniform_distribution(x);
 
  249     float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
 
  254 float2 normal_distribution2(
unsigned int v1, 
unsigned int v2)
 
  256     return ::rocrand_device::detail::box_muller(v1, v2);
 
  261     return ::rocrand_device::detail::box_muller(v.x, v.y);
 
  264 FQUALIFIERS float2 normal_distribution2(
unsigned long long v)
 
  266     return ::rocrand_device::detail::box_muller(v);
 
  270 float4 normal_distribution4(uint4 v)
 
  272     float2 r1 = ::rocrand_device::detail::box_muller(v.x, v.y);
 
  273     float2 r2 = ::rocrand_device::detail::box_muller(v.z, v.w);
 
  282 FQUALIFIERS float4 normal_distribution4(longlong2 v)
 
  284     float2 r1 = ::rocrand_device::detail::box_muller(v.x);
 
  285     float2 r2 = ::rocrand_device::detail::box_muller(v.y);
 
  286     return float4{r1.x, r1.y, r2.x, r2.y};
 
  289 FQUALIFIERS float4 normal_distribution4(
unsigned long long v1, 
unsigned long long v2)
 
  291     float2 r1 = ::rocrand_device::detail::box_muller(v1);
 
  292     float2 r2 = ::rocrand_device::detail::box_muller(v2);
 
  293     return float4{r1.x, r1.y, r2.x, r2.y};
 
  297 double normal_distribution_double(
unsigned int x)
 
  299     double p = ::rocrand_device::detail::uniform_distribution_double(x);
 
  300     double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
 
  305 double normal_distribution_double(
unsigned long long int x)
 
  307     double p = ::rocrand_device::detail::uniform_distribution_double(x);
 
  308     double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
 
  313 double2 normal_distribution_double2(uint4 v)
 
  315     return ::rocrand_device::detail::box_muller_double(v);
 
  318 FQUALIFIERS double2 normal_distribution_double2(ulonglong2 v)
 
  320     return ::rocrand_device::detail::box_muller_double(v);
 
  324 __half2 normal_distribution_half2(
unsigned int v)
 
  326     return ::rocrand_device::detail::box_muller_half(
 
  327         static_cast<unsigned short>(v),
 
  328         static_cast<unsigned short>(v >> 16)
 
  332 FQUALIFIERS __half2 normal_distribution_half2(
unsigned long long v)
 
  334     return ::rocrand_device::detail::box_muller_half(
static_cast<unsigned short>(v),
 
  335                                                      static_cast<unsigned short>(v >> 32));
 
  338 template<
typename state_type>
 
  339 FQUALIFIERS float2 mrg_normal_distribution2(
unsigned int v1, 
unsigned int v2)
 
  341     return ::rocrand_device::detail::mrg_box_muller<state_type>(v1, v2);
 
  344 template<
typename state_type>
 
  345 FQUALIFIERS double2 mrg_normal_distribution_double2(
unsigned int v1, 
unsigned int v2)
 
  347     return ::rocrand_device::detail::mrg_box_muller_double<state_type>(v1, v2);
 
  350 template<
typename state_type>
 
  351 FQUALIFIERS __half2 mrg_normal_distribution_half2(
unsigned int v)
 
  353     v = rocrand_device::detail::mrg_uniform_distribution_uint<state_type>(v);
 
  354     return ::rocrand_device::detail::box_muller_half(
 
  355         static_cast<unsigned short>(v),
 
  356         static_cast<unsigned short>(v >> 16)
 
  377 #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE 
  381     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
 
  383     if(bm_helper::has_float(state))
 
  385         return bm_helper::get_float(state);
 
  391     float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
 
  392     bm_helper::save_float(state, r.y);
 
  417     return rocrand_device::detail::normal_distribution2(state1, state2);
 
  437     return rocrand_device::detail::normal_distribution4(
rocrand4(state));
 
  454 #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE 
  458     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
 
  460     if(bm_helper::has_double(state))
 
  462         return bm_helper::get_double(state);
 
  464     double2 r = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  465     bm_helper::save_double(state, r.y);
 
  487     return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  508     r1 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  509     r2 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  511         r1.x, r1.y, r2.x, r2.y
 
  529 #ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE 
  532     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
 
  534     if(bm_helper::has_float(state))
 
  536         return bm_helper::get_float(state);
 
  539     auto state1 = state->next();
 
  540     auto state2 = state->next();
 
  543         = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
 
  544     bm_helper::save_float(state, r.y);
 
  565     auto state1 = state->next();
 
  566     auto state2 = state->next();
 
  568     return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
 
  585 #ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE 
  588     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
 
  590     if(bm_helper::has_double(state))
 
  592         return bm_helper::get_double(state);
 
  595     auto state1 = state->next();
 
  596     auto state2 = state->next();
 
  599         = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
 
  601     bm_helper::save_double(state, r.y);
 
  622     auto state1 = state->next();
 
  623     auto state2 = state->next();
 
  625     return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
 
  643 #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE 
  647     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
 
  649     if(bm_helper::has_float(state))
 
  651         return bm_helper::get_float(state);
 
  654     auto state1 = state->next();
 
  655     auto state2 = state->next();
 
  658         = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
 
  659     bm_helper::save_float(state, r.y);
 
  681     auto state1 = state->next();
 
  682     auto state2 = state->next();
 
  684     return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
 
  701 #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE 
  705     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
 
  707     if(bm_helper::has_double(state))
 
  709         return bm_helper::get_double(state);
 
  712     auto state1 = state->next();
 
  713     auto state2 = state->next();
 
  716         = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
 
  718     bm_helper::save_double(state, r.y);
 
  740     auto state1 = state->next();
 
  741     auto state2 = state->next();
 
  743     return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
 
  761 #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE 
  765     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
 
  767     if(bm_helper::has_float(state))
 
  769         return bm_helper::get_float(state);
 
  773     float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
 
  774     bm_helper::save_float(state, r.y);
 
  798     return rocrand_device::detail::normal_distribution2(state1, state2);
 
  815 #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE 
  819     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
 
  821     if(bm_helper::has_double(state))
 
  823         return bm_helper::get_double(state);
 
  831     double2 r = rocrand_device::detail::normal_distribution_double2(
 
  832         uint4 { state1, state2, state3, state4 }
 
  834     bm_helper::save_double(state, r.y);
 
  861     return rocrand_device::detail::normal_distribution_double2(
 
  862         uint4 { state1, state2, state3, state4 }
 
  881     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
  899     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
  917     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
  935     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
  953     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
  971     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
  989     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1007     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1025     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1043     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1061     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1084     return rocrand_device::detail::normal_distribution2(state1, state2);
 
 1102     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1127     return rocrand_device::detail::normal_distribution_double2(
 
 1128         uint4{state1, state2, state3, state4});
 
 1145     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1164     return rocrand_device::detail::normal_distribution2(rocrand2(state));
 
 1181     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1200     auto state1 = rocrand2(state);
 
 1201     auto state2 = rocrand2(state);
 
 1203     return rocrand_device::detail::normal_distribution_double2(
 
 1204         uint4{state1.x, state1.y, state2.x, state2.y});
 
 1221     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1240     return rocrand_device::detail::normal_distribution2(
rocrand(state));
 
 1257     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1276     return rocrand_device::detail::normal_distribution_double2(rocrand2(state));
 
 1293     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1315     return rocrand_device::detail::normal_distribution2(state1, state2);
 
 1332     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1351     return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
 1368     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1390     return rocrand_device::detail::normal_distribution2(state1, state2);
 
 1407     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1429     return rocrand_device::detail::normal_distribution_double2(ulonglong2{state1, state2});
 
FQUALIFIERS float2 rocrand_normal2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed float values.
Definition: rocrand_normal.h:412
FQUALIFIERS double rocrand_normal_double(rocrand_state_philox4x32_10 *state)
Returns a normally distributed double value.
Definition: rocrand_normal.h:456
FQUALIFIERS double4 rocrand_normal_double4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed double values.
Definition: rocrand_normal.h:505
FQUALIFIERS unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns a uniformly distributed random unsigned int value from the [0; 2^32 - 1] range.
Definition: rocrand_lfsr113.h:253
FQUALIFIERS float4 rocrand_normal4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed float values.
Definition: rocrand_normal.h:435
FQUALIFIERS float rocrand_normal(rocrand_state_philox4x32_10 *state)
Returns a normally distributed float value.
Definition: rocrand_normal.h:379
FQUALIFIERS uint4 rocrand4(rocrand_state_philox4x32_10 *state)
Returns four uniformly distributed random unsigned int values from the [0; 2^32 - 1] range.
Definition: rocrand_philox4x32_10.h:410
FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed double values.
Definition: rocrand_normal.h:485
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31