21 #ifndef ROCRAND_NORMAL_H_ 
   22 #define ROCRAND_NORMAL_H_ 
   29 #include "rocrand/rocrand_lfsr113.h" 
   30 #include "rocrand/rocrand_mrg31k3p.h" 
   31 #include "rocrand/rocrand_mrg32k3a.h" 
   32 #include "rocrand/rocrand_mtgp32.h" 
   33 #include "rocrand/rocrand_philox4x32_10.h" 
   34 #include "rocrand/rocrand_scrambled_sobol32.h" 
   35 #include "rocrand/rocrand_scrambled_sobol64.h" 
   36 #include "rocrand/rocrand_sobol32.h" 
   37 #include "rocrand/rocrand_sobol64.h" 
   38 #include "rocrand/rocrand_threefry2x32_20.h" 
   39 #include "rocrand/rocrand_threefry2x64_20.h" 
   40 #include "rocrand/rocrand_threefry4x32_20.h" 
   41 #include "rocrand/rocrand_threefry4x64_20.h" 
   42 #include "rocrand/rocrand_xorwow.h" 
   44 #include "rocrand/rocrand_uniform.h" 
   46 #include <hip/hip_runtime.h> 
   50 namespace rocrand_device {
 
   53 __forceinline__ __device__ __host__ float2 box_muller(
unsigned int x, 
unsigned int y)
 
   56     float u = ROCRAND_2POW32_INV + (x * ROCRAND_2POW32_INV);
 
   57     float v = ROCRAND_2POW32_INV_2PI + (y * ROCRAND_2POW32_INV_2PI);
 
   58     float s = sqrtf(-2.0f * logf(u));
 
   59     #ifdef __HIP_DEVICE_COMPILE__ 
   60         __sincosf(v, &result.x, &result.y);
 
   64         result.x = sinf(v) * s;
 
   65         result.y = cosf(v) * s;
 
   70 __forceinline__ __device__ __host__ float2 box_muller(
unsigned long long v)
 
   72     unsigned int x = 
static_cast<unsigned int>(v);
 
   73     unsigned int y = 
static_cast<unsigned int>(v >> 32);
 
   75     return box_muller(x, y);
 
   78 __forceinline__ __device__ __host__ double2 box_muller_double(uint4 v)
 
   81     unsigned long long int v1 = (
unsigned long long int)v.x ^
 
   82         ((
unsigned long long int)v.y << (53 - 32));
 
   83     double u = ROCRAND_2POW53_INV_DOUBLE + (v1 * ROCRAND_2POW53_INV_DOUBLE);
 
   84     unsigned long long int v2 = (
unsigned long long int)v.z ^
 
   85         ((
unsigned long long int)v.w << (53 - 32));
 
   86     double w = (ROCRAND_2POW53_INV_DOUBLE * 2.0) +
 
   87         (v2 * (ROCRAND_2POW53_INV_DOUBLE * 2.0));
 
   88     double s = sqrt(-2.0 * log(u));
 
   89     #ifdef __HIP_DEVICE_COMPILE__ 
   90         sincospi(w, &result.x, &result.y);
 
   94         result.x = sin(w * ROCRAND_PI_DOUBLE) * s;
 
   95         result.y = cos(w * ROCRAND_PI_DOUBLE) * s;
 
  100 __forceinline__ __device__ __host__ double2 box_muller_double(ulonglong2 v)
 
  102     unsigned int x = 
static_cast<unsigned int>(v.x);
 
  103     unsigned int y = 
static_cast<unsigned int>(v.x >> 32);
 
  104     unsigned int z = 
static_cast<unsigned int>(v.y);
 
  105     unsigned int w = 
static_cast<unsigned int>(v.y >> 32);
 
  107     return box_muller_double(make_uint4(x, y, z, w));
 
  110 __forceinline__ __device__ __host__ __half2 box_muller_half(
unsigned short x, 
unsigned short y)
 
  112     #if defined(ROCRAND_HALF_MATH_SUPPORTED) 
  113     __half u = __float2half(ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV));
 
  114     __half v = __float2half(ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI));
 
  115     __half s = hsqrt(__hmul(__float2half(-2.0f), hlog(u)));
 
  122     float u = ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV);
 
  123     float v = ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI);
 
  124     float s = sqrtf(-2.0f * logf(u));
 
  125     #ifdef __HIP_DEVICE_COMPILE__ 
  126         __sincosf(v, &r.x, &r.y);
 
  140 template<
typename state_type>
 
  141 __forceinline__ __device__ __host__ float2 mrg_box_muller(
unsigned int x, 
unsigned int y)
 
  144     float  u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
 
  145     float  v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * ROCRAND_2PI;
 
  146     float s = sqrtf(-2.0f * logf(u));
 
  147     #ifdef __HIP_DEVICE_COMPILE__ 
  148         __sincosf(v, &result.x, &result.y);
 
  152         result.x = sinf(v) * s;
 
  153         result.y = cosf(v) * s;
 
  158 template<
typename state_type>
 
  159 __forceinline__ __device__ __host__ double2 mrg_box_muller_double(
unsigned int x, 
unsigned int y)
 
  162     double  u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
 
  163     double  v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * 2.0;
 
  164     double s = sqrt(-2.0 * log(u));
 
  165     #ifdef __HIP_DEVICE_COMPILE__ 
  166         sincospi(v, &result.x, &result.y);
 
  170         result.x = sin(v * ROCRAND_PI_DOUBLE) * s;
 
  171         result.y = cos(v * ROCRAND_PI_DOUBLE) * s;
 
  176 __forceinline__ __device__ __host__ 
float roc_f_erfinv(
float x)
 
  178     float tt1, tt2, lnx, sgn;
 
  179     sgn = (x < 0.0f) ? -1.0f : 1.0f;
 
  181     x = (1.0f - x) * (1.0f + x);
 
  184     #ifdef __HIP_DEVICE_COMPILE__ 
  190     #ifdef __HIP_DEVICE_COMPILE__ 
  193     else if (std::isinf(lnx))
 
  197     tt1 = 2.0f / (ROCRAND_PI * 0.147f) + 0.5f * lnx;
 
  198     tt2 = 1.0f / (0.147f) * lnx;
 
  200     return(sgn * sqrtf(-tt1 + sqrtf(tt1 * tt1 - tt2)));
 
  203 __forceinline__ __device__ __host__ 
double roc_d_erfinv(
double x)
 
  205     double tt1, tt2, lnx, sgn;
 
  206     sgn = (x < 0.0) ? -1.0 : 1.0;
 
  208     x = (1.0 - x) * (1.0 + x);
 
  211     #ifdef __HIP_DEVICE_COMPILE__ 
  217     #ifdef __HIP_DEVICE_COMPILE__ 
  220     else if (std::isinf(lnx))
 
  224     tt1 = 2.0 / (ROCRAND_PI_DOUBLE * 0.147) + 0.5 * lnx;
 
  225     tt2 = 1.0 / (0.147) * lnx;
 
  227     return(sgn * sqrt(-tt1 + sqrt(tt1 * tt1 - tt2)));
 
  230 __forceinline__ __device__ __host__ 
float normal_distribution(
unsigned int x)
 
  232     float p = ::rocrand_device::detail::uniform_distribution(x);
 
  233     float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
 
  237 __forceinline__ __device__ __host__ 
float normal_distribution(
unsigned long long int x)
 
  239     float p = ::rocrand_device::detail::uniform_distribution(x);
 
  240     float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
 
  244 __forceinline__ __device__ __host__ float2 normal_distribution2(
unsigned int v1, 
unsigned int v2)
 
  246     return ::rocrand_device::detail::box_muller(v1, v2);
 
  249 __forceinline__ __device__ __host__ float2 normal_distribution2(uint2 v)
 
  251     return ::rocrand_device::detail::box_muller(v.x, v.y);
 
  254 __forceinline__ __device__ __host__ float2 normal_distribution2(
unsigned long long v)
 
  256     return ::rocrand_device::detail::box_muller(v);
 
  259 __forceinline__ __device__ __host__ float4 normal_distribution4(uint4 v)
 
  261     float2 r1 = ::rocrand_device::detail::box_muller(v.x, v.y);
 
  262     float2 r2 = ::rocrand_device::detail::box_muller(v.z, v.w);
 
  271 __forceinline__ __device__ __host__ float4 normal_distribution4(longlong2 v)
 
  273     float2 r1 = ::rocrand_device::detail::box_muller(v.x);
 
  274     float2 r2 = ::rocrand_device::detail::box_muller(v.y);
 
  275     return float4{r1.x, r1.y, r2.x, r2.y};
 
  278 __forceinline__ __device__ __host__ float4 normal_distribution4(
unsigned long long v1,
 
  279                                                                 unsigned long long v2)
 
  281     float2 r1 = ::rocrand_device::detail::box_muller(v1);
 
  282     float2 r2 = ::rocrand_device::detail::box_muller(v2);
 
  283     return float4{r1.x, r1.y, r2.x, r2.y};
 
  286 __forceinline__ __device__ __host__ 
double normal_distribution_double(
unsigned int x)
 
  288     double p = ::rocrand_device::detail::uniform_distribution_double(x);
 
  289     double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
 
  293 __forceinline__ __device__ __host__ 
double normal_distribution_double(
unsigned long long int x)
 
  295     double p = ::rocrand_device::detail::uniform_distribution_double(x);
 
  296     double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
 
  300 __forceinline__ __device__ __host__ double2 normal_distribution_double2(uint4 v)
 
  302     return ::rocrand_device::detail::box_muller_double(v);
 
  305 __forceinline__ __device__ __host__ double2 normal_distribution_double2(ulonglong2 v)
 
  307     return ::rocrand_device::detail::box_muller_double(v);
 
  310 __forceinline__ __device__ __host__ __half2 normal_distribution_half2(
unsigned int v)
 
  312     return ::rocrand_device::detail::box_muller_half(
 
  313         static_cast<unsigned short>(v),
 
  314         static_cast<unsigned short>(v >> 16)
 
  318 __forceinline__ __device__ __host__ __half2 normal_distribution_half2(
unsigned long long v)
 
  320     return ::rocrand_device::detail::box_muller_half(
static_cast<unsigned short>(v),
 
  321                                                      static_cast<unsigned short>(v >> 32));
 
  324 template<
typename state_type>
 
  325 __forceinline__ __device__ __host__ float2 mrg_normal_distribution2(
unsigned int v1,
 
  328     return ::rocrand_device::detail::mrg_box_muller<state_type>(v1, v2);
 
  331 template<
typename state_type>
 
  332 __forceinline__ __device__ __host__ double2 mrg_normal_distribution_double2(
unsigned int v1,
 
  335     return ::rocrand_device::detail::mrg_box_muller_double<state_type>(v1, v2);
 
  338 template<
typename state_type>
 
  339 __forceinline__ __device__ __host__ __half2 mrg_normal_distribution_half2(
unsigned int v)
 
  341     v = rocrand_device::detail::mrg_uniform_distribution_uint<state_type>(v);
 
  342     return ::rocrand_device::detail::box_muller_half(
 
  343         static_cast<unsigned short>(v),
 
  344         static_cast<unsigned short>(v >> 16)
 
  365 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  366 __forceinline__ __device__ __host__ 
float rocrand_normal(rocrand_state_philox4x32_10* state)
 
  368     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
 
  370     if(bm_helper::has_float(state))
 
  372         return bm_helper::get_float(state);
 
  378     float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
 
  379     bm_helper::save_float(state, r.y);
 
  398 __forceinline__ __device__ __host__
 
  404     return rocrand_device::detail::normal_distribution2(state1, state2);
 
  421 __forceinline__ __device__ __host__
 
  424     return rocrand_device::detail::normal_distribution4(
rocrand4(state));
 
  441 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  444     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
 
  446     if(bm_helper::has_double(state))
 
  448         return bm_helper::get_double(state);
 
  450     double2 r = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  451     bm_helper::save_double(state, r.y);
 
  470 __forceinline__ __device__ __host__
 
  473     return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  490 __forceinline__ __device__ __host__
 
  494     r1 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  495     r2 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
  497         r1.x, r1.y, r2.x, r2.y
 
  515 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  516 __forceinline__ __device__ __host__ 
float rocrand_normal(rocrand_state_mrg31k3p* state)
 
  518     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
 
  520     if(bm_helper::has_float(state))
 
  522         return bm_helper::get_float(state);
 
  525     auto state1 = state->next();
 
  526     auto state2 = state->next();
 
  529         = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
 
  530     bm_helper::save_float(state, r.y);
 
  549 __forceinline__ __device__ __host__
 
  552     auto state1 = state->next();
 
  553     auto state2 = state->next();
 
  555     return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
 
  572 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  575     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
 
  577     if(bm_helper::has_double(state))
 
  579         return bm_helper::get_double(state);
 
  582     auto state1 = state->next();
 
  583     auto state2 = state->next();
 
  586         = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
 
  588     bm_helper::save_double(state, r.y);
 
  607 __forceinline__ __device__ __host__
 
  610     auto state1 = state->next();
 
  611     auto state2 = state->next();
 
  613     return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
 
  631 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  632 __forceinline__ __device__ __host__ 
float rocrand_normal(rocrand_state_mrg32k3a* state)
 
  634     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
 
  636     if(bm_helper::has_float(state))
 
  638         return bm_helper::get_float(state);
 
  641     auto state1 = state->next();
 
  642     auto state2 = state->next();
 
  645         = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
 
  646     bm_helper::save_float(state, r.y);
 
  665 __forceinline__ __device__ __host__
 
  668     auto state1 = state->next();
 
  669     auto state2 = state->next();
 
  671     return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
 
  688 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  691     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
 
  693     if(bm_helper::has_double(state))
 
  695         return bm_helper::get_double(state);
 
  698     auto state1 = state->next();
 
  699     auto state2 = state->next();
 
  702         = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
 
  704     bm_helper::save_double(state, r.y);
 
  723 __forceinline__ __device__ __host__
 
  726     auto state1 = state->next();
 
  727     auto state2 = state->next();
 
  729     return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
 
  747 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  748 __forceinline__ __device__ __host__ 
float rocrand_normal(rocrand_state_xorwow* state)
 
  750     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
 
  752     if(bm_helper::has_float(state))
 
  754         return bm_helper::get_float(state);
 
  758     float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
 
  759     bm_helper::save_float(state, r.y);
 
  778 __forceinline__ __device__ __host__
 
  783     return rocrand_device::detail::normal_distribution2(state1, state2);
 
  800 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  803     typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
 
  805     if(bm_helper::has_double(state))
 
  807         return bm_helper::get_double(state);
 
  815     double2 r = rocrand_device::detail::normal_distribution_double2(
 
  816         uint4 { state1, state2, state3, state4 }
 
  818     bm_helper::save_double(state, r.y);
 
  837 __forceinline__ __device__ __host__
 
  845     return rocrand_device::detail::normal_distribution_double2(
 
  846         uint4 { state1, state2, state3, state4 }
 
  862 __forceinline__ __device__
 
  865     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
  882 __forceinline__ __device__
 
  887     return rocrand_device::detail::normal_distribution2(state1, state2);
 
  902 __forceinline__ __device__
 
  905     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
  922 __forceinline__ __device__
 
  930     return rocrand_device::detail::normal_distribution_double2(
 
  931         uint4{state1, state2, state3, state4});
 
  946 __forceinline__ __device__ __host__
 
  949     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
  964 __forceinline__ __device__ __host__
 
  967     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
  982 __forceinline__ __device__ __host__
 
  985     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1000 __forceinline__ __device__ __host__
 
 1003     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1018 __forceinline__ __device__ __host__
 
 1021     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1036 __forceinline__ __device__ __host__
 
 1039     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1054 __forceinline__ __device__ __host__
 
 1057     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1072 __forceinline__ __device__ __host__
 
 1075     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1090 __forceinline__ __device__ __host__
 
 1093     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1110 __forceinline__ __device__ __host__
 
 1116     return rocrand_device::detail::normal_distribution2(state1, state2);
 
 1131 __forceinline__ __device__ __host__
 
 1134     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1151 __forceinline__ __device__ __host__
 
 1159     return rocrand_device::detail::normal_distribution_double2(
 
 1160         uint4{state1, state2, state3, state4});
 
 1175 __forceinline__ __device__ __host__
 
 1178     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1195 __forceinline__ __device__ __host__
 
 1198     return rocrand_device::detail::normal_distribution2(rocrand2(state));
 
 1213 __forceinline__ __device__ __host__
 
 1216     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1233 __forceinline__ __device__ __host__
 
 1236     auto state1 = rocrand2(state);
 
 1237     auto state2 = rocrand2(state);
 
 1239     return rocrand_device::detail::normal_distribution_double2(
 
 1240         uint4{state1.x, state1.y, state2.x, state2.y});
 
 1255 __forceinline__ __device__ __host__
 
 1258     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1275 __forceinline__ __device__ __host__
 
 1278     return rocrand_device::detail::normal_distribution2(
rocrand(state));
 
 1293 __forceinline__ __device__ __host__
 
 1296     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1313 __forceinline__ __device__ __host__
 
 1316     return rocrand_device::detail::normal_distribution_double2(rocrand2(state));
 
 1331 __forceinline__ __device__ __host__
 
 1334     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1351 __forceinline__ __device__ __host__
 
 1357     return rocrand_device::detail::normal_distribution2(state1, state2);
 
 1372 __forceinline__ __device__ __host__
 
 1375     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1392 __forceinline__ __device__ __host__
 
 1395     return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
 
 1410 __forceinline__ __device__ __host__
 
 1413     return rocrand_device::detail::normal_distribution(
rocrand(state));
 
 1430 __forceinline__ __device__ __host__
 
 1436     return rocrand_device::detail::normal_distribution2(state1, state2);
 
 1451 __forceinline__ __device__ __host__
 
 1454     return rocrand_device::detail::normal_distribution_double(
rocrand(state));
 
 1471 __forceinline__ __device__ __host__
 
 1477     return rocrand_device::detail::normal_distribution_double2(ulonglong2{state1, state2});
 
__forceinline__ __device__ __host__ double4 rocrand_normal_double4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed double values.
Definition: rocrand_normal.h:491
 
__forceinline__ __device__ __host__ double2 rocrand_normal_double2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed double values.
Definition: rocrand_normal.h:471
 
__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_philox4x32_10 *state)
Returns a normally distributed float value.
Definition: rocrand_normal.h:366
 
__forceinline__ __device__ __host__ uint4 rocrand4(rocrand_state_philox4x32_10 *state)
Returns four uniformly distributed random unsigned int values from the range [0; 2^32 - 1].
Definition: rocrand_philox4x32_10.h:379
 
__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_philox4x32_10 *state)
Returns a normally distributed double value.
Definition: rocrand_normal.h:442
 
__forceinline__ __device__ __host__ float4 rocrand_normal4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed float values.
Definition: rocrand_normal.h:422
 
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns a uniformly distributed random unsigned int value from the range [0; 2^32 - 1].
Definition: rocrand_lfsr113.h:277
 
__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed float values.
Definition: rocrand_normal.h:399