21 #ifndef ROCRAND_MRG31K3P_H_ 
   22 #define ROCRAND_MRG31K3P_H_ 
   24 #include "rocrand/rocrand_common.h" 
   25 #include "rocrand/rocrand_mrg31k3p_precomputed.h" 
   27 #define ROCRAND_MRG31K3P_M1 2147483647U  // 2^31 - 1; modulus used by mod_m1() and the x1 reductions in next()
   28 #define ROCRAND_MRG31K3P_M2 2147462579U  // 2^31 - 21069; modulus used by mod_m2() and the x2 reductions in next()
   29 #define ROCRAND_MRG31K3P_MASK12 511U  // 2^9 - 1; selects the low 9 bits of x1[1] in next()
   30 #define ROCRAND_MRG31K3P_MASK13 16777215U  // 2^24 - 1; selects the low 24 bits of x1[2] in next()
   31 #define ROCRAND_MRG31K3P_MASK21 65535U  // 2^16 - 1; selects the low 16 bits of x2[0] and x2[2] in next()
   32 #define ROCRAND_MRG31K3P_NORM_DOUBLE (4.656612875245796923e-10)  // numerically 1 / ROCRAND_MRG31K3P_M1
   33 #define ROCRAND_MRG31K3P_UINT32_NORM \ 
   34     (2.000000001396983862)  // numerically UINT32_MAX / (ROCRAND_MRG31K3P_M1 - 1); rocrand() scales (next() - 1) by this
   44 #define ROCRAND_MRG31K3P_DEFAULT_SEED 12345ULL  // default seed for the MRG31K3P PRNG
   47 namespace rocrand_device
 
   58 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
   // The four fields below are compiled in only when ROCRAND_DETAIL_BM_NOT_IN_STATE is not defined.
   // NOTE(review): presumably each *_state field flags whether the matching boxmuller_* value holds a
   // cached Box-Muller result; the visible code only shows restart() setting both *_state fields to 0.
   // Confirm against detail::engine_boxmuller_helper.
   64         unsigned int boxmuller_float_state; 
 
   65         unsigned int boxmuller_double_state; 
 
   66         float        boxmuller_float; 
 
   67         double       boxmuller_double; 
 
   // Default constructor; declared for both host and device code.
   // NOTE(review): the constructor body is not visible in this listing -- confirm how it initializes the state.
   71     __forceinline__ __device__ __host__ mrg31k3p_engine()
 
   // Constructs an engine and initializes it by calling seed(seed, subsequence, offset).
   //   seed        - 64-bit seed value, forwarded unchanged to seed()
   //   subsequence - forwarded to seed(), which hands it to restart()
   //   offset      - forwarded to seed(), which hands it to restart()
   84     __forceinline__ __device__ __host__ mrg31k3p_engine(
const unsigned long long seed,
 
   85                                                         const unsigned long long subsequence,
 
   86                                                         const unsigned long long offset)
 
   88         this->seed(seed, subsequence, offset);
 
   // Re-initializes both state vectors (x1, x2) from seed_value, then calls restart(subsequence, offset).
   //   seed_value  - 64-bit seed; taken by value (non-const)
   //   subsequence - passed on to restart()
   //   offset      - passed on to restart()
   // NOTE(review): the embedded line numbers jump from 101 to 107, so the first statements of the body
   // are not visible in this listing -- check the full header before relying on this description.
   99     __forceinline__ __device__ __host__ 
void seed(
unsigned long long       seed_value,
 
  100                                                   const unsigned long long subsequence,
 
  101                                                   const unsigned long long offset)
 
  // x: low 32 bits of the seed XORed with 0x55555555 (the cast truncates to 32 bits).
  107         unsigned int x = 
static_cast<unsigned int>(seed_value ^ 0x55555555U);
 
  // y: high 32 bits of the seed XORed with 0xAAAAAAAA.
  108         unsigned int y = 
static_cast<unsigned int>((seed_value >> 32) ^ 0xAAAAAAAAU);
 
  // x1 is filled with (x, y, x) and x2 with (y, x, y), each multiplied by the full 64-bit seed
  // and reduced modulo M1 / M2 respectively by mod_mul_m1() / mod_mul_m2().
  109         m_state.x1[0]  = mod_mul_m1(x, seed_value);
 
  110         m_state.x1[1]  = mod_mul_m1(y, seed_value);
 
  111         m_state.x1[2]  = mod_mul_m1(x, seed_value);
 
  112         m_state.x2[0]  = mod_mul_m2(y, seed_value);
 
  113         m_state.x2[1]  = mod_mul_m2(x, seed_value);
 
  114         m_state.x2[2]  = mod_mul_m2(y, seed_value);
 
  // Apply the requested subsequence and offset to the freshly seeded state.
  115         this->restart(subsequence, offset);
 
  // Skips ahead by `offset` elements; thin public wrapper that forwards to discard_impl().
  119     __forceinline__ __device__ __host__ 
void discard(
unsigned long long offset)
 
  121         this->discard_impl(offset);
 
  // Skips ahead by `subsequence` subsequences; thin public wrapper that forwards to
  // discard_subsequence_impl().
  126     __forceinline__ __device__ __host__ 
void discard_subsequence(
unsigned long long subsequence)
 
  128         this->discard_subsequence_impl(subsequence);
 
  // Skips ahead by `sequence` sequences; thin public wrapper that forwards to discard_sequence_impl().
  133     __forceinline__ __device__ __host__ 
void discard_sequence(
unsigned long long sequence)
 
  135         this->discard_sequence_impl(sequence);
 
  // Resets the Box-Muller bookkeeping (when it is kept in the state) and then skips ahead from the
  // current x1/x2 values: first by `subsequence` subsequences, then by `offset` elements.
  // It does not reseed x1/x2 itself; seed() calls it after filling them.
  138     __forceinline__ __device__ __host__ 
void restart(
const unsigned long long subsequence,
 
  139                                                      const unsigned long long offset)
 
  141 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  // Zero both Box-Muller state fields.
  // NOTE(review): the closing #endif is not visible in this listing (line numbers skip 144).
  142         m_state.boxmuller_float_state  = 0;
 
  143         m_state.boxmuller_double_state = 0;
 
  // Order matters only in that the subsequence jump is applied before the element offset.
  145         this->discard_subsequence_impl(subsequence);
 
  146         this->discard_impl(offset);
 
  // Function-call operator returning an unsigned int.
  // NOTE(review): the body is not visible in this listing; presumably it forwards to next() -- confirm.
  149     __forceinline__ __device__ __host__ 
unsigned int operator()()
 
  // Advances the generator by one step and returns the combination of its two components.
  // NOTE(review): the embedded line numbers skip 156-158, 166-168 and 179-181, so the declaration of
  // `tmp` and the statements that follow each state shift are not visible in this listing.
  155     __forceinline__ __device__ __host__ 
unsigned int next()
 
  // First component (modulus M1): sum of two mask/shift terms built from x1[1] and x1[2].
  // Each term moves the masked low bits up and the remaining high bits down, which is a rotation
  // within 31 bits provided the stored value fits in 31 bits.
  159             = (((m_state.x1[1] & ROCRAND_MRG31K3P_MASK12) << 22) + (m_state.x1[1] >> 9))
 
  160               + (((m_state.x1[2] & ROCRAND_MRG31K3P_MASK13) << 7) + (m_state.x1[2] >> 24));
 
  // Single conditional subtraction keeps tmp below M1 after each addition.
  161         tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
 
  162         tmp += m_state.x1[2];
 
  163         tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
 
  // Shift the x1 history by one position (x1[1] -> x1[2], x1[0] -> x1[1]).
  164         m_state.x1[2] = m_state.x1[1];
 
  165         m_state.x1[1] = m_state.x1[0];
 
  // Second component (modulus M2): built from x2[0] and x2[2]; the low 16 bits are shifted left by 15
  // and the high bits are multiplied by 21069 (note M2 == 2^31 - 21069). tmp is reduced after every
  // addition with a single conditional subtraction.
  169         tmp = (((m_state.x2[0] & ROCRAND_MRG31K3P_MASK21) << 15) + 21069 * (m_state.x2[0] >> 16));
 
  170         tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
 
  171         tmp += ((m_state.x2[2] & ROCRAND_MRG31K3P_MASK21) << 15);
 
  172         tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
 
  173         tmp += 21069 * (m_state.x2[2] >> 16);
 
  174         tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
 
  175         tmp += m_state.x2[2];
 
  176         tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
 
  // Shift the x2 history by one position (x2[1] -> x2[2], x2[0] -> x2[1]).
  177         m_state.x2[2] = m_state.x2[1];
 
  178         m_state.x2[1] = m_state.x2[0];
 
  // Combination: x1[0] - x2[0], with M1 added when x1[0] <= x2[0] so the unsigned difference
  // never ends up zero or wrapped around.
  182         return m_state.x1[0] - m_state.x2[0]
 
  183                + (m_state.x1[0] <= m_state.x2[0] ? ROCRAND_MRG31K3P_M1 : 0);
 
  // Implementation behind discard(): forwards `offset` to discard_state(offset).
  188     __forceinline__ __device__ __host__ 
void discard_impl(
unsigned long long offset)
 
  190         discard_state(offset);
 
  // Implementation behind discard_subsequence(): loops while `subsequence` is non-zero and applies
  // matrices from the *_A1P72 / *_A2P72 tables to x1 and x2 via mod_mat_vec_m1() / mod_mat_vec_m2().
  // NOTE(review): the declaration and update of `i`, the condition that decides when a matrix is
  // applied, how `subsequence` is consumed, and the #else/#endif lines are not visible in this
  // listing. The tables come from rocrand_mrg31k3p_precomputed.h presumably -- confirm.
  194     __forceinline__ __device__ __host__ 
void 
  195         discard_subsequence_impl(
unsigned long long subsequence)
 
  199         while(subsequence > 0)
 
  // When compiling device code, the d_* tables are used.
  203 #if defined(__HIP_DEVICE_COMPILE__) 
  204                 mod_mat_vec_m1(d_mrg31k3p_A1P72 + i, m_state.x1);
 
  205                 mod_mat_vec_m2(d_mrg31k3p_A2P72 + i, m_state.x2);
 
  // Same update using the h_* tables (presumably the host branch of the conditional above).
  207                 mod_mat_vec_m1(h_mrg31k3p_A1P72 + i, m_state.x1);
 
  208                 mod_mat_vec_m2(h_mrg31k3p_A2P72 + i, m_state.x2);
 
  // Implementation behind discard_sequence(): applies matrices from the *_A1P134 / *_A2P134 tables
  // to x1 and x2 via mod_mat_vec_m1() / mod_mat_vec_m2().
  // NOTE(review): the loop header, the declaration of `i`, the way `sequence` is consumed and the
  // #else/#endif lines are not visible in this listing -- consult the full header.
  217     __forceinline__ __device__ __host__ 
void discard_sequence_impl(
unsigned long long sequence)
 
  // When compiling device code, the d_* tables are used.
  225 #if defined(__HIP_DEVICE_COMPILE__) 
  226                 mod_mat_vec_m1(d_mrg31k3p_A1P134 + i, m_state.x1);
 
  227                 mod_mat_vec_m2(d_mrg31k3p_A2P134 + i, m_state.x2);
 
  // Same update using the h_* tables (presumably the host branch of the conditional above).
  229                 mod_mat_vec_m1(h_mrg31k3p_A1P134 + i, m_state.x1);
 
  230                 mod_mat_vec_m2(h_mrg31k3p_A2P134 + i, m_state.x2);
 
  // Skips the state ahead by `offset` elements by applying matrices from the *_A1 / *_A2 tables
  // to x1 and x2 via mod_mat_vec_m1() / mod_mat_vec_m2(). Called from discard_impl().
  // NOTE(review): the loop header, the declaration of `i`, the way `offset` is consumed and the
  // #else/#endif lines are not visible in this listing -- consult the full header.
  239     __forceinline__ __device__ __host__ 
void discard_state(
unsigned long long offset)
 
  // When compiling device code, the d_* tables are used.
  247 #if defined(__HIP_DEVICE_COMPILE__) 
  248                 mod_mat_vec_m1(d_mrg31k3p_A1 + i, m_state.x1);
 
  249                 mod_mat_vec_m2(d_mrg31k3p_A2 + i, m_state.x2);
 
  // Same update using the h_* tables (presumably the host branch of the conditional above).
  251                 mod_mat_vec_m1(h_mrg31k3p_A1 + i, m_state.x1);
 
  252                 mod_mat_vec_m2(h_mrg31k3p_A2 + i, m_state.x2);
 
  // Argument-less overload of discard_state().
  // NOTE(review): the body is not visible in this listing; presumably it advances the state by a
  // single element -- confirm.
  261     __forceinline__ __device__ __host__ 
void discard_state()
 
  // In-place product s = A * s with every term reduced modulo M1.
  //   A - nine consecutive coefficients read as three rows of three (A[0..2], A[3..5], A[6..8])
  //   s - three-element vector, read and then overwritten
  // NOTE(review): the line declaring `s` is not visible in this listing (line numbers skip 268).
  267     __forceinline__ __device__ __host__ 
static void mod_mat_vec_m1(
const unsigned int* A,
 
  // 64-bit copies of the input: the products below are evaluated in 64-bit arithmetic, and every
  // row uses the original vector even though s[] is overwritten as we go.
  270         unsigned long long x[3] = {s[0], s[1], s[2]};
 
  // Each product is reduced before the three are summed, then the sum is reduced once more.
  272         s[0] = mod_m1(mod_m1(A[0] * x[0]) + mod_m1(A[1] * x[1]) + mod_m1(A[2] * x[2]));
 
  274         s[1] = mod_m1(mod_m1(A[3] * x[0]) + mod_m1(A[4] * x[1]) + mod_m1(A[5] * x[2]));
 
  276         s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2]));
 
  // In-place product s = A * s with every term reduced modulo M2.
  //   A - nine consecutive coefficients read as three rows of three (A[0..2], A[3..5], A[6..8])
  //   s - three-element vector, read and then overwritten
  // NOTE(review): the line declaring `s` is not visible in this listing (line numbers skip 280).
  279     __forceinline__ __device__ __host__ 
static void mod_mat_vec_m2(
const unsigned int* A,
 
  // 64-bit copies of the input: the products below are evaluated in 64-bit arithmetic, and every
  // row uses the original vector even though s[] is overwritten as we go.
  282         unsigned long long x[3] = {s[0], s[1], s[2]};
 
  // Each product is reduced before the three are summed, then the sum is reduced once more.
  284         s[0] = mod_m2(mod_m2(A[0] * x[0]) + mod_m2(A[1] * x[1]) + mod_m2(A[2] * x[2]));
 
  286         s[1] = mod_m2(mod_m2(A[3] * x[0]) + mod_m2(A[4] * x[1]) + mod_m2(A[5] * x[2]));
 
  288         s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2]));
 
  // Returns (i * j) reduced modulo M1.
  // `i` is converted to unsigned long long for the multiplication, so the product is taken
  // modulo 2^64 (it may wrap for large j) before the reduction is applied.
  291     __forceinline__ __device__ __host__ 
static unsigned long long mod_mul_m1(
unsigned int       i,
 
  292                                                                              unsigned long long j)
 
  294         return mod_m1(i * j);
 
  // Returns p modulo ROCRAND_MRG31K3P_M1 (2^31 - 1); the result is always below M1.
  297     __forceinline__ __device__ __host__ 
static unsigned long long mod_m1(
unsigned long long p)
 
  299         return p % ROCRAND_MRG31K3P_M1;
 
  // Returns (i * j) reduced modulo M2.
  // `i` is converted to unsigned long long for the multiplication, so the product is taken
  // modulo 2^64 (it may wrap for large j) before the reduction is applied.
  302     __forceinline__ __device__ __host__ 
static unsigned long long mod_mul_m2(
unsigned int       i,
 
  303                                                                              unsigned long long j)
 
  305         return mod_m2(i * j);
 
  // Returns p modulo ROCRAND_MRG31K3P_M2 (2^31 - 21069); the result is always below M2.
  308     __forceinline__ __device__ __host__ 
static unsigned long long mod_m2(
unsigned long long p)
 
  310         return p % ROCRAND_MRG31K3P_M2;
 
  // The engine's entire state: the x1/x2 vectors and, unless ROCRAND_DETAIL_BM_NOT_IN_STATE is
  // defined, the Box-Muller fields.
  315     mrg31k3p_state m_state;
 
  317 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  // Grants detail::engine_boxmuller_helper access to the engine's non-public members.
  // NOTE(review): presumably so it can use the Box-Muller fields of m_state -- confirm.
  318     friend struct detail::engine_boxmuller_helper<mrg31k3p_engine>;
 
  // Global-namespace alias for the engine class; this is the state type taken by rocrand_init(),
  // rocrand() and the skipahead*() functions below.
  330 typedef rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p;
 
  // Initializes an MRG31K3P state.
  //   seed        - seed value
  //   subsequence - subsequence to start at
  //   offset      - offset within the subsequence
  //   state       - output; overwritten with a freshly constructed engine (must point to valid storage)
  344 __forceinline__ __device__ __host__ 
void rocrand_init(
const unsigned long long seed,
 
  345                                                       const unsigned long long subsequence,
 
  346                                                       const unsigned long long offset,
 
  347                                                       rocrand_state_mrg31k3p*  state)
 
  // Build a temporary engine with the three-argument constructor and copy-assign it into *state.
  349     *state = rocrand_state_mrg31k3p(seed, subsequence, offset);
 
  // Returns a uniformly distributed random unsigned int from the range [0, 2^32 - 1] and advances
  // the state by one step.
  //   state - engine to draw from; modified by the call
  364 __forceinline__ __device__ __host__ 
unsigned int rocrand(rocrand_state_mrg31k3p* state)
 
  // Shift the raw output down by one, then stretch it to the 32-bit range. The multiplication is
  // carried out in double precision because ROCRAND_MRG31K3P_UINT32_NORM is a double literal; the
  // cast truncates the result back to unsigned int.
  367     return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM);
 
  // Updates the MRG31K3P state to skip ahead by `offset` elements.
  //   offset - number of elements to skip
  //   state  - engine to update in place
  378 __forceinline__ __device__ __host__ 
void skipahead(
unsigned long long      offset,
 
  379                                                    rocrand_state_mrg31k3p* state)
 
  // discard() returns void; `return` on a void expression is valid C++.
  381     return state->discard(offset);
 
  394                                                                rocrand_state_mrg31k3p* state)
 
  // Body of skipahead_subsequence(subsequence, state): skips ahead by `subsequence` subsequences by
  // forwarding to the engine's discard_subsequence(). The first line of the signature is not part of
  // this listing.
  396     return state->discard_subsequence(subsequence);
 
  409                                                             rocrand_state_mrg31k3p* state)
 
  // Body of skipahead_sequence(sequence, state): skips ahead by `sequence` sequences by forwarding
  // to the engine's discard_sequence(). The first line of the signature is not part of this listing.
  411     return state->discard_sequence(sequence);
 
#define ROCRAND_MRG31K3P_DEFAULT_SEED
Default seed for MRG31K3P PRNG.
Definition: rocrand_mrg31k3p.h:44
 
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p *state)
Returns a uniformly distributed random unsigned int value from the range [0, 2^32 - 1].
Definition: rocrand_mrg31k3p.h:364
 
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p *state)
Updates the MRG31K3P state to skip ahead by `subsequence` subsequences.
Definition: rocrand_mrg31k3p.h:393
 
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_mrg31k3p *state)
Updates the MRG31K3P state to skip ahead by `offset` elements.
Definition: rocrand_mrg31k3p.h:378
 
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p *state)
Updates the MRG31K3P state to skip ahead by `sequence` sequences.
Definition: rocrand_mrg31k3p.h:408
 
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg31k3p *state)
Initializes MRG31K3P state.
Definition: rocrand_mrg31k3p.h:344