21 #ifndef ROCRAND_MRG31K3P_H_
22 #define ROCRAND_MRG31K3P_H_
24 #include "rocrand/rocrand_common.h"
25 #include "rocrand/rocrand_mrg31k3p_precomputed.h"
// Moduli used by the two halves of the generator state (x1 is reduced by M1,
// x2 by M2). Numerically 2147483647 == 2^31 - 1 and 2147462579 == 2^31 - 21069.
27 #define ROCRAND_MRG31K3P_M1 2147483647U
28 #define ROCRAND_MRG31K3P_M2 2147462579U
// Low-bit masks applied in next(): 511 == 2^9 - 1, 16777215 == 2^24 - 1,
// 65535 == 2^16 - 1.
29 #define ROCRAND_MRG31K3P_MASK12 511U
30 #define ROCRAND_MRG31K3P_MASK13 16777215U
31 #define ROCRAND_MRG31K3P_MASK21 65535U
// Numerically about 1 / (2^31 - 1). Not referenced in the visible part of this
// listing; presumably used by the floating-point distributions - confirm.
32 #define ROCRAND_MRG31K3P_NORM_DOUBLE (4.656612875245796923e-10)
// Factor that rocrand() multiplies (next() - 1) by before casting to
// unsigned int. Numerically about (2^32 - 1) / (2^31 - 2).
33 #define ROCRAND_MRG31K3P_UINT32_NORM \
34 (2.000000001396983862)
// Default seed for the MRG31K3P PRNG. No use of it is visible in this listing.
44 #define ROCRAND_MRG31K3P_DEFAULT_SEED 12345ULL
47 namespace rocrand_device
58 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
64 unsigned int boxmuller_float_state;
65 unsigned int boxmuller_double_state;
66 float boxmuller_float;
67 double boxmuller_double;
71 __forceinline__ __device__ __host__ mrg31k3p_engine()
// Seeding constructor: forwards seed, subsequence and offset unchanged to
// seed(), which sets the state words and then calls restart().
84 __forceinline__ __device__ __host__ mrg31k3p_engine(
const unsigned long long seed,
85 const unsigned long long subsequence,
86 const unsigned long long offset)
88 this->seed(seed, subsequence, offset);
// Re-initializes the six state words from seed_value, then positions the
// engine with restart(subsequence, offset).
// NOTE(review): seed_value is the only non-const parameter and listing lines
// 102-106 are not shown here, so it may be adjusted before use - confirm
// against the full header.
99 __forceinline__ __device__ __host__
void seed(
unsigned long long seed_value,
100 const unsigned long long subsequence,
101 const unsigned long long offset)
// x: seed_value XOR 0x55555555, truncated to 32 bits by the cast.
107 unsigned int x =
static_cast<unsigned int>(seed_value ^ 0x55555555U);
// y: upper 32 bits of seed_value XOR 0xAAAAAAAA, truncated to 32 bits.
108 unsigned int y =
static_cast<unsigned int>((seed_value >> 32) ^ 0xAAAAAAAAU);
// Each word is (x or y) * seed_value reduced modulo M1 (x1 words) or M2 (x2
// words); x and y alternate, starting with x for x1[0] and y for x2[0].
109 m_state.x1[0] = mod_mul_m1(x, seed_value);
110 m_state.x1[1] = mod_mul_m1(y, seed_value);
111 m_state.x1[2] = mod_mul_m1(x, seed_value);
112 m_state.x2[0] = mod_mul_m2(y, seed_value);
113 m_state.x2[1] = mod_mul_m2(x, seed_value);
114 m_state.x2[2] = mod_mul_m2(y, seed_value);
// Reset the Box-Muller flags (when kept in the state) and skip ahead.
115 this->restart(subsequence, offset);
// Public entry point for skipping `offset` values; delegates to discard_impl().
119 __forceinline__ __device__ __host__
void discard(
unsigned long long offset)
121 this->discard_impl(offset);
// Public entry point for skipping `subsequence` subsequences; delegates to
// discard_subsequence_impl().
126 __forceinline__ __device__ __host__
void discard_subsequence(
unsigned long long subsequence)
128 this->discard_subsequence_impl(subsequence);
// Public entry point for skipping `sequence` sequences; delegates to
// discard_sequence_impl().
133 __forceinline__ __device__ __host__
void discard_sequence(
unsigned long long sequence)
135 this->discard_sequence_impl(sequence);
// Positions the engine relative to its current state words: skips
// `subsequence` subsequences first, then `offset` values. Does not touch the
// seed-derived words itself; seed() calls this after setting them.
138 __forceinline__ __device__ __host__
void restart(
const unsigned long long subsequence,
139 const unsigned long long offset)
// When the Box-Muller fields live in the state struct, zero both *_state
// fields (presumably "no cached value" flags - confirm). The closing #endif
// is not shown in this listing.
141 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
142 m_state.boxmuller_float_state = 0;
143 m_state.boxmuller_double_state = 0;
145 this->discard_subsequence_impl(subsequence);
146 this->discard_impl(offset);
149 __forceinline__ __device__ __host__
unsigned int operator()()
// Advances both state halves by one step and returns their combination.
// NOTE(review): the declaration of `tmp` (listing line 158) and lines 166-168
// and 179-181 are not shown here; presumably they store `tmp` into x1[0] and
// x2[0] respectively - confirm against the full header.
155 __forceinline__ __device__ __host__
unsigned int next()
// First half: x1[1] with its low 9 bits moved up by 22 and the rest moved down
// by 9, plus x1[2] with its low 24 bits moved up by 7 and the rest moved down
// by 24.
159 = (((m_state.x1[1] & ROCRAND_MRG31K3P_MASK12) << 22) + (m_state.x1[1] >> 9))
160 + (((m_state.x1[2] & ROCRAND_MRG31K3P_MASK13) << 7) + (m_state.x1[2] >> 24));
// After each addition, subtract M1 once if the running sum has reached it.
161 tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
162 tmp += m_state.x1[2];
163 tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
// Shift the x1 history down by one slot.
164 m_state.x1[2] = m_state.x1[1];
165 m_state.x1[1] = m_state.x1[0];
// Second half: for x2[0] and then x2[2], take (low 16 bits << 15) plus
// 21069 * (upper bits), then add x2[2] itself, subtracting M2 once after each
// step if the running sum has reached it.
169 tmp = (((m_state.x2[0] & ROCRAND_MRG31K3P_MASK21) << 15) + 21069 * (m_state.x2[0] >> 16));
170 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
171 tmp += ((m_state.x2[2] & ROCRAND_MRG31K3P_MASK21) << 15);
172 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
173 tmp += 21069 * (m_state.x2[2] >> 16);
174 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
175 tmp += m_state.x2[2];
176 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
// Shift the x2 history down by one slot.
177 m_state.x2[2] = m_state.x2[1];
178 m_state.x2[1] = m_state.x2[0];
// Combine: x1[0] - x2[0], with M1 added when x1[0] <= x2[0] so the unsigned
// difference does not wrap and an equal pair yields M1 rather than 0.
182 return m_state.x1[0] - m_state.x2[0]
183 + (m_state.x1[0] <= m_state.x2[0] ? ROCRAND_MRG31K3P_M1 : 0);
// Implementation behind discard() and restart(): forwards to
// discard_state(offset).
188 __forceinline__ __device__ __host__
void discard_impl(
unsigned long long offset)
190 discard_state(offset);
// Skips `subsequence` subsequences by applying matrices from the A1P72 / A2P72
// tables to the x1 and x2 state vectors.
// NOTE(review): the declaration and update of `i`, the condition selecting
// which iterations apply a matrix, and the #else/#endif lines are not shown in
// this listing - confirm against the full header.
194 __forceinline__ __device__ __host__
void
195 discard_subsequence_impl(
unsigned long long subsequence)
// Loops until `subsequence` reaches 0; how it is decreased is not visible here.
199 while(subsequence > 0)
// Device compilation reads the d_* tables; the second pair uses the h_*
// tables (presumably the host branch of the same conditional).
203 #if defined(__HIP_DEVICE_COMPILE__)
204 mod_mat_vec_m1(d_mrg31k3p_A1P72 + i, m_state.x1);
205 mod_mat_vec_m2(d_mrg31k3p_A2P72 + i, m_state.x2);
207 mod_mat_vec_m1(h_mrg31k3p_A1P72 + i, m_state.x1);
208 mod_mat_vec_m2(h_mrg31k3p_A2P72 + i, m_state.x2);
// Skips `sequence` sequences by applying matrices from the A1P134 / A2P134
// tables to the x1 and x2 state vectors.
// NOTE(review): the loop, the index `i` and the #else/#endif lines are not
// shown in this listing - confirm against the full header.
217 __forceinline__ __device__ __host__
void discard_sequence_impl(
unsigned long long sequence)
// Device compilation reads the d_* tables; the second pair uses the h_*
// tables (presumably the host branch of the same conditional).
225 #if defined(__HIP_DEVICE_COMPILE__)
226 mod_mat_vec_m1(d_mrg31k3p_A1P134 + i, m_state.x1);
227 mod_mat_vec_m2(d_mrg31k3p_A2P134 + i, m_state.x2);
229 mod_mat_vec_m1(h_mrg31k3p_A1P134 + i, m_state.x1);
230 mod_mat_vec_m2(h_mrg31k3p_A2P134 + i, m_state.x2);
// Skips `offset` values by applying matrices from the A1 / A2 tables to the x1
// and x2 state vectors.
// NOTE(review): the loop, the index `i` and the #else/#endif lines are not
// shown in this listing - confirm against the full header.
239 __forceinline__ __device__ __host__
void discard_state(
unsigned long long offset)
// Device compilation reads the d_* tables; the second pair uses the h_*
// tables (presumably the host branch of the same conditional).
247 #if defined(__HIP_DEVICE_COMPILE__)
248 mod_mat_vec_m1(d_mrg31k3p_A1 + i, m_state.x1);
249 mod_mat_vec_m2(d_mrg31k3p_A2 + i, m_state.x2);
251 mod_mat_vec_m1(h_mrg31k3p_A1 + i, m_state.x1);
252 mod_mat_vec_m2(h_mrg31k3p_A2 + i, m_state.x2);
261 __forceinline__ __device__ __host__
void discard_state()
// In-place product of a 3x3 matrix and a 3-element vector, modulo M1.
// A holds nine entries read row by row (A[0..2] feed s[0], A[3..5] feed s[1],
// A[6..8] feed s[2]); s is both input and output. The declaration of `s` is
// not shown in this listing.
267 __forceinline__ __device__ __host__
static void mod_mat_vec_m1(
const unsigned int* A,
// Copy s into 64-bit temporaries: the products below are then computed in
// 64 bits, and every row reads the original vector rather than freshly
// written elements.
270 unsigned long long x[3] = {s[0], s[1], s[2]};
// Each product is reduced before summing, and the sum is reduced again.
272 s[0] = mod_m1(mod_m1(A[0] * x[0]) + mod_m1(A[1] * x[1]) + mod_m1(A[2] * x[2]));
274 s[1] = mod_m1(mod_m1(A[3] * x[0]) + mod_m1(A[4] * x[1]) + mod_m1(A[5] * x[2]));
276 s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2]));
// In-place product of a 3x3 matrix and a 3-element vector, modulo M2.
// A holds nine entries read row by row (A[0..2] feed s[0], A[3..5] feed s[1],
// A[6..8] feed s[2]); s is both input and output. The declaration of `s` is
// not shown in this listing.
279 __forceinline__ __device__ __host__
static void mod_mat_vec_m2(
const unsigned int* A,
// Copy s into 64-bit temporaries: the products below are then computed in
// 64 bits, and every row reads the original vector rather than freshly
// written elements.
282 unsigned long long x[3] = {s[0], s[1], s[2]};
// Each product is reduced before summing, and the sum is reduced again.
284 s[0] = mod_m2(mod_m2(A[0] * x[0]) + mod_m2(A[1] * x[1]) + mod_m2(A[2] * x[2]));
286 s[1] = mod_m2(mod_m2(A[3] * x[0]) + mod_m2(A[4] * x[1]) + mod_m2(A[5] * x[2]));
288 s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2]));
// Returns (i * j) reduced modulo M1. The multiplication is carried out in
// unsigned long long, so for large j the product wraps modulo 2^64 before the
// reduction (seed() passes a full 64-bit seed as j).
291 __forceinline__ __device__ __host__
static unsigned long long mod_mul_m1(
unsigned int i,
292 unsigned long long j)
294 return mod_m1(i * j);
// Returns p modulo ROCRAND_MRG31K3P_M1.
297 __forceinline__ __device__ __host__
static unsigned long long mod_m1(
unsigned long long p)
299 return p % ROCRAND_MRG31K3P_M1;
// Returns (i * j) reduced modulo M2. The multiplication is carried out in
// unsigned long long, so for large j the product wraps modulo 2^64 before the
// reduction (seed() passes a full 64-bit seed as j).
302 __forceinline__ __device__ __host__
static unsigned long long mod_mul_m2(
unsigned int i,
303 unsigned long long j)
305 return mod_m2(i * j);
// Returns p modulo ROCRAND_MRG31K3P_M2.
308 __forceinline__ __device__ __host__
static unsigned long long mod_m2(
unsigned long long p)
310 return p % ROCRAND_MRG31K3P_M2;
315 mrg31k3p_state m_state;
317 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
318 friend struct detail::engine_boxmuller_helper<mrg31k3p_engine>;
// Global-namespace alias for the engine class; this is the state type taken by
// rocrand_init(), rocrand() and the skipahead functions.
330 typedef rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p;
// Initializes MRG31K3P state: overwrites *state with an engine freshly
// constructed from (seed, subsequence, offset). `state` must point to a valid
// object; it is dereferenced without a null check.
344 __forceinline__ __device__ __host__
void rocrand_init(
const unsigned long long seed,
345 const unsigned long long subsequence,
346 const unsigned long long offset,
347 rocrand_state_mrg31k3p* state)
349 *state = rocrand_state_mrg31k3p(seed, subsequence, offset);
// Returns uniformly distributed random unsigned int value from [0; 2^32 - 1]
// range, advancing *state by one step.
364 __forceinline__ __device__ __host__
unsigned int rocrand(rocrand_state_mrg31k3p* state)
// Shift the raw output down by one, scale it by the double-precision constant
// ROCRAND_MRG31K3P_UINT32_NORM, then truncate to unsigned int.
367 return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM);
// Updates MRG31K3P state to skip ahead by `offset` elements; forwards to
// state->discard(offset).
378 __forceinline__ __device__ __host__
void skipahead(
unsigned long long offset,
379 rocrand_state_mrg31k3p* state)
381 return state->discard(offset);
394 rocrand_state_mrg31k3p* state)
396 return state->discard_subsequence(subsequence);
409 rocrand_state_mrg31k3p* state)
411 return state->discard_sequence(sequence);
#define ROCRAND_MRG31K3P_DEFAULT_SEED
Default seed for MRG31K3P PRNG.
Definition: rocrand_mrg31k3p.h:44
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mrg31k3p.h:364
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by `subsequence` subsequences.
Definition: rocrand_mrg31k3p.h:393
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by `offset` elements.
Definition: rocrand_mrg31k3p.h:378
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by `sequence` sequences.
Definition: rocrand_mrg31k3p.h:408
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg31k3p *state)
Initializes MRG31K3P state.
Definition: rocrand_mrg31k3p.h:344