21 #ifndef ROCRAND_MRG32K3A_H_
22 #define ROCRAND_MRG32K3A_H_
25 #define FQUALIFIERS __forceinline__ __device__
28 #include "rocrand/rocrand_common.h"
29 #include "rocrand/rocrand_mrg32k3a_precomputed.h"
31 #define ROCRAND_MRG32K3A_POW32 4294967296
32 #define ROCRAND_MRG32K3A_M1 4294967087
33 #define ROCRAND_MRG32K3A_M1C 209
34 #define ROCRAND_MRG32K3A_M2 4294944443
35 #define ROCRAND_MRG32K3A_M2C 22853
36 #define ROCRAND_MRG32K3A_A12 1403580
37 #define ROCRAND_MRG32K3A_A13 (4294967087 - 810728)
38 #define ROCRAND_MRG32K3A_A13N 810728
39 #define ROCRAND_MRG32K3A_A21 527612
40 #define ROCRAND_MRG32K3A_A23 (4294944443 - 1370589)
41 #define ROCRAND_MRG32K3A_A23N 1370589
42 #define ROCRAND_MRG32K3A_NORM_DOUBLE (2.3283065498378288e-10)
43 #define ROCRAND_MRG32K3A_UINT_NORM (1.000000048661607)
53 #define ROCRAND_MRG32K3A_DEFAULT_SEED 12345ULL
56 namespace rocrand_device {
66 #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
72 unsigned int boxmuller_float_state;
73 unsigned int boxmuller_double_state;
74 float boxmuller_float;
75 double boxmuller_double;
94 mrg32k3a_engine(
const unsigned long long seed,
95 const unsigned long long subsequence,
96 const unsigned long long offset)
98 this->seed(seed, subsequence, offset);
110 void seed(
unsigned long long seed_value,
111 const unsigned long long subsequence,
112 const unsigned long long offset)
118 unsigned int x = (
unsigned int) seed_value ^ 0x55555555U;
119 unsigned int y = (
unsigned int) ((seed_value >> 32) ^ 0xAAAAAAAAU);
120 m_state.g1[0] = mod_mul_m1(x, seed_value);
121 m_state.g1[1] = mod_mul_m1(y, seed_value);
122 m_state.g1[2] = mod_mul_m1(x, seed_value);
123 m_state.g2[0] = mod_mul_m2(y, seed_value);
124 m_state.g2[1] = mod_mul_m2(x, seed_value);
125 m_state.g2[2] = mod_mul_m2(y, seed_value);
126 this->restart(subsequence, offset);
131 void discard(
unsigned long long offset)
133 this->discard_impl(offset);
139 void discard_subsequence(
unsigned long long subsequence)
141 this->discard_subsequence_impl(subsequence);
147 void discard_sequence(
unsigned long long sequence)
149 this->discard_sequence_impl(sequence);
153 void restart(
const unsigned long long subsequence,
154 const unsigned long long offset)
156 #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
157 m_state.boxmuller_float_state = 0;
158 m_state.boxmuller_double_state = 0;
160 this->discard_subsequence_impl(subsequence);
161 this->discard_impl(offset);
165 unsigned int operator()()
175 const unsigned int p1 = mod_m1(
177 ROCRAND_MRG32K3A_A12,
180 ROCRAND_MRG32K3A_A13N,
181 (ROCRAND_MRG32K3A_M1 - m_state.g1[0]),
187 m_state.g1[0] = m_state.g1[1]; m_state.g1[1] = m_state.g1[2];
190 const unsigned int p2 = mod_m2(
192 ROCRAND_MRG32K3A_A21,
195 ROCRAND_MRG32K3A_A23N,
196 (ROCRAND_MRG32K3A_M2 - m_state.g2[0]),
202 m_state.g2[0] = m_state.g2[1]; m_state.g2[1] = m_state.g2[2];
205 return (p1 - p2) + (p1 <= p2 ? ROCRAND_MRG32K3A_M1 : 0);
212 void discard_impl(
unsigned long long offset)
214 discard_state(offset);
219 void discard_subsequence_impl(
unsigned long long subsequence)
223 while(subsequence > 0) {
224 if (subsequence & 1) {
225 #if defined(__HIP_DEVICE_COMPILE__)
226 mod_mat_vec_m1(d_A1P76 + i, m_state.g1);
227 mod_mat_vec_m2(d_A2P76 + i, m_state.g2);
229 mod_mat_vec_m1(h_A1P76 + i, m_state.g1);
230 mod_mat_vec_m2(h_A2P76 + i, m_state.g2);
240 void discard_sequence_impl(
unsigned long long sequence)
244 while(sequence > 0) {
246 #if defined(__HIP_DEVICE_COMPILE__)
247 mod_mat_vec_m1(d_A1P127 + i, m_state.g1);
248 mod_mat_vec_m2(d_A2P127 + i, m_state.g2);
250 mod_mat_vec_m1(h_A1P127 + i, m_state.g1);
251 mod_mat_vec_m2(h_A2P127 + i, m_state.g2);
262 void discard_state(
unsigned long long offset)
268 #if defined(__HIP_DEVICE_COMPILE__)
269 mod_mat_vec_m1(d_A1 + i, m_state.g1);
270 mod_mat_vec_m2(d_A2 + i, m_state.g2);
272 mod_mat_vec_m1(h_A1 + i, m_state.g1);
273 mod_mat_vec_m2(h_A2 + i, m_state.g2);
291 static void mod_mat_vec_m1(
const unsigned long long * A,
294 unsigned long long x[3];
296 x[0] = mod_m1(mod_m1(A[0] * s[0])
297 + mod_m1(A[1] * s[1])
298 + mod_m1(A[2] * s[2]));
300 x[1] = mod_m1(mod_m1(A[3] * s[0])
301 + mod_m1(A[4] * s[1])
302 + mod_m1(A[5] * s[2]));
304 x[2] = mod_m1(mod_m1(A[6] * s[0])
305 + mod_m1(A[7] * s[1])
306 + mod_m1(A[8] * s[2]));
314 static void mod_mat_vec_m2(
const unsigned long long * A,
317 unsigned long long x[3];
319 x[0] = mod_m2(mod_m2(A[0] * s[0])
320 + mod_m2(A[1] * s[1])
321 + mod_m2(A[2] * s[2]));
323 x[1] = mod_m2(mod_m2(A[3] * s[0])
324 + mod_m2(A[4] * s[1])
325 + mod_m2(A[5] * s[2]));
327 x[2] = mod_m2(mod_m2(A[6] * s[0])
328 + mod_m2(A[7] * s[1])
329 + mod_m2(A[8] * s[2]));
337 static unsigned long long mod_mul_m1(
unsigned int i,
338 unsigned long long j)
340 long long hi, lo, temp1, temp2;
343 lo = i - (hi * 131072);
344 temp1 = mod_m1(hi * j) * 131072;
345 temp2 = mod_m1(lo * j);
346 lo = mod_m1(temp1 + temp2);
349 lo += ROCRAND_MRG32K3A_M1;
354 static unsigned long long mod_m1(
unsigned long long p)
356 p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M1C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1));
357 if (p >= ROCRAND_MRG32K3A_M1)
358 p -= ROCRAND_MRG32K3A_M1;
364 static unsigned long long mod_mul_m2(
unsigned int i,
365 unsigned long long j)
367 long long hi, lo, temp1, temp2;
370 lo = i - (hi * 131072);
371 temp1 = mod_m2(hi * j) * 131072;
372 temp2 = mod_m2(lo * j);
373 lo = mod_m2(temp1 + temp2);
376 lo += ROCRAND_MRG32K3A_M2;
381 static unsigned long long mod_m2(
unsigned long long p)
383 p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M2C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1));
384 p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M2C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1));
385 if (p >= ROCRAND_MRG32K3A_M2)
386 p -= ROCRAND_MRG32K3A_M2;
393 mrg32k3a_state m_state;
395 #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
396 friend struct detail::engine_boxmuller_helper<mrg32k3a_engine>;
409 typedef rocrand_device::mrg32k3a_engine rocrand_state_mrg32k3a;
425 const unsigned long long subsequence,
426 const unsigned long long offset,
427 rocrand_state_mrg32k3a * state)
429 *state = rocrand_state_mrg32k3a(seed, subsequence, offset);
445 unsigned int rocrand(rocrand_state_mrg32k3a * state)
448 return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG32K3A_UINT_NORM);
460 void skipahead(
unsigned long long offset, rocrand_state_mrg32k3a * state)
462 return state->discard(offset);
477 return state->discard_subsequence(subsequence);
492 return state->discard_sequence(sequence);
FQUALIFIERS void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg32k3a *state)
Initializes MRG32K3A state.
Definition: rocrand_mrg32k3a.h:424
#define ROCRAND_MRG32K3A_DEFAULT_SEED
Default seed for MRG32K3A PRNG.
Definition: rocrand_mrg32k3a.h:53
FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg32k3a *state)
Updates MRG32K3A state to skip ahead by sequence sequences.
Definition: rocrand_mrg32k3a.h:490
FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg32k3a *state)
Updates MRG32K3A state to skip ahead by subsequence subsequences.
Definition: rocrand_mrg32k3a.h:475
FQUALIFIERS unsigned int rocrand(rocrand_state_mrg32k3a *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mrg32k3a.h:445
FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_mrg32k3a *state)
Updates MRG32K3A state to skip ahead by offset elements.
Definition: rocrand_mrg32k3a.h:460
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31