/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_mrg32k3a.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_mrg32k3a.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_mrg32k3a.h Source File
API library
rocrand_mrg32k3a.h
1 // Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_MRG32K3A_H_
22 #define ROCRAND_MRG32K3A_H_
23 
24 #ifndef FQUALIFIERS
25 #define FQUALIFIERS __forceinline__ __device__
26 #endif // FQUALIFIERS_
27 
28 #include "rocrand/rocrand_common.h"
29 #include "rocrand/rocrand_mrg32k3a_precomputed.h"
30 
31 #define ROCRAND_MRG32K3A_POW32 4294967296
32 #define ROCRAND_MRG32K3A_M1 4294967087
33 #define ROCRAND_MRG32K3A_M1C 209
34 #define ROCRAND_MRG32K3A_M2 4294944443
35 #define ROCRAND_MRG32K3A_M2C 22853
36 #define ROCRAND_MRG32K3A_A12 1403580
37 #define ROCRAND_MRG32K3A_A13 (4294967087 - 810728)
38 #define ROCRAND_MRG32K3A_A13N 810728
39 #define ROCRAND_MRG32K3A_A21 527612
40 #define ROCRAND_MRG32K3A_A23 (4294944443 - 1370589)
41 #define ROCRAND_MRG32K3A_A23N 1370589
42 #define ROCRAND_MRG32K3A_NORM_DOUBLE (2.3283065498378288e-10) // 1/ROCRAND_MRG32K3A_M1
43 #define ROCRAND_MRG32K3A_UINT_NORM (1.000000048661607) // (ROCRAND_MRG32K3A_POW32 - 1)/(ROCRAND_MRG32K3A_M1 - 1)
44 
53  #define ROCRAND_MRG32K3A_DEFAULT_SEED 12345ULL // end of group rocranddevice
55 
56 namespace rocrand_device {
57 
58 class mrg32k3a_engine
59 {
60 public:
61  struct mrg32k3a_state
62  {
63  unsigned int g1[3];
64  unsigned int g2[3];
65 
66  #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
67  // The Box–Muller transform requires two inputs to convert uniformly
68  // distributed real values [0; 1] to normally distributed real values
69  // (with mean = 0, and stddev = 1). Often user wants only one
70  // normally distributed number, to save performance and random
71  // numbers the 2nd value is saved for future requests.
72  unsigned int boxmuller_float_state; // is there a float in boxmuller_float
73  unsigned int boxmuller_double_state; // is there a double in boxmuller_double
74  float boxmuller_float; // normally distributed float
75  double boxmuller_double; // normally distributed double
76  #endif
77  };
78 
80  mrg32k3a_engine()
81  {
82  this->seed(ROCRAND_MRG32K3A_DEFAULT_SEED, 0, 0);
83  }
84 
94  mrg32k3a_engine(const unsigned long long seed,
95  const unsigned long long subsequence,
96  const unsigned long long offset)
97  {
98  this->seed(seed, subsequence, offset);
99  }
100 
110  void seed(unsigned long long seed_value,
111  const unsigned long long subsequence,
112  const unsigned long long offset)
113  {
114  if(seed_value == 0)
115  {
116  seed_value = ROCRAND_MRG32K3A_DEFAULT_SEED;
117  }
118  unsigned int x = (unsigned int) seed_value ^ 0x55555555U;
119  unsigned int y = (unsigned int) ((seed_value >> 32) ^ 0xAAAAAAAAU);
120  m_state.g1[0] = mod_mul_m1(x, seed_value);
121  m_state.g1[1] = mod_mul_m1(y, seed_value);
122  m_state.g1[2] = mod_mul_m1(x, seed_value);
123  m_state.g2[0] = mod_mul_m2(y, seed_value);
124  m_state.g2[1] = mod_mul_m2(x, seed_value);
125  m_state.g2[2] = mod_mul_m2(y, seed_value);
126  this->restart(subsequence, offset);
127  }
128 
131  void discard(unsigned long long offset)
132  {
133  this->discard_impl(offset);
134  }
135 
139  void discard_subsequence(unsigned long long subsequence)
140  {
141  this->discard_subsequence_impl(subsequence);
142  }
143 
147  void discard_sequence(unsigned long long sequence)
148  {
149  this->discard_sequence_impl(sequence);
150  }
151 
153  void restart(const unsigned long long subsequence,
154  const unsigned long long offset)
155  {
156  #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
157  m_state.boxmuller_float_state = 0;
158  m_state.boxmuller_double_state = 0;
159  #endif
160  this->discard_subsequence_impl(subsequence);
161  this->discard_impl(offset);
162  }
163 
165  unsigned int operator()()
166  {
167  return this->next();
168  }
169 
170  // Returned value is in range [1, ROCRAND_MRG32K3A_M1],
171  // where ROCRAND_MRG32K3A_M1 < UINT_MAX
173  unsigned int next()
174  {
175  const unsigned int p1 = mod_m1(
176  detail::mad_u64_u32(
177  ROCRAND_MRG32K3A_A12,
178  m_state.g1[1],
179  detail::mad_u64_u32(
180  ROCRAND_MRG32K3A_A13N,
181  (ROCRAND_MRG32K3A_M1 - m_state.g1[0]),
182  0
183  )
184  )
185  );
186 
187  m_state.g1[0] = m_state.g1[1]; m_state.g1[1] = m_state.g1[2];
188  m_state.g1[2] = p1;
189 
190  const unsigned int p2 = mod_m2(
191  detail::mad_u64_u32(
192  ROCRAND_MRG32K3A_A21,
193  m_state.g2[2],
194  detail::mad_u64_u32(
195  ROCRAND_MRG32K3A_A23N,
196  (ROCRAND_MRG32K3A_M2 - m_state.g2[0]),
197  0
198  )
199  )
200  );
201 
202  m_state.g2[0] = m_state.g2[1]; m_state.g2[1] = m_state.g2[2];
203  m_state.g2[2] = p2;
204 
205  return (p1 - p2) + (p1 <= p2 ? ROCRAND_MRG32K3A_M1 : 0);
206  }
207 
208 protected:
209  // Advances the internal state to skip \p offset numbers.
210  // DOES NOT CALCULATE NEW ULONGLONG
212  void discard_impl(unsigned long long offset)
213  {
214  discard_state(offset);
215  }
216 
217  // DOES NOT CALCULATE NEW ULONGLONG
219  void discard_subsequence_impl(unsigned long long subsequence)
220  {
221  int i = 0;
222 
223  while(subsequence > 0) {
224  if (subsequence & 1) {
225  #if defined(__HIP_DEVICE_COMPILE__)
226  mod_mat_vec_m1(d_A1P76 + i, m_state.g1);
227  mod_mat_vec_m2(d_A2P76 + i, m_state.g2);
228  #else
229  mod_mat_vec_m1(h_A1P76 + i, m_state.g1);
230  mod_mat_vec_m2(h_A2P76 + i, m_state.g2);
231  #endif
232  }
233  subsequence >>= 1;
234  i += 9;
235  }
236  }
237 
238  // DOES NOT CALCULATE NEW ULONGLONG
240  void discard_sequence_impl(unsigned long long sequence)
241  {
242  int i = 0;
243 
244  while(sequence > 0) {
245  if (sequence & 1) {
246  #if defined(__HIP_DEVICE_COMPILE__)
247  mod_mat_vec_m1(d_A1P127 + i, m_state.g1);
248  mod_mat_vec_m2(d_A2P127 + i, m_state.g2);
249  #else
250  mod_mat_vec_m1(h_A1P127 + i, m_state.g1);
251  mod_mat_vec_m2(h_A2P127 + i, m_state.g2);
252  #endif
253  }
254  sequence >>= 1;
255  i += 9;
256  }
257  }
258 
259  // Advances the internal state by offset times.
260  // DOES NOT CALCULATE NEW ULONGLONG
262  void discard_state(unsigned long long offset)
263  {
264  int i = 0;
265 
266  while(offset > 0) {
267  if (offset & 1) {
268  #if defined(__HIP_DEVICE_COMPILE__)
269  mod_mat_vec_m1(d_A1 + i, m_state.g1);
270  mod_mat_vec_m2(d_A2 + i, m_state.g2);
271  #else
272  mod_mat_vec_m1(h_A1 + i, m_state.g1);
273  mod_mat_vec_m2(h_A2 + i, m_state.g2);
274  #endif
275  }
276  offset >>= 1;
277  i += 9;
278  }
279  }
280 
281  // Advances the internal state to the next state
282  // DOES NOT CALCULATE NEW ULONGLONG
284  void discard_state()
285  {
286  discard_state(1);
287  }
288 
289 private:
291  static void mod_mat_vec_m1(const unsigned long long * A,
292  unsigned int * s)
293  {
294  unsigned long long x[3];
295 
296  x[0] = mod_m1(mod_m1(A[0] * s[0])
297  + mod_m1(A[1] * s[1])
298  + mod_m1(A[2] * s[2]));
299 
300  x[1] = mod_m1(mod_m1(A[3] * s[0])
301  + mod_m1(A[4] * s[1])
302  + mod_m1(A[5] * s[2]));
303 
304  x[2] = mod_m1(mod_m1(A[6] * s[0])
305  + mod_m1(A[7] * s[1])
306  + mod_m1(A[8] * s[2]));
307 
308  s[0] = x[0];
309  s[1] = x[1];
310  s[2] = x[2];
311  }
312 
314  static void mod_mat_vec_m2(const unsigned long long * A,
315  unsigned int * s)
316  {
317  unsigned long long x[3];
318 
319  x[0] = mod_m2(mod_m2(A[0] * s[0])
320  + mod_m2(A[1] * s[1])
321  + mod_m2(A[2] * s[2]));
322 
323  x[1] = mod_m2(mod_m2(A[3] * s[0])
324  + mod_m2(A[4] * s[1])
325  + mod_m2(A[5] * s[2]));
326 
327  x[2] = mod_m2(mod_m2(A[6] * s[0])
328  + mod_m2(A[7] * s[1])
329  + mod_m2(A[8] * s[2]));
330 
331  s[0] = x[0];
332  s[1] = x[1];
333  s[2] = x[2];
334  }
335 
337  static unsigned long long mod_mul_m1(unsigned int i,
338  unsigned long long j)
339  {
340  long long hi, lo, temp1, temp2;
341 
342  hi = i / 131072;
343  lo = i - (hi * 131072);
344  temp1 = mod_m1(hi * j) * 131072;
345  temp2 = mod_m1(lo * j);
346  lo = mod_m1(temp1 + temp2);
347 
348  if (lo < 0)
349  lo += ROCRAND_MRG32K3A_M1;
350  return lo;
351  }
352 
354  static unsigned long long mod_m1(unsigned long long p)
355  {
356  p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M1C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1));
357  if (p >= ROCRAND_MRG32K3A_M1)
358  p -= ROCRAND_MRG32K3A_M1;
359 
360  return p;
361  }
362 
364  static unsigned long long mod_mul_m2(unsigned int i,
365  unsigned long long j)
366  {
367  long long hi, lo, temp1, temp2;
368 
369  hi = i / 131072;
370  lo = i - (hi * 131072);
371  temp1 = mod_m2(hi * j) * 131072;
372  temp2 = mod_m2(lo * j);
373  lo = mod_m2(temp1 + temp2);
374 
375  if (lo < 0)
376  lo += ROCRAND_MRG32K3A_M2;
377  return lo;
378  }
379 
381  static unsigned long long mod_m2(unsigned long long p)
382  {
383  p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M2C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1));
384  p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M2C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1));
385  if (p >= ROCRAND_MRG32K3A_M2)
386  p -= ROCRAND_MRG32K3A_M2;
387 
388  return p;
389  }
390 
391 protected:
392  // State
393  mrg32k3a_state m_state;
394 
395  #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE
396  friend struct detail::engine_boxmuller_helper<mrg32k3a_engine>;
397  #endif
398 
399 }; // mrg32k3a_engine class
400 
401 } // end namespace rocrand_device
402 
409 typedef rocrand_device::mrg32k3a_engine rocrand_state_mrg32k3a;
411 
424 void rocrand_init(const unsigned long long seed,
425  const unsigned long long subsequence,
426  const unsigned long long offset,
427  rocrand_state_mrg32k3a * state)
428 {
429  *state = rocrand_state_mrg32k3a(seed, subsequence, offset);
430 }
431 
445 unsigned int rocrand(rocrand_state_mrg32k3a * state)
446 {
447  // next() in [1, ROCRAND_MRG32K3A_M1]
448  return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG32K3A_UINT_NORM);
449 }
450 
460 void skipahead(unsigned long long offset, rocrand_state_mrg32k3a * state)
461 {
462  return state->discard(offset);
463 }
464 
475 void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg32k3a * state)
476 {
477  return state->discard_subsequence(subsequence);
478 }
479 
490 void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg32k3a * state)
491 {
492  return state->discard_sequence(sequence);
493 }
494 
495 #endif // ROCRAND_MRG32K3A_H_
496  // end of group rocranddevice
FQUALIFIERS void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg32k3a *state)
Initializes MRG32K3A state.
Definition: rocrand_mrg32k3a.h:424
#define ROCRAND_MRG32K3A_DEFAULT_SEED
Default seed for MRG32K3A PRNG.
Definition: rocrand_mrg32k3a.h:53
FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg32k3a *state)
Updates MRG32K3A state to skip ahead by sequence sequences.
Definition: rocrand_mrg32k3a.h:490
FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg32k3a *state)
Updates MRG32K3A state to skip ahead by subsequence subsequences.
Definition: rocrand_mrg32k3a.h:475
FQUALIFIERS unsigned int rocrand(rocrand_state_mrg32k3a *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mrg32k3a.h:445
FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_mrg32k3a *state)
Updates MRG32K3A state to skip ahead by offset elements.
Definition: rocrand_mrg32k3a.h:460
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31